iteration_recorder.hpp
#pragma once

#include <algorithm>
#include <vector>

struct CpuBackend;
struct CudaBackend;

/// Iteration recorder specialized by backend (CPU/CUDA).
template <typename Backend> class IterationRecorder;

/// CPU specialization: history is kept in host-side std::vector buffers.
template <> class IterationRecorder<CpuBackend> {
public:
  /// Allocate buffers for up to capacity iterations.
  void init(int capacity) {
    if (capacity <= 0) return;
    capacity_ = capacity;
    loss_.assign(static_cast<size_t>(capacity), 0.0);
    grad_norm_.assign(static_cast<size_t>(capacity), 0.0);
    time_ms_.assign(static_cast<size_t>(capacity), 0.0);
    size_ = 0;
  }

  /// Reset recorded size without releasing memory.
  void reset() { size_ = 0; }

  /// Record a loss/grad/time entry at iteration index. Out-of-range indices
  /// are ignored; size() grows to cover the highest index recorded so far.
  void record(int idx, double loss, double grad_norm, double time_ms = 0.0) {
    if (idx < 0 || idx >= capacity_) return;
    size_t i = static_cast<size_t>(idx);
    loss_[i] = loss;
    grad_norm_[i] = grad_norm;
    time_ms_[i] = time_ms;
    size_ = std::max(size_, idx + 1);
  }

  /// Copy recorded loss and gradient norm to output vectors.
  void copy_to_host(std::vector<double> &loss_out, std::vector<double> &grad_norm_out) const {
    loss_out.assign(loss_.begin(), loss_.begin() + size_);
    grad_norm_out.assign(grad_norm_.begin(), grad_norm_.begin() + size_);
  }

  /// Copy recorded loss, gradient norm, and time to output vectors.
  void copy_to_host(
      std::vector<double> &loss_out, std::vector<double> &grad_norm_out, std::vector<double> &time_ms_out) const {
    loss_out.assign(loss_.begin(), loss_.begin() + size_);
    grad_norm_out.assign(grad_norm_.begin(), grad_norm_.begin() + size_);
    time_ms_out.assign(time_ms_.begin(), time_ms_.begin() + size_);
  }

  /// Current number of recorded entries.
  int size() const { return size_; }

private:
  std::vector<double> loss_;
  std::vector<double> grad_norm_;
  std::vector<double> time_ms_;
  int capacity_ = 0;
  int size_ = 0;
};
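Example: a minimal host-side usage sketch (not part of the header). It assumes the file is included as "iteration_recorder.hpp"; the loop bounds and values are illustrative.

#include <cstdio>
#include <vector>
#include "iteration_recorder.hpp"

int main() {
  IterationRecorder<CpuBackend> rec;
  rec.init(100);                            // room for up to 100 iterations
  for (int it = 0; it < 3; ++it) {
    double loss = 1.0 / (it + 1);           // placeholder training values
    rec.record(it, loss, 0.5 * loss, 12.0); // loss, grad norm, time in ms
  }
  std::vector<double> loss, grad;
  rec.copy_to_host(loss, grad);             // copies the first size() entries
  std::printf("%d entries, last loss = %f\n", rec.size(), loss.back());
}

Because record() is index-addressed, size() reports the highest recorded index plus one rather than the number of record() calls.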
#ifdef __CUDACC__
  #include "cuda/common.cuh"
  #include "cuda/device_buffer.cuh"

/// CUDA specialization: history is kept in device-side buffers.
template <> class IterationRecorder<CudaBackend> {
public:
  /// Allocate device buffers for up to capacity iterations.
  void init(int capacity) {
    if (capacity <= 0) return;
    capacity_ = capacity;
    loss_.resize(static_cast<size_t>(capacity));
    grad_norm_.resize(static_cast<size_t>(capacity));
    time_ms_.resize(static_cast<size_t>(capacity));
    size_ = 0;
  }

  /// Reset recorded size without releasing memory.
  void reset() { size_ = 0; }

  /// Record a loss/grad/time entry at iteration index. Each scalar is copied
  /// host-to-device; out-of-range indices are ignored.
  void record(int idx, cuda_mlp::CudaScalar loss, cuda_mlp::CudaScalar grad_norm, cuda_mlp::CudaScalar time_ms = 0) {
    if (idx < 0 || idx >= capacity_) return;
    cuda_mlp::cuda_check(
        cudaMemcpy(loss_.data() + idx, &loss, sizeof(cuda_mlp::CudaScalar), cudaMemcpyHostToDevice), "record loss");
    cuda_mlp::cuda_check(
        cudaMemcpy(grad_norm_.data() + idx, &grad_norm, sizeof(cuda_mlp::CudaScalar), cudaMemcpyHostToDevice),
        "record grad_norm");
    cuda_mlp::cuda_check(
        cudaMemcpy(time_ms_.data() + idx, &time_ms, sizeof(cuda_mlp::CudaScalar), cudaMemcpyHostToDevice),
        "record time_ms");
    size_ = std::max(size_, idx + 1);
  }

  /// Copy recorded loss and gradient norm from device to host vectors.
  void copy_to_host(std::vector<cuda_mlp::CudaScalar> &loss_out, std::vector<cuda_mlp::CudaScalar> &grad_norm_out) const {
    loss_out.resize(size_);
    grad_norm_out.resize(size_);
    if (size_ == 0) return;
    loss_.copy_to_host(loss_out.data(), size_);
    grad_norm_.copy_to_host(grad_norm_out.data(), size_);
  }

  /// Copy recorded loss, gradient norm, and time from device to host vectors.
  void copy_to_host(std::vector<cuda_mlp::CudaScalar> &loss_out,
                    std::vector<cuda_mlp::CudaScalar> &grad_norm_out,
                    std::vector<cuda_mlp::CudaScalar> &time_ms_out) const {
    loss_out.resize(size_);
    grad_norm_out.resize(size_);
    time_ms_out.resize(size_);
    if (size_ == 0) return;
    loss_.copy_to_host(loss_out.data(), size_);
    grad_norm_.copy_to_host(grad_norm_out.data(), size_);
    time_ms_.copy_to_host(time_ms_out.data(), size_);
  }

  /// Current number of recorded entries.
  int size() const { return size_; }

private:
  // Per-metric device buffers; DeviceBuffer (cuda/device_buffer.cuh) owns the
  // device memory. The element type is assumed to be cuda_mlp::CudaScalar.
  cuda_mlp::DeviceBuffer<cuda_mlp::CudaScalar> loss_;
  cuda_mlp::DeviceBuffer<cuda_mlp::CudaScalar> grad_norm_;
  cuda_mlp::DeviceBuffer<cuda_mlp::CudaScalar> time_ms_;
  int capacity_ = 0;
  int size_ = 0;
};
#endif
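Example: a sketch of the device-backed recorder. It must be compiled with nvcc so that __CUDACC__ is defined and the CUDA specialization is available; the values are illustrative, and cuda_mlp::CudaScalar is assumed to be a floating-point type.

#include <cstdio>
#include <vector>
#include "iteration_recorder.hpp"

int main() {
  IterationRecorder<CudaBackend> rec;
  rec.init(100);
  for (int it = 0; it < 3; ++it) {
    cuda_mlp::CudaScalar loss = 1.0f / (it + 1);
    rec.record(it, loss, 0.5f * loss);  // time_ms defaults to 0
  }
  std::vector<cuda_mlp::CudaScalar> loss, grad;
  rec.copy_to_host(loss, grad);         // device-to-host copy of size() entries
  std::printf("%d entries, last loss = %f\n", rec.size(),
              static_cast<double>(loss.back()));
}

The public API mirrors the CPU specialization, so host code can stay backend-agnostic behind the IterationRecorder<Backend> template.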