  // CPU specialization: host-side std::vector<double> buffers.
  /// Allocate buffers for up to capacity iterations.
  void init(int capacity) {
    if (capacity <= 0)
      return;
    capacity_ = capacity;  // restored: record() bounds-checks against capacity_
    size_ = 0;
    loss_.assign(static_cast<size_t>(capacity), 0.0);
    grad_norm_.assign(static_cast<size_t>(capacity), 0.0);
    time_ms_.assign(static_cast<size_t>(capacity), 0.0);
  }
  /// Record a loss/grad/time entry at iteration index.
  void record(int idx, double loss, double grad_norm, double time_ms = 0.0) {
    if (idx < 0 || idx >= capacity_)
      return;
    size_t i = static_cast<size_t>(idx);
    loss_[i] = loss;  // restored: the loss store was missing from the listing
    grad_norm_[i] = grad_norm;
    time_ms_[i] = time_ms;
    size_ = std::max(size_, idx + 1);
  }
  /// Copy recorded loss and gradient norm to output vectors.
  void copy_to_host(std::vector<double> &loss_out,
                    std::vector<double> &grad_norm_out) const {
    loss_out.assign(loss_.begin(), loss_.begin() + size_);
    grad_norm_out.assign(grad_norm_.begin(), grad_norm_.begin() + size_);
  }
  /// Copy recorded loss, gradient norm, and time to output vectors.
  void copy_to_host(std::vector<double> &loss_out,
                    std::vector<double> &grad_norm_out,
                    std::vector<double> &time_ms_out) const {
    loss_out.assign(loss_.begin(), loss_.begin() + size_);
    grad_norm_out.assign(grad_norm_.begin(), grad_norm_.begin() + size_);
    time_ms_out.assign(time_ms_.begin(), time_ms_.begin() + size_);
  }
  /// Current number of recorded entries.
  int size() const { return size_; }
  std::vector<double> loss_;
  std::vector<double> grad_norm_;
  std::vector<double> time_ms_;
  int capacity_ = 0;  // restored: used by record()'s bounds check
  int size_ = 0;      // restored: high-water mark returned by size()
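A minimal usage sketch of the CPU recorder. The template name IterationRecorder and the CpuBackend tag are assumed from the backend-tag docs below (the actual identifiers live in network_wrapper.hpp); max_iters, compute_loss, and compute_grad_norm are hypothetical stand-ins for the caller's training loop.

    // Sketch only, not the project's actual driver code.
    IterationRecorder<CpuBackend> recorder;  // class/tag names assumed
    recorder.init(max_iters);                // max_iters: hypothetical loop bound
    for (int it = 0; it < max_iters; ++it) {
      double loss = compute_loss();          // hypothetical
      double gnorm = compute_grad_norm();    // hypothetical
      recorder.record(it, loss, gnorm);
    }
    std::vector<double> loss_hist, gnorm_hist;
    recorder.copy_to_host(loss_hist, gnorm_hist);  // copies only the recorded prefix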
  // CUDA specialization: device-side buffers (DeviceBuffer, see device_buffer.cuh).
  /// Allocate device buffers for up to capacity iterations.
  void init(int capacity) {
    if (capacity <= 0)
      return;
    capacity_ = capacity;  // restored: record() bounds-checks against capacity_
    size_ = 0;
    loss_.resize(static_cast<size_t>(capacity));
    grad_norm_.resize(static_cast<size_t>(capacity));
    time_ms_.resize(static_cast<size_t>(capacity));
  }
  /// Reset recorded size without releasing memory.
  void reset() { size_ = 0; }
  /// Record a loss/grad/time entry at iteration index, staging each scalar on device.
  void record(int idx, cuda_mlp::CudaScalar loss, cuda_mlp::CudaScalar grad_norm,
              cuda_mlp::CudaScalar time_ms = 0.0f) {
    if (idx < 0 || idx >= capacity_)
      return;
    cuda_mlp::cuda_check(cudaMemcpy(loss_.data() + idx, &loss,
                                    sizeof(cuda_mlp::CudaScalar),
                                    cudaMemcpyHostToDevice),
                         "record loss");
    cuda_mlp::cuda_check(cudaMemcpy(grad_norm_.data() + idx, &grad_norm,
                                    sizeof(cuda_mlp::CudaScalar),
                                    cudaMemcpyHostToDevice),
                         "record grad_norm");
    cuda_mlp::cuda_check(cudaMemcpy(time_ms_.data() + idx, &time_ms,
                                    sizeof(cuda_mlp::CudaScalar),
                                    cudaMemcpyHostToDevice),
                         "record time_ms");
    size_ = std::max(size_, idx + 1);
  }
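  // Note: each record() call issues three synchronous single-scalar cudaMemcpy
  // transfers. That is cheap enough for once-per-iteration logging, but a caller
  // on a hot path could accumulate values on the host and upload one batch instead.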
  /// Copy recorded loss and gradient norm back to host vectors.
  void copy_to_host(std::vector<cuda_mlp::CudaScalar> &loss_out,
                    std::vector<cuda_mlp::CudaScalar> &grad_norm_out) const {
    loss_out.resize(size_);
    grad_norm_out.resize(size_);
    if (size_ == 0)
      return;
    loss_.copy_to_host(loss_out.data(), size_);
    grad_norm_.copy_to_host(grad_norm_out.data(), size_);
  }
  /// Copy recorded loss, gradient norm, and time back to host vectors.
  void copy_to_host(std::vector<cuda_mlp::CudaScalar> &loss_out,
                    std::vector<cuda_mlp::CudaScalar> &grad_norm_out,
                    std::vector<cuda_mlp::CudaScalar> &time_ms_out) const {
    loss_out.resize(size_);
    grad_norm_out.resize(size_);
    time_ms_out.resize(size_);
    if (size_ == 0)
      return;
    loss_.copy_to_host(loss_out.data(), size_);
    grad_norm_.copy_to_host(grad_norm_out.data(), size_);
    time_ms_.copy_to_host(time_ms_out.data(), size_);
  }
  /// Current number of recorded entries.
  int size() const { return size_; }
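The CUDA side mirrors the CPU sketch above, with the same assumed template and tag names; CudaScalar is float per common.cuh.

    // Sketch only, assuming the same hypothetical names as the CPU example.
    IterationRecorder<CudaBackend> recorder;
    recorder.init(max_iters);
    recorder.record(0, /*loss=*/0.693f, /*grad_norm=*/1.25f);  // staged on device
    std::vector<cuda_mlp::CudaScalar> loss_hist, gnorm_hist;
    recorder.copy_to_host(loss_hist, gnorm_hist);  // one device-to-host copy per buffer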
Cross-references from this listing (Doxygen):

void init(int capacity)
    Allocate buffers for up to capacity iterations. (iteration_recorder.hpp:21)
void reset()
    Reset recorded size without releasing memory. (iteration_recorder.hpp:31)
void record(int idx, double loss, double grad_norm, double time_ms = 0.0)
    Record a loss/grad/time entry at iteration index. (iteration_recorder.hpp:40)
void copy_to_host(std::vector< double > &loss_out, std::vector< double > &grad_norm_out) const
    Copy recorded loss and gradient norm to output vectors. (iteration_recorder.hpp:50)
void copy_to_host(std::vector< double > &loss_out, std::vector< double > &grad_norm_out, std::vector< double > &time_ms_out) const
    Copy recorded loss, gradient norm, and time to output vectors. (iteration_recorder.hpp:56)
int size() const
    Current number of recorded entries. (iteration_recorder.hpp:64)
Iteration recorder specialized by backend (CPU/CUDA). (iteration_recorder.hpp:13)
Owning buffer for device memory. (device_buffer.cuh:7)
float CudaScalar
    Scalar type used across CUDA kernels and optimizers. (common.cuh:11)
void cuda_check(cudaError_t err, const char *msg)
    Check a CUDA API call and abort with a message on failure. (common.cuh:18)
Backend tag for CPU implementations. (network_wrapper.hpp:20)
Backend tag for CUDA implementations. (network_wrapper.hpp:22)
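For completeness, a sketch of a cuda_check consistent with the signature and brief documented above ("abort with a message on failure"); the actual body in common.cuh may format or report errors differently.

    #include <cstdio>
    #include <cstdlib>
    #include <cuda_runtime.h>

    // Sketch only: abort with a message if a CUDA API call failed.
    inline void cuda_check(cudaError_t err, const char *msg) {
      if (err != cudaSuccess) {
        std::fprintf(stderr, "CUDA error (%s): %s\n", msg, cudaGetErrorString(err));
        std::abort();
      }
    }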