3 #include "../common.hpp"
30 using S_VecFun = std::function<double(
const V &,
const V &,
const V &)>;
31 using BatchGradFun = std::function<void(
const V &,
const std::vector<size_t> &, V &)>;
69 int print_every = 50) {
70 if (_inputs.cols() == 0 || _targets.cols() == 0) {
71 throw std::runtime_error(
"No data set for StochasticGradientDescent::stochastic_solve");
74 int N =
static_cast<int>(_inputs.cols());
75 std::vector<size_t> full_indices(
static_cast<size_t>(N));
76 for (
int i = 0; i < N; ++i) {
77 full_indices[
static_cast<size_t>(i)] =
static_cast<size_t>(i);
81 int dim =
static_cast<int>(w.size());
87 const bool timing = (this->
recorder_ !=
nullptr);
89 auto start_time = std::chrono::steady_clock::now();
91 while (_iters < _max_iters) {
93 double last_grad_norm = 0.0;
95 for (
int t = 0; t < m; ++t) {
98 V grad_est = V::Zero(dim);
100 _batch_g(w, minibatch_indices, grad_est);
101 last_grad_norm = grad_est.norm();
103 w = w - step_size * grad_est;
105 passes +=
static_cast<double>(b) /
static_cast<double>(N);
109 for (
int i = 0; i < N; ++i) {
110 loss += _sf(w, _inputs.col(i), _targets.col(i));
112 loss /=
static_cast<double>(N);
114 if (verbose && ((_iters % print_every) == 0)) {
115 std::cout <<
"Epoch " << (_iters + 1) <<
" loss=" << loss <<
" passes=" << passes << std::endl;
119 double grad_norm = last_grad_norm;
121 V full_grad = V::Zero(dim);
122 _batch_g(w, full_indices, full_grad);
123 grad_norm = full_grad.norm();
125 double elapsed_ms = 0.0;
127 auto now = std::chrono::steady_clock::now();
128 elapsed_ms = std::chrono::duration<double, std::milli>(now - start_time).count();
146 template <
typename V,
typename M>
148 const size_t N,
size_t batch_size, std::mt19937 &rng) {
149 if (N == 0 || batch_size == 0) {
150 std::cerr <<
"Warning: sampling minibatch from empty dataset or with batch_size=0." << std::endl;
154 std::vector<size_t> idx(N);
155 for (
size_t i = 0; i < N; ++i) {
159 if (batch_size >= N) {
163 for (
size_t i = 0; i < batch_size; ++i) {
164 std::uniform_int_distribution<size_t> dist(i, N - 1);
165 size_t j = dist(rng);
166 std::swap(idx[i], idx[j]);
168 idx.resize(batch_size);
void reset()
Reset recorded size without releasing memory.
Definition: iteration_recorder.hpp:31
void record(int idx, double loss, double grad_norm, double time_ms=0.0)
Record a loss/grad/time entry at iteration index.
Definition: iteration_recorder.hpp:40
Stochastic Gradient Descent (SGD) minimizer.
Definition: s_gd.hpp:17
V stochastic_solve(const V &init_w, int m, int b, double step, bool verbose=false, int print_every=50)
Run the stochastic gradient descent optimization from the initial weights and return the final weight vector.
Definition: s_gd.hpp:63
static std::vector< size_t > sample_minibatch_indices(const size_t N, size_t batch_size, std::mt19937 &rng)
Helper to sample minibatch indices without replacement (Fisher-Yates shuffle subset).
Definition: s_gd.hpp:147
std::function< void(const V &, const std::vector< size_t > &, V &)> BatchGradFun
Definition: s_gd.hpp:31
std::function< double(const V &, const V &, const V &)> S_VecFun
Definition: s_gd.hpp:30
void setData(const M &inputs, const M &targets, const S_VecFun &f, const BatchGradFun &g)
Configure data and callbacks for the solver.
Definition: s_gd.hpp:46
StochasticGradientDescent()=default
Default constructor.
Base class for Stochastic Minimizers.
Definition: stochastic_minimizer.hpp:16
void setMaxIterations(int max_iters)
Sets the maximum number of iterations.
Definition: stochastic_minimizer.hpp:24
unsigned int _iters
Definition: stochastic_minimizer.hpp:45
unsigned int _max_iters
Definition: stochastic_minimizer.hpp:44
void setTolerance(double tol)
Sets the tolerance for convergence (full gradient norm).
Definition: stochastic_minimizer.hpp:36
::IterationRecorder< CpuBackend > * recorder_
Optional recorder for diagnostics.
Definition: stochastic_minimizer.hpp:48
void setStepSize(double s)
Sets the step size (learning rate).
Definition: stochastic_minimizer.hpp:30
double step_size
Definition: stochastic_minimizer.hpp:47
double _tol
Definition: stochastic_minimizer.hpp:46
constexpr unsigned int kDefaultSeed
Definition: seed.hpp:4