My Project
lbfgs.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "../common.hpp"
5 #include "ring_buffer.hpp"
6 #include <autodiff/forward/dual.hpp>
7 #include <Eigen/Eigen>
8 #include <autodiff/forward/dual.hpp>
9 #include <chrono>
10 
11 namespace cpu_mlp {
12 
17 template <typename V, typename M>
18 class LBFGS : public FullBatchMinimizer<V, M> {
20  using Base::_iters;
21  using Base::_max_iters;
22  using Base::_tol;
23 
24 public:
29  void setHistorySize(size_t history_size) { m = history_size; }
30 
38  V solve(V x, VecFun<V, double> &f, GradFun<V> &Gradient) override {
39 
40  RingBuffer<V> s_list(m);
41  RingBuffer<V> y_list(m);
42  RingBuffer<double> rho_list(m);
43 
44  V grad = Gradient(x);
45  V p = -grad;
46  V x_new = x;
47 
48  const bool timing = (this->recorder_ != nullptr);
49  if (this->recorder_) this->recorder_->reset();
50  auto start_time = std::chrono::steady_clock::now();
51 
52  for (_iters = 0; _iters < _max_iters; ++_iters) {
53  if (grad.norm() < _tol) {
54  break;
55  }
56 
57  p = compute_direction(grad, s_list, y_list, rho_list);
58 
59 
60  double alpha_wolfe;
61  // Heuristic for the first step
62  if (_iters == 0)
63  alpha_wolfe = std::min(1.0, 1.0 / grad.norm());
64  else
65  alpha_wolfe = this->line_search(x, p, f, Gradient);
66 
67  x_new = x + alpha_wolfe * p;
68  V s = x_new - x;
69 
70  V grad_new = Gradient(x_new);
71  V y = grad_new - grad;
72 
73  x = x_new;
74 
75  double ys = y.dot(s);
76 
77  if (ys > 1e-10) {
78  double rho = 1.0 / ys;
79  s_list.push_back(s);
80  y_list.push_back(y);
81  rho_list.push_back(rho);
82 
83 
84  }
85 
86  grad = grad_new;
87 
88  if (this->recorder_) {
89  double loss = f(x);
90  double elapsed_ms = 0.0;
91  if (timing) {
92  auto now = std::chrono::steady_clock::now();
93  elapsed_ms = std::chrono::duration<double, std::milli>(now - start_time).count();
94  }
95  this->recorder_->record(_iters, loss, grad.norm(), elapsed_ms);
96  }
97  }
98 
99  return x;
100  }
101 
106  V compute_direction(const V &grad,
107  const RingBuffer<V> &s_list,
108  const RingBuffer<V> &y_list,
109  const RingBuffer<double> &rho_list) {
110 
111  if (s_list.empty()) {
112  return -grad;
113  }
114 
115  V z = V::Zero(grad.size());
116  V q = grad;
117  std::vector<double> alpha_list(s_list.size());
118 
119  // Backward pass
120  for (int i = static_cast<int>(s_list.size()) - 1; i >= 0; --i) {
121  alpha_list[i] = rho_list[i] * s_list[i].dot(q);
122  q -= alpha_list[i] * y_list[i];
123  }
124 
125 
126  // Scaling
127  double gamma = s_list.back().dot(y_list.back()) /
128  y_list.back().dot(y_list.back());
129 
130  z = gamma * q;
131 
132  // Forward pass
133  for (size_t i = 0; i < s_list.size(); ++i) {
134  double beta = rho_list[i] * y_list[i].dot(z);
135  z += s_list[i] * (alpha_list[i] - beta);
136  }
137 
138  return -z;
139  }
140 
141 private:
142  size_t m = 16;
143 };
144 
145 } // namespace cpu_mlp
void reset()
Reset recorded size without releasing memory.
Definition: iteration_recorder.hpp:31
void record(int idx, double loss, double grad_norm, double time_ms=0.0)
Record a loss/grad/time entry at iteration index.
Definition: iteration_recorder.hpp:40
Base class for Full Batch Minimizers.
Definition: full_batch_minimizer.hpp:23
double rho
Definition: full_batch_minimizer.hpp:115
double _tol
Definition: full_batch_minimizer.hpp:109
double line_search(V x, V p, VecFun< V, double > &f, GradFun< V > &Gradient)
Backtracking Line Search satisfying Wolfe Conditions.
Definition: full_batch_minimizer.hpp:126
unsigned int _iters
Definition: full_batch_minimizer.hpp:108
::IterationRecorder< CpuBackend > * recorder_
Optional recorder for diagnostics.
Definition: full_batch_minimizer.hpp:110
unsigned int _max_iters
Definition: full_batch_minimizer.hpp:107
Limited-memory BFGS (L-BFGS) minimizer.
Definition: lbfgs.hpp:18
V solve(V x, VecFun< V, double > &f, GradFun< V > &Gradient) override
Solves the optimization problem using L-BFGS.
Definition: lbfgs.hpp:38
V compute_direction(const V &grad, const RingBuffer< V > &s_list, const RingBuffer< V > &y_list, const RingBuffer< double > &rho_list)
Two-loop recursion to compute search direction.
Definition: lbfgs.hpp:106
void setHistorySize(size_t history_size)
Set history size for L-BFGS.
Definition: lbfgs.hpp:29
A fixed-capacity ring buffer (circular buffer).
Definition: ring_buffer.hpp:15
bool empty() const
Checks if the buffer is empty.
Definition: ring_buffer.hpp:104
void push_back(const T &val)
Pushes a new element into the buffer.
Definition: ring_buffer.hpp:43
size_t size() const
Returns the number of elements currently stored.
Definition: ring_buffer.hpp:101
T & back()
Access the newest element.
Definition: ring_buffer.hpp:86
std::function< T(T)> GradFun
Gradient function type alias (T -> T).
Definition: common.hpp:32
std::function< W(T)> VecFun
Objective function type alias (T -> W).
Definition: common.hpp:35
Definition: layer.hpp:13