lbfgs-FFNN/bfgs_8hpp_source.html

 #pragma once


 #include "../common.hpp"

 #include "full_batch_minimizer.hpp"

 #include <Eigen/Eigen>

 #include <autodiff/reverse/var.hpp>

 #include <autodiff/reverse/var/eigen.hpp>


 namespace cpu_mlp {


 template <typename M> constexpr bool isSparse = std::is_base_of_v<Eigen::SparseMatrixBase<M>, M>;


 /**
  * @brief Linear solver type used when the caller does not supply one.
  *
  * Resolves to Eigen::ConjugateGradient<M> when isSparse<M> is true and to
  * Eigen::LDLT<M> otherwise.
  *
  * @tparam M Matrix type the solver operates on.
  */
 template <typename M>
 using DefaultSolverT = std::conditional_t<isSparse<M>, Eigen::ConjugateGradient<M>, Eigen::LDLT<M>>;


 /**
  * @brief BFGS (Broyden–Fletcher–Goldfarb–Shanno) quasi-Newton minimizer.
  *
  * Maintains an approximation `_B` of the Hessian, obtains the search direction
  * by solving `_B * p = -gradient` with a linear solver, picks the step length
  * with the base-class line search and then applies the BFGS update to `_B`.
  *
  * @tparam V      Vector type of the optimisation variable.
  * @tparam M      Matrix type used to store the Hessian approximation.
  * @tparam Solver Linear solver type. When it equals DefaultSolverT<M> the
  *                solver is owned by this object; any other type is held by
  *                reference and must be supplied to the constructor.
  */
 template <typename V, typename M, typename Solver = DefaultSolverT<M>> class BFGS : public FullBatchMinimizer<V, M> {
   using Base = FullBatchMinimizer<V, M>;
   using Base::_iters;
   using Base::_max_iters;
   using Base::_tol;

 protected:
   /// True when the solver template argument is the default one for M.
   static constexpr bool UseDefaultSolver = std::is_same_v<Solver, DefaultSolverT<M>>;

   /// Storage type of the solver: by value for the default solver, by reference otherwise.
   using SolverT = std::conditional_t<UseDefaultSolver, Solver, Solver &>;

 private:
   /// Relative threshold for the curvature test `y.s > tol * |y| * |s|`.
   static constexpr double kCurvatureTol = 1e-10;

   SolverT _solver; ///< Linear solver used to compute the search direction.
   M _B;            ///< Current Hessian approximation.

 public:
   /**
    * @brief Builds a minimizer that owns a default-constructed solver.
    *
    * The solver member is default-initialised directly; no temporary is
    * assigned to it, so the solver type does not need to be assignable.
    */
   BFGS()
   requires(UseDefaultSolver) {}

   /**
    * @brief Builds a minimizer that uses an externally owned solver.
    *
    * @param solver Solver to use; it is stored by reference, so it must
    *               outlive this object.
    */
   BFGS(Solver &solver)
   requires(!UseDefaultSolver) : _solver(solver) {}

   /**
    * @brief Sets the initial approximate Hessian matrix.
    *
    * @param b Matrix copied into the internal Hessian approximation.
    */
   void setInitialHessian(const M &b) { _B = b; }

   /**
    * @brief Solves the optimization problem using the BFGS method.
    *
    * Iterates until the gradient norm is at most `_tol` or `_max_iters`
    * iterations were performed. The number of iterations done is left in
    * `_iters`.
    *
    * @param x        Starting point (taken by value and updated in place).
    * @param f        Objective function, forwarded to the line search.
    * @param Gradient Gradient of the objective.
    * @return The last iterate.
    */
   V solve(V x, VecFun<V, double> &f, GradFun<V> &Gradient) override {
     // Fall back to B0 = I when no (or a wrongly sized) initial Hessian was set.
     if (_B.rows() != x.size() || _B.cols() != x.size()) {
       _B = M::Identity(x.size(), x.size());
     }

     // The gradient at the current iterate is cached so that it is evaluated
     // once per point instead of once per use.
     V g = Gradient(x);

     for (_iters = 0; _iters < _max_iters && g.norm() > _tol; ++_iters) {
       _solver.compute(_B);
       check(_solver.info() == Eigen::Success, "BFGS: linear solver failed on the Hessian approximation");

       // Search direction: solution of B p = -g.
       const V p = _solver.solve(-g);

       const double alpha = this->line_search(x, p, f, Gradient);

       const V s = alpha * p; // step actually taken
       x = x + s;

       const V g_next = Gradient(x);
       const V y = g_next - g; // change of the gradient along the step

       const V Bs = _B * s;
       const double ys = y.dot(s);
       const double sBs = s.dot(Bs);

       // Apply the update only when the curvature condition holds. A zero,
       // negative or NaN denominator (e.g. after a zero-length step) would
       // otherwise fill B with NaN/Inf or destroy its positive definiteness.
       if (ys > kCurvatureTol * s.norm() * y.norm() && sBs > 0.0) {
         _B = _B + (y * y.transpose()) / ys - (Bs * Bs.transpose()) / sBs;
       }

       g = g_next;
     }

     return x;
   }
 };


 } // namespace cpu_mlp

cpu_mlp::BFGS
BFGS (Broyden–Fletcher–Goldfarb–Shanno) minimizer.
Definition: bfgs.hpp:19

cpu_mlp::BFGS::SolverT
typename std::conditional< UseDefaultSolver, Solver, Solver & >::type SolverT
Definition: bfgs.hpp:27

cpu_mlp::BFGS::setInitialHessian
void setInitialHessian(const M &b)
Sets the initial approximate Hessian matrix.
Definition: bfgs.hpp:44

cpu_mlp::BFGS::solve
V solve(V x, VecFun< V, double > &f, GradFun< V > &Gradient) override
Solves the optimization problem using BFGS method.
Definition: bfgs.hpp:53

cpu_mlp::BFGS::requires
requires(UseDefaultSolver)
Definition: bfgs.hpp:35

cpu_mlp::BFGS::UseDefaultSolver
static constexpr bool UseDefaultSolver
Definition: bfgs.hpp:26

cpu_mlp::FullBatchMinimizer
Base class for Full Batch Minimizers.
Definition: full_batch_minimizer.hpp:23

cpu_mlp::FullBatchMinimizer::_tol
double _tol
Definition: full_batch_minimizer.hpp:109

cpu_mlp::FullBatchMinimizer::line_search
double line_search(V x, V p, VecFun< V, double > &f, GradFun< V > &Gradient)
Backtracking Line Search satisfying Wolfe Conditions.
Definition: full_batch_minimizer.hpp:126

cpu_mlp::FullBatchMinimizer::_iters
unsigned int _iters
Definition: full_batch_minimizer.hpp:108

cpu_mlp::FullBatchMinimizer::_max_iters
unsigned int _max_iters
Definition: full_batch_minimizer.hpp:107

GradFun
std::function< T(T)> GradFun
Gradient function type alias (T -> T).
Definition: common.hpp:32

VecFun
std::function< W(T)> VecFun
Objective function type alias (T -> W).
Definition: common.hpp:35

check
#define check(condition, message)
Debug assertion with message and source location.
Definition: common.hpp:14

full_batch_minimizer.hpp

cpu_mlp
Definition: layer.hpp:13

cpu_mlp::isSparse
constexpr bool isSparse
Definition: bfgs.hpp:11

cpu_mlp::DefaultSolverT
typename std::conditional< isSparse< M >, Eigen::ConjugateGradient< M >, Eigen::LDLT< M > >::type DefaultSolverT
Definition: bfgs.hpp:14