My Project
newton.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "../common.hpp"
5 #include <Eigen/Eigen>
6 #include <iostream>
7 
8 namespace cpu_mlp {
9 
13 template <typename V, typename M> class Newton : public FullBatchMinimizer<V, M> {
 // Bring frequently-used state and helpers from the base minimizer into scope.
 using Base::_iters;      // Iteration counter (written by solve()).
 using Base::_max_iters;  // Upper bound on Newton iterations.
 using Base::_tol;        // Gradient-norm convergence tolerance used by solve().
 using Base::line_search; // Backtracking line search satisfying Wolfe conditions.
19 
20 public:
25  void setHessian(const HessFun<V, M> &hessFun) noexcept { _hessFun = hessFun; }
26 
34  V solve(V x, VecFun<V, double> &f, GradFun<V> &Gradient) override {
35  Eigen::LDLT<M> ldlt;
36 
37  check(static_cast<bool>(_hessFun), "Hessian function must be set for Newton solver");
38 
39  for (_iters = 0; _iters < _max_iters; ++_iters) {
40  V g = Gradient(x);
41  double gnorm = g.norm();
42  if (gnorm <= _tol) break;
43 
44  M H = _hessFun(x);
45  check(H.rows() == H.cols(), "Hessian must be square");
46  check(H.rows() == g.size(), "Hessian/gradient size mismatch");
47 
48  V p;
49  bool found = false;
50  double mu = reg_init;
51  const double max_mu = reg_max;
52 
53  while (mu <= max_mu) {
54  M Hreg = H;
55  Hreg += mu * M::Identity(H.rows(), H.cols());
56 
57  ldlt.compute(Hreg);
58  if (ldlt.info() == Eigen::Success) {
59  p = ldlt.solve(-g);
60  if (ldlt.info() == Eigen::Success && p.dot(g) < 0.0) {
61  found = true;
62  break;
63  }
64  }
65  mu *= reg_growth;
66  }
67 
68  if (!found) {
69  p = -g;
70  }
71 
72  double alpha = line_search(x, p, f, Gradient);
73  x = x + alpha * p;
74  }
75 
76  return x;
77  }
78 
private:
 HessFun<V, M> _hessFun;   // User-supplied Hessian callback; must be set via setHessian() before solve().
 double reg_init = 1e-6;   // Initial diagonal damping (mu) tried when factorizing the Hessian.
 double reg_max = 1e6;     // Maximum damping; if exceeded, solve() falls back to steepest descent.
 double reg_growth = 10.0; // Multiplicative growth factor for damping between attempts.
84 };
85 
86 } // namespace cpu_mlp
Base class for Full Batch Minimizers.
Definition: full_batch_minimizer.hpp:23
double _tol
Definition: full_batch_minimizer.hpp:109
double line_search(V x, V p, VecFun< V, double > &f, GradFun< V > &Gradient)
Backtracking Line Search satisfying Wolfe Conditions.
Definition: full_batch_minimizer.hpp:126
unsigned int _iters
Definition: full_batch_minimizer.hpp:108
unsigned int _max_iters
Definition: full_batch_minimizer.hpp:107
Newton minimizer (full Newton) for unconstrained optimization.
Definition: newton.hpp:13
V solve(V x, VecFun< V, double > &f, GradFun< V > &Gradient) override
Solves the unconstrained optimization problem using Newton's Method.
Definition: newton.hpp:34
void setHessian(const HessFun< V, M > &hessFun) noexcept
Sets the Hessian function for computing the second derivative matrix.
Definition: newton.hpp:25
std::function< T(T)> GradFun
Gradient function type alias (T -> T).
Definition: common.hpp:32
std::function< W(T)> VecFun
Objective function type alias (T -> W).
Definition: common.hpp:35
#define check(condition, message)
Debug assertion with message and source location.
Definition: common.hpp:14
std::function< M(V)> HessFun
Hessian function type alias (V -> M).
Definition: common.hpp:38
Definition: layer.hpp:13