#include <cmath>

#include <Eigen/src/Core/Map.h>

#include <autodiff/reverse/var.hpp>
#include <autodiff/reverse/var/eigen.hpp>
17 static inline double apply(
double x) {
return x; }
18 static inline double prime(
double ) {
return 1.0; }
19 static constexpr
double scale = 1.0;
24 static inline double apply(
double x) {
return (x > 0.0) ? x : 0.0; }
25 static inline double prime(
double x) {
return (x > 0.0) ? 1.0 : 0.0; }
26 static constexpr
double scale = 1.41421356;
31 static inline double apply(
double x) {
return 1.0 / (1.0 + std::exp(-x)); }
32 static inline double prime(
double x) {
36 static constexpr
double scale = 1.0;
41 static inline double apply(
double x) {
return std::tanh(x); }
42 static inline double prime(
double x) {
43 double t = std::tanh(x);
46 static constexpr
double scale = 1.0;
56 virtual void bind(
double *params,
double *grads) = 0;
58 virtual void forward(
const Eigen::MatrixXd &input, Eigen::MatrixXd &output) = 0;
60 virtual void backward(
const Eigen::MatrixXd &next_grad, Eigen::MatrixXd *prev_grad) = 0;
74 template <
int In,
int Out,
typename Activation = Linear>
class DenseLayer :
public Layer {
76 using MapMatW = Eigen::Map<const Eigen::MatrixXd>;
77 using MapVecB = Eigen::Map<const Eigen::VectorXd>;
79 using MapMatW_Grad = Eigen::Map<Eigen::MatrixXd>;
80 using MapVecB_Grad = Eigen::Map<Eigen::VectorXd>;
82 double *params_ptr =
nullptr;
83 double *grads_ptr =
nullptr;
85 Eigen::MatrixXd input_cache;
86 Eigen::MatrixXd z_cache;
95 void bind(
double *params,
double *grads)
override {
100 void forward(
const Eigen::MatrixXd &input, Eigen::MatrixXd &output)
override {
101 MapMatW W(params_ptr, Out, In);
102 MapVecB b(params_ptr + (Out * In), Out);
107 z_cache.colwise() += b;
109 output = z_cache.unaryExpr([](
double v) {
return Activation::apply(v); });
112 void backward(
const Eigen::MatrixXd &next_grad, Eigen::MatrixXd *prev_grad)
override {
113 MapMatW_Grad dW(grads_ptr, Out, In);
114 MapVecB_Grad db(grads_ptr + (Out * In), Out);
117 Eigen::MatrixXd dZ = next_grad.cwiseProduct(z_cache.unaryExpr([](
double v) { return Activation::prime(v); }));
120 dW.noalias() += dZ * input_cache.transpose();
121 db.noalias() += dZ.rowwise().sum();
124 MapMatW W(params_ptr, Out, In);
126 *prev_grad = W.transpose() * dZ;
130 double getInitStdDev()
const override {
return Activation::scale * std::sqrt(1.0 / (
double)In); }
Fully-connected layer with activation.
Definition: layer.hpp:74
void bind(double *params, double *grads) override
Bind parameter and gradient storage.
Definition: layer.hpp:95
int getOutSize() const override
Output dimension.
Definition: layer.hpp:92
int getInSize() const override
Input dimension.
Definition: layer.hpp:91
int getParamsSize() const override
Parameter count.
Definition: layer.hpp:93
void forward(const Eigen::MatrixXd &input, Eigen::MatrixXd &output) override
Forward pass for a batch.
Definition: layer.hpp:100
DenseLayer()
Definition: layer.hpp:89
double getInitStdDev() const override
Initialization scale.
Definition: layer.hpp:130
void backward(const Eigen::MatrixXd &next_grad, Eigen::MatrixXd *prev_grad) override
Backward pass for a batch.
Definition: layer.hpp:112
Abstract layer interface.
Definition: layer.hpp:52
virtual void bind(double *params, double *grads)=0
Bind parameter and gradient storage.
virtual int getInSize() const =0
Input dimension.
virtual void forward(const Eigen::MatrixXd &input, Eigen::MatrixXd &output)=0
Forward pass for a batch.
virtual double getInitStdDev() const =0
Initialization scale.
virtual int getOutSize() const =0
Output dimension.
virtual void backward(const Eigen::MatrixXd &next_grad, Eigen::MatrixXd *prev_grad)=0
Backward pass for a batch.
virtual int getParamsSize() const =0
Parameter count.
Common aliases and utilities shared across CPU components.
Linear activation (identity).
Definition: layer.hpp:16
static double prime(double)
Definition: layer.hpp:18
static constexpr double scale
Definition: layer.hpp:19
static double apply(double x)
Definition: layer.hpp:17
ReLU activation.
Definition: layer.hpp:23
static double apply(double x)
Definition: layer.hpp:24
static constexpr double scale
Definition: layer.hpp:26
static double prime(double x)
Definition: layer.hpp:25
Sigmoid activation.
Definition: layer.hpp:30
static constexpr double scale
Definition: layer.hpp:36
static double apply(double x)
Definition: layer.hpp:31
static double prime(double x)
Definition: layer.hpp:32
Tanh activation.
Definition: layer.hpp:40
static double apply(double x)
Definition: layer.hpp:41
static constexpr double scale
Definition: layer.hpp:46
static double prime(double x)
Definition: layer.hpp:42