My Project
layer.hpp
Go to the documentation of this file.
1 #pragma once
#include "common.hpp"

#include <cassert>
#include <cmath>

#include <Eigen/Core>
// NOTE(review): Eigen internal headers (Eigen/src/...) should not be included
// directly; <Eigen/Core> already provides Eigen::Map. Kept for compatibility.
#include <Eigen/src/Core/Map.h>
#include <autodiff/reverse/var.hpp>
#include <autodiff/reverse/var/eigen.hpp>
12 
13 namespace cpu_mlp {
14 
/// Identity activation: values pass through unchanged.
struct Linear {
  /// f(x) = x.
  static inline double apply(double x) { return x; }
  /// f'(x) = 1 for every x (argument intentionally unused).
  static inline double prime(double /*x*/) { return 1.0; }
  /// Unit gain: no correction to the initialization std-dev.
  static constexpr double scale = 1.0;
};
21 
/// Rectified linear unit activation: max(x, 0).
struct ReLU {
  /// f(x) = x for positive x, otherwise 0.
  static inline double apply(double x) { return (x > 0.0) ? x : 0.0; }
  /// f'(x) = 1 on the positive side, 0 elsewhere (subgradient 0 chosen at x == 0).
  static inline double prime(double x) { return (x > 0.0) ? 1.0 : 0.0; }
  /// He-initialization gain sqrt(2), written at full double precision
  /// (the previous literal 1.41421356 was truncated after 8 digits).
  static constexpr double scale = 1.4142135623730951;
};
28 
/// Logistic sigmoid activation.
struct Sigmoid {
  /// f(x) = 1 / (1 + e^{-x}); saturates toward 0 / 1 for large |x|.
  static inline double apply(double x) {
    const double e = std::exp(-x);
    return 1.0 / (1.0 + e);
  }
  /// f'(x) = f(x) * (1 - f(x)).
  static inline double prime(double x) {
    const double sig = apply(x);
    return sig * (1.0 - sig);
  }
  /// No extra gain applied at initialization.
  static constexpr double scale = 1.0;
};
38 
/// Hyperbolic tangent activation.
struct Tanh {
  /// f(x) = tanh(x), range (-1, 1).
  static inline double apply(double x) { return std::tanh(x); }
  /// f'(x) = 1 - tanh^2(x).
  static inline double prime(double x) {
    const double y = std::tanh(x);
    return 1.0 - y * y;
  }
  /// No extra gain applied at initialization.
  static constexpr double scale = 1.0;
};
48 
/// Abstract layer interface.
///
/// A Layer does not own its parameters: storage is supplied through bind()
/// and must outlive the layer's use. forward()/backward() operate on batches
/// held in Eigen matrices (one sample per column, judging by DenseLayer's
/// colwise()/rowwise() usage -- NOTE(review): confirm for other implementations).
class Layer {
public:
  virtual ~Layer() = default;

  /// Bind externally owned parameter and gradient storage.
  virtual void bind(double *params, double *grads) = 0;

  /// Forward pass for a batch: fills `output` from `input`.
  virtual void forward(const Eigen::MatrixXd &input, Eigen::MatrixXd &output) = 0;

  /// Backward pass for a batch: consumes the gradient flowing in from the
  /// next layer; when `prev_grad` is non-null, writes the gradient with
  /// respect to this layer's input into it.
  virtual void backward(const Eigen::MatrixXd &next_grad, Eigen::MatrixXd *prev_grad) = 0;

  /// Input dimension.
  virtual int getInSize() const = 0;
  /// Output dimension.
  virtual int getOutSize() const = 0;
  /// Number of parameters this layer expects in the bound buffers.
  virtual int getParamsSize() const = 0;
  /// Std-dev to use when randomly initializing this layer's parameters.
  virtual double getInitStdDev() const = 0;
};
70 
74 template <int In, int Out, typename Activation = Linear> class DenseLayer : public Layer {
75 private:
76  using MapMatW = Eigen::Map<const Eigen::MatrixXd>;
77  using MapVecB = Eigen::Map<const Eigen::VectorXd>;
78 
79  using MapMatW_Grad = Eigen::Map<Eigen::MatrixXd>;
80  using MapVecB_Grad = Eigen::Map<Eigen::VectorXd>;
81 
82  double *params_ptr = nullptr;
83  double *grads_ptr = nullptr;
84 
85  Eigen::MatrixXd input_cache;
86  Eigen::MatrixXd z_cache;
87 
88 public:
90 
91  int getInSize() const override { return In; }
92  int getOutSize() const override { return Out; }
93  int getParamsSize() const override { return (Out * In) + Out; }
94 
95  void bind(double *params, double *grads) override {
96  params_ptr = params;
97  grads_ptr = grads;
98  }
99 
100  void forward(const Eigen::MatrixXd &input, Eigen::MatrixXd &output) override {
101  MapMatW W(params_ptr, Out, In);
102  MapVecB b(params_ptr + (Out * In), Out);
103 
104  // Cache inputs for backward pass.
105  input_cache = input;
106  z_cache = W * input;
107  z_cache.colwise() += b;
108 
109  output = z_cache.unaryExpr([](double v) { return Activation::apply(v); });
110  }
111 
112  void backward(const Eigen::MatrixXd &next_grad, Eigen::MatrixXd *prev_grad) override {
113  MapMatW_Grad dW(grads_ptr, Out, In);
114  MapVecB_Grad db(grads_ptr + (Out * In), Out);
115 
116  // Apply activation derivative to incoming gradient.
117  Eigen::MatrixXd dZ = next_grad.cwiseProduct(z_cache.unaryExpr([](double v) { return Activation::prime(v); }));
118 
119  // Accumulate parameter gradients.
120  dW.noalias() += dZ * input_cache.transpose();
121  db.noalias() += dZ.rowwise().sum();
122 
123  if (prev_grad) {
124  MapMatW W(params_ptr, Out, In);
125  // Propagate gradient to previous layer.
126  *prev_grad = W.transpose() * dZ;
127  }
128  }
129 
130  double getInitStdDev() const override { return Activation::scale * std::sqrt(1.0 / (double)In); }
131 };
132 
133 } // namespace cpu_mlp
Fully-connected layer with activation.
Definition: layer.hpp:74
void bind(double *params, double *grads) override
Bind parameter and gradient storage.
Definition: layer.hpp:95
int getOutSize() const override
Output dimension.
Definition: layer.hpp:92
int getInSize() const override
Input dimension.
Definition: layer.hpp:91
int getParamsSize() const override
Parameter count.
Definition: layer.hpp:93
void forward(const Eigen::MatrixXd &input, Eigen::MatrixXd &output) override
Forward pass for a batch.
Definition: layer.hpp:100
DenseLayer()
Definition: layer.hpp:89
double getInitStdDev() const override
Initialization scale.
Definition: layer.hpp:130
void backward(const Eigen::MatrixXd &next_grad, Eigen::MatrixXd *prev_grad) override
Backward pass for a batch.
Definition: layer.hpp:112
Abstract layer interface.
Definition: layer.hpp:52
virtual void bind(double *params, double *grads)=0
Bind parameter and gradient storage.
virtual int getInSize() const =0
Input dimension.
virtual void forward(const Eigen::MatrixXd &input, Eigen::MatrixXd &output)=0
Forward pass for a batch.
virtual double getInitStdDev() const =0
Initialization scale.
virtual ~Layer()=default
virtual int getOutSize() const =0
Output dimension.
virtual void backward(const Eigen::MatrixXd &next_grad, Eigen::MatrixXd *prev_grad)=0
Backward pass for a batch.
virtual int getParamsSize() const =0
Parameter count.
Common aliases and utilities shared across CPU components.
Definition: layer.hpp:13
Linear activation (identity).
Definition: layer.hpp:16
static double prime(double)
Definition: layer.hpp:18
static constexpr double scale
Definition: layer.hpp:19
static double apply(double x)
Definition: layer.hpp:17
ReLU activation.
Definition: layer.hpp:23
static double apply(double x)
Definition: layer.hpp:24
static constexpr double scale
Definition: layer.hpp:26
static double prime(double x)
Definition: layer.hpp:25
Sigmoid activation.
Definition: layer.hpp:30
static constexpr double scale
Definition: layer.hpp:36
static double apply(double x)
Definition: layer.hpp:31
static double prime(double x)
Definition: layer.hpp:32
Tanh activation.
Definition: layer.hpp:40
static double apply(double x)
Definition: layer.hpp:41
static constexpr double scale
Definition: layer.hpp:46
static double prime(double x)
Definition: layer.hpp:42