My Project
network.hpp
Go to the documentation of this file.
1 #pragma once
#include "layer.hpp"
#include "seed.hpp"

#include <Eigen/Core>

#include <algorithm>
#include <cassert>
#include <iostream>
#include <memory>
#include <random>
#include <vector>
15 
16 namespace cpu_mlp {
17 
21 class Network {
22 private:
23  std::vector<std::unique_ptr<Layer>> layers;
24 
25  std::vector<double> params;
26  std::vector<double> grads;
27  size_t params_size = 0;
28 
29  std::vector<Eigen::MatrixXd> activations;
30  std::vector<Eigen::MatrixXd> deltas;
31 
32 public:
33  Network() = default;
34 
36  size_t getSize() const { return params_size; }
37 
39  template <int In, int Out, typename Activation = Linear> void addLayer() {
40  layers.push_back(std::make_unique<DenseLayer<In, Out, Activation>>());
41  params_size += layers.back()->getParamsSize();
42  }
43 
45  void bindParams(unsigned int seed = kDefaultSeed) {
46  if (layers.empty()) return;
47 
48  // Allocate contiguous storage for parameters and gradients.
49  params.resize(params_size);
50  grads.resize(params_size);
51 
52  std::mt19937 gen(seed);
53 
54  activations.clear();
55  deltas.clear();
56 
57  double *p_ptr = params.data();
58  double *g_ptr = grads.data();
59 
60  for (auto &layer : layers) {
61  // Bind layer views into the shared buffers.
62  layer->bind(p_ptr, g_ptr);
63  double std_dev = layer->getInitStdDev();
64  std::normal_distribution<double> dist(0.0, std_dev);
65  for (int i = 0; i < layer->getParamsSize(); ++i) {
66  p_ptr[i] = dist(gen);
67  }
68  p_ptr += layer->getParamsSize();
69  g_ptr += layer->getParamsSize();
70  }
71  }
72 
74  const Eigen::MatrixXd &forward(const Eigen::MatrixXd &input) {
75  if (activations.empty() || activations[0].cols() != input.cols()) {
76  // Resize cached activations for the current batch.
77  activations.resize(layers.size() + 1);
78  }
79 
80  activations[0] = input;
81 
82  for (size_t i = 0; i < layers.size(); ++i) {
83  layers[i]->forward(activations[i], activations[i + 1]);
84  }
85 
86  return activations.back();
87  }
88 
90  void backward(const Eigen::MatrixXd &loss_grad) {
91  if (deltas.size() != layers.size() + 1) {
92  // Resize cached deltas for the current batch.
93  deltas.resize(layers.size() + 1);
94  }
95 
96  deltas.back() = loss_grad;
97 
98  for (int i = layers.size() - 1; i >= 0; --i) {
99  // Propagate gradients to the previous layer.
100  layers[i]->backward(deltas[i + 1], (i > 0) ? &deltas[i] : nullptr);
101  }
102  }
103 
105  void zeroGrads() { std::fill(grads.begin(), grads.end(), 0.0); }
106 
108  double *getParamsData() { return params.data(); }
110  double *getGradsData() { return grads.data(); }
111 
113  void setParams(const Eigen::VectorXd &new_params) {
114  std::copy(new_params.data(), new_params.data() + params_size, params.begin());
115  }
116 
118  void getGrads(Eigen::VectorXd &out_grads) { std::copy(grads.begin(), grads.end(), out_grads.data()); }
119 
121  void test(const Eigen::MatrixXd &inputs, const Eigen::MatrixXd &targets, std::string label = "Test Results") {
122  const auto &output = this->forward(inputs);
123  long correct = 0;
124  long total = inputs.cols();
125 
126  for (long i = 0; i < total; ++i) {
127  Eigen::Index pred_idx, true_idx;
128  output.col(i).maxCoeff(&pred_idx);
129  targets.col(i).maxCoeff(&true_idx);
130  if (pred_idx == true_idx) {
131  correct++;
132  }
133  }
134 
135  double accuracy = (double)correct / total * 100.0;
136  Eigen::MatrixXd diff = output - targets;
137  double mse = 0.5 * diff.squaredNorm();
138 
139  std::cout << "=== " << label << " ===" << std::endl;
140  std::cout << "Samples: " << total << std::endl;
141  std::cout << "Accuracy: " << accuracy << "% (" << correct << "/" << total << ")" << std::endl;
142  std::cout << "Total MSE: " << mse << std::endl;
143  std::cout << "====================" << std::endl;
144  }
145 };
146 
147 } // namespace cpu_mlp
Fully-connected layer with activation.
Definition: layer.hpp:74
Simple feedforward network with contiguous parameter storage.
Definition: network.hpp:21
const Eigen::MatrixXd & forward(const Eigen::MatrixXd &input)
Run a forward pass for a batch of inputs.
Definition: network.hpp:74
void getGrads(Eigen::VectorXd &out_grads)
Copy gradients to output vector.
Definition: network.hpp:118
double * getGradsData()
Access raw gradient buffer.
Definition: network.hpp:110
void backward(const Eigen::MatrixXd &loss_grad)
Run a backward pass from output loss gradients.
Definition: network.hpp:90
void test(const Eigen::MatrixXd &inputs, const Eigen::MatrixXd &targets, std::string label="Test Results")
Evaluate accuracy and MSE for a dataset.
Definition: network.hpp:121
void zeroGrads()
Zero the gradient buffer.
Definition: network.hpp:105
size_t getSize() const
Total number of parameters.
Definition: network.hpp:36
void setParams(const Eigen::VectorXd &new_params)
Replace parameters with a new vector.
Definition: network.hpp:113
double * getParamsData()
Access raw parameter buffer.
Definition: network.hpp:108
void addLayer()
Append a dense layer to the network.
Definition: network.hpp:39
void bindParams(unsigned int seed=kDefaultSeed)
Bind and initialize parameters and gradient buffers.
Definition: network.hpp:45
Network()=default
Layer interfaces and dense layer implementation for CPU.
Definition: layer.hpp:13
constexpr unsigned int kDefaultSeed
Definition: seed.hpp:4