// --- Network state (Doxygen snippet; leading numbers are leaked source
// line numbers from network.hpp, not code) ---
// Owned layers, applied in sequence during forward/backward.
23 std::vector<std::unique_ptr<Layer>> layers;
// Contiguous parameter storage; each layer binds a slice of it (see bindParams).
25 std::vector<double> params;
// Gradient buffer with the same layout/size as params.
26 std::vector<double> grads;
// Running total of parameters across all layers (incremented in addLayer).
27 size_t params_size = 0;
// Cached per-layer outputs; activations[0] holds the input batch.
29 std::vector<Eigen::MatrixXd> activations;
// Backprop buffers; deltas[i + 1] is the gradient w.r.t. layer i's output.
30 std::vector<Eigen::MatrixXd> deltas;
36 size_t getSize()
const {
return params_size; }
// @brief Append a dense layer (In -> Out inputs/outputs, with the given
// Activation, defaulting to Linear) to the network.
// NOTE(review): this snippet is truncated — the statement that actually
// constructs and appends the new layer (original line 40) did not survive
// the dump; only the bookkeeping below is visible. The closing brace is
// also missing here.
39 template <
int In,
int Out,
typename Activation = Linear>
void addLayer() {
// Keep the running parameter total in sync with the just-added layer.
41 params_size += layers.back()->getParamsSize();
// @brief Bind and initialize parameter/gradient buffers (bindParams; takes a
// seed defaulting to kDefaultSeed per the member index below).
// NOTE(review): snippet is truncated — the inner loop body that presumably
// writes dist(gen) into each bound parameter (original lines 66-67) and the
// closing braces are not visible here.
// Nothing to bind for an empty network.
46 if (layers.empty())
return;
// Size both flat buffers to the total parameter count.
49 params.resize(params_size);
50 grads.resize(params_size);
// Deterministic RNG for reproducible initialization.
52 std::mt19937 gen(seed);
// Walk the flat buffers, handing each layer its slice.
57 double *p_ptr = params.data();
58 double *g_ptr = grads.data();
60 for (
auto &layer : layers) {
// Layer stores raw pointers into the shared buffers.
62 layer->bind(p_ptr, g_ptr);
// Per-layer init scale drives a zero-mean normal distribution.
63 double std_dev = layer->getInitStdDev();
64 std::normal_distribution<double> dist(0.0, std_dev);
65 for (
int i = 0; i < layer->getParamsSize(); ++i) {
// Advance both cursors past this layer's slice.
68 p_ptr += layer->getParamsSize();
69 g_ptr += layer->getParamsSize();
// @brief Run a forward pass for a batch of inputs (columns = samples).
// @param input Input batch; copied into activations[0].
// @return Reference to the final layer's output (activations.back()).
// NOTE(review): snippet is truncated — closing braces and a few interior
// lines (original 76, 78-79, 81, 84-85) are missing from the dump.
74 const Eigen::MatrixXd &
forward(
const Eigen::MatrixXd &input) {
// (Re)allocate activation buffers when batch width changes or on first use.
75 if (activations.empty() || activations[0].cols() != input.cols()) {
77 activations.resize(layers.size() + 1);
80 activations[0] = input;
// Each layer reads activations[i] and writes activations[i + 1].
82 for (
size_t i = 0; i < layers.size(); ++i) {
83 layers[i]->forward(activations[i], activations[i + 1]);
86 return activations.back();
// @brief Run a backward pass from output loss gradients.
// @param loss_grad Gradient of the loss w.r.t. the network output; copied
//        into deltas.back().
// NOTE(review): snippet is truncated — closing braces after the loop are
// not visible in this dump.
90 void backward(
const Eigen::MatrixXd &loss_grad) {
// Lazily size the delta buffers to match the layer count.
91 if (deltas.size() != layers.size() + 1) {
93 deltas.resize(layers.size() + 1);
96 deltas.back() = loss_grad;
// Propagate from the last layer down; signed index so i >= 0 terminates.
98 for (
int i = layers.size() - 1; i >= 0; --i) {
// First layer gets nullptr: no upstream delta needs to be produced.
100 layers[i]->backward(deltas[i + 1], (i > 0) ? &deltas[i] :
nullptr);
105 void zeroGrads() { std::fill(grads.begin(), grads.end(), 0.0); }
// Body of setParams (signature on original line 113 was dropped by the dump):
// overwrite the flat parameter buffer with params_size values from new_params.
// NOTE(review): assumes new_params holds at least params_size entries — verify
// at the (not visible) call sites.
114 std::copy(new_params.data(), new_params.data() + params_size, params.begin());
118 void getGrads(Eigen::VectorXd &out_grads) { std::copy(grads.begin(), grads.end(), out_grads.data()); }
// @brief Evaluate classification accuracy (argmax match per column) and MSE
// for a dataset, printing a small report to stdout.
// @param inputs  Input batch, one sample per column.
// @param targets Target batch (one-hot style: argmax taken per column).
// @param label   Heading printed in the report.
// NOTE(review): snippet is truncated — the declaration and increment of the
// `correct` counter (original lines ~125 and ~131-134) and several closing
// braces are missing from this dump.
121 void test(
const Eigen::MatrixXd &inputs,
const Eigen::MatrixXd &targets, std::string label =
"Test Results") {
// Single forward pass over the whole batch.
122 const auto &output = this->
forward(inputs);
124 long total = inputs.cols();
// Per-sample argmax comparison between prediction and target.
126 for (
long i = 0; i < total; ++i) {
127 Eigen::Index pred_idx, true_idx;
128 output.col(i).maxCoeff(&pred_idx);
129 targets.col(i).maxCoeff(&true_idx);
130 if (pred_idx == true_idx) {
135 double accuracy = (double)correct / total * 100.0;
// 0.5 * sum of squared errors over the whole batch.
136 Eigen::MatrixXd diff = output - targets;
137 double mse = 0.5 * diff.squaredNorm();
139 std::cout <<
"=== " << label <<
" ===" << std::endl;
140 std::cout <<
"Samples: " << total << std::endl;
141 std::cout <<
"Accuracy: " << accuracy <<
"% (" << correct <<
"/" << total <<
")" << std::endl;
142 std::cout <<
"Total MSE: " << mse << std::endl;
143 std::cout <<
"====================" << std::endl;
Fully-connected layer with activation.
Definition: layer.hpp:74
Simple feedforward network with contiguous parameter storage.
Definition: network.hpp:21
const Eigen::MatrixXd & forward(const Eigen::MatrixXd &input)
Run a forward pass for a batch of inputs.
Definition: network.hpp:74
void getGrads(Eigen::VectorXd &out_grads)
Copy gradients to output vector.
Definition: network.hpp:118
double * getGradsData()
Access raw gradient buffer.
Definition: network.hpp:110
void backward(const Eigen::MatrixXd &loss_grad)
Run a backward pass from output loss gradients.
Definition: network.hpp:90
void test(const Eigen::MatrixXd &inputs, const Eigen::MatrixXd &targets, std::string label="Test Results")
Evaluate accuracy and MSE for a dataset.
Definition: network.hpp:121
void zeroGrads()
Zero the gradient buffer.
Definition: network.hpp:105
size_t getSize() const
Total number of parameters.
Definition: network.hpp:36
void setParams(const Eigen::VectorXd &new_params)
Replace parameters with a new vector.
Definition: network.hpp:113
double * getParamsData()
Access raw parameter buffer.
Definition: network.hpp:108
void addLayer()
Append a dense layer to the network.
Definition: network.hpp:39
void bindParams(unsigned int seed=kDefaultSeed)
Bind and initialize parameters and gradient buffers.
Definition: network.hpp:45
Layer interfaces and dense layer implementation for CPU.
constexpr unsigned int kDefaultSeed
Definition: seed.hpp:4