My Project
unified_launcher.hpp
Go to the documentation of this file.
1 #pragma once
2 
8 #include "network_wrapper.hpp"
9 #include "unified_optimization.hpp" // Optimizer strategies and config/dataset types
10 #include <Eigen/Core>
11 #include <iostream>
12 #include <memory>
13 #include <vector>
14 
/// Primary template, intentionally left undefined: only the CpuBackend
/// specialization below (and, when compiled under CUDA, the CudaBackend
/// specialization) are provided.
template <typename Backend> class UnifiedLauncher;
16 
20 template <> class UnifiedLauncher<CpuBackend> {
21 public:
22  UnifiedLauncher() = default;
23 
30  template <int In, int Out, typename Activation> void addLayer() { net_wrapper_.addLayer<In, Out, Activation>(); }
31 
36  void buildNetwork() { net_wrapper_.bindParams(); }
37 
42  void setData(const UnifiedDataset &data) { dataset_ = data; }
43 
49  void train(UnifiedOptimizer<CpuBackend> &optimizer, const UnifiedConfig &config) {
50  std::cout << ">>> Running CPU Experiment: " << config.name << std::endl;
51  if (config.reset_params) {
52  net_wrapper_.bindParams(config.seed);
53  }
54  // Train on the configured dataset.
55  optimizer.optimize(net_wrapper_, dataset_, config);
56  // Evaluate on training data.
57  net_wrapper_.getInternal().test(dataset_.train_x, dataset_.train_y, "Training Results");
58  }
59 
64  void test() { net_wrapper_.getInternal().test(dataset_.test_x, dataset_.test_y, "Test Results"); }
65 
70  NetworkWrapper<CpuBackend> &getWrapper() { return net_wrapper_; }
71 
72 private:
73  NetworkWrapper<CpuBackend> net_wrapper_;
74  UnifiedDataset dataset_;
75 };
76 
77 #ifdef __CUDACC__
78  #include "cuda/cublas_handle.cuh"
79 
83 template <> class UnifiedLauncher<CudaBackend> {
84 public:
85  UnifiedLauncher() : net_wrapper_(handle_) {}
86 
93  template <int In, int Out, typename Activation> void addLayer() { net_wrapper_.addLayer<In, Out, Activation>(); }
94 
99  void buildNetwork() { net_wrapper_.bindParams(); }
100 
105  void setData(const UnifiedDataset &data) {
106  dataset_ = data;
107 
108  // Host-to-device upload.
109  auto upload = [](const Eigen::MatrixXd &mat, cuda_mlp::DeviceBuffer<cuda_mlp::CudaScalar> &dev_buf) {
110  if constexpr (std::is_same<double, cuda_mlp::CudaScalar>::value) {
111  dev_buf.copy_from_host((const cuda_mlp::CudaScalar *)mat.data(), mat.size());
112  } else {
113 
114  std::vector<cuda_mlp::CudaScalar> temp(mat.size());
115  const double *ptr = mat.data();
116  for (size_t i = 0; i < static_cast<size_t>(mat.size()); ++i)
117  temp[i] = static_cast<cuda_mlp::CudaScalar>(ptr[i]);
118  dev_buf.copy_from_host(temp.data(), temp.size());
119  }
120  };
121 
122  upload(dataset_.train_x, d_train_x_);
123  upload(dataset_.train_y, d_train_y_);
124  upload(dataset_.test_x, d_test_x_);
125  upload(dataset_.test_y, d_test_y_);
126 
127  std::cout << "Data Uploaded to GPU. Train: " << dataset_.train_x.cols() << " samples." << std::endl;
128  }
129 
135  void train(UnifiedOptimizer<CudaBackend> &optimizer, const UnifiedConfig &config) {
136  std::cout << ">>> Running CUDA Experiment: " << config.name << std::endl;
137  if (config.reset_params) {
138  net_wrapper_.bindParams(config.seed);
139  }
140  // Train on device buffers.
141  optimizer.optimize(handle_, net_wrapper_, dataset_, d_train_x_, d_train_y_, config);
142  // Evaluate on training data.
143  evaluate(dataset_.train_x, dataset_.train_y, d_train_x_, "Training Results");
144  }
145 
150  void test() { evaluate(dataset_.test_x, dataset_.test_y, d_test_x_, "Test Results"); }
151 
152 private:
154  void evaluate(const Eigen::MatrixXd &x,
155  const Eigen::MatrixXd &y,
157  const char *label) {
158  int batch_size = static_cast<int>(x.cols());
159  int out_dim = static_cast<int>(y.rows());
160 
161  auto &net = net_wrapper_.getInternal();
162  net.forward_only(d_x.data(), batch_size);
163 
164  std::vector<cuda_mlp::CudaScalar> host_output(batch_size * out_dim);
165  net.copy_output_to_host(host_output.data(), host_output.size());
166 
167  double mse = 0;
168  long correct = 0;
169  const double *target_ptr = y.data();
170 
171  for (int i = 0; i < batch_size; ++i) {
172  int pred_idx = 0;
173  int true_idx = 0;
174  double pred_max = -1e20;
175  double true_max = -1e20;
176 
177  for (int r = 0; r < out_dim; ++r) {
178  int idx = r + i * out_dim;
179  double val = host_output[idx];
180  double tval = target_ptr[idx];
181 
182  mse += (val - tval) * (val - tval);
183 
184  if (val > pred_max) {
185  pred_max = val;
186  pred_idx = r;
187  }
188  if (tval > true_max) {
189  true_max = tval;
190  true_idx = r;
191  }
192  }
193  if (pred_idx == true_idx) correct++;
194  }
195 
196  mse /= (double)(batch_size * out_dim);
197  double acc = ((double)correct / batch_size) * 100.0;
198  std::cout << label << ": MSE=" << mse << ", Accuracy=" << acc << "%" << std::endl;
199  }
200 
201  cuda_mlp::CublasHandle handle_;
202  NetworkWrapper<CudaBackend> net_wrapper_;
203  UnifiedDataset dataset_;
204  cuda_mlp::DeviceBuffer<cuda_mlp::CudaScalar> d_train_x_, d_train_y_, d_test_x_, d_test_y_;
205 };
206 #endif
CPU specialization of the network wrapper.
Definition: network_wrapper.hpp:60
Definition: network_wrapper.hpp:55
void test()
Evaluate on test data.
Definition: unified_launcher.hpp:64
void addLayer()
Add a layer to the CPU network.
Definition: unified_launcher.hpp:30
void train(UnifiedOptimizer< CpuBackend > &optimizer, const UnifiedConfig &config)
Run training for the selected optimizer.
Definition: unified_launcher.hpp:49
void buildNetwork()
Finalize parameters and internal buffers.
Definition: unified_launcher.hpp:36
void setData(const UnifiedDataset &data)
Attach the training/test dataset.
Definition: unified_launcher.hpp:42
NetworkWrapper< CpuBackend > & getWrapper()
Access the underlying wrapper.
Definition: unified_launcher.hpp:70
Definition: unified_launcher.hpp:15
Specialization for CPU Backend.
Definition: unified_optimization.hpp:144
virtual void optimize(NetworkWrapper< CpuBackend > &net, const UnifiedDataset &data, const UnifiedConfig &config)=0
Executes the optimization strategy.
Abstract base class for backend-specific optimizer strategies.
Definition: unified_optimization.hpp:135
RAII-managed cuBLAS handle.
Definition: cublas_handle.cuh:22
Owning buffer for device memory.
Definition: device_buffer.cuh:7
T * data()
Mutable raw pointer to device memory.
Definition: device_buffer.cuh:68
float CudaScalar
Scalar type used across CUDA kernels and optimizers.
Definition: common.cuh:11
Backend-agnostic wrapper for CPU/CUDA networks.
Backend tag for CPU implementations.
Definition: network_wrapper.hpp:20
Backend tag for CUDA implementations.
Definition: network_wrapper.hpp:22
Configuration parameters for training experiments.
Definition: unified_optimization.hpp:26
unsigned int seed
Definition: unified_optimization.hpp:47
bool reset_params
Definition: unified_optimization.hpp:46
std::string name
Definition: unified_optimization.hpp:27
Container for training and test data.
Definition: unified_optimization.hpp:54