|
| void | cuda_mlp::device_set_zero (CudaScalar *ptr, size_t n) |
| | Set device memory to zero. More...
|
| |
| void | cuda_mlp::device_copy (CudaScalar *dst, const CudaScalar *src, size_t n) |
| | Copy device-to-device. More...
|
| |
| CudaScalar | cuda_mlp::device_dot (CublasHandle &handle, const CudaScalar *x, const CudaScalar *y, int n) |
| | Compute dot product on device using cuBLAS. More...
|
| |
| CudaScalar | cuda_mlp::device_nrm2 (CublasHandle &handle, const CudaScalar *x, int n) |
| | Compute Euclidean norm on device using cuBLAS. More...
|
| |
| void | cuda_mlp::device_axpy (CublasHandle &handle, int n, CudaScalar alpha, const CudaScalar *x, CudaScalar *y) |
| | y <- alpha * x + y (AXPY) on device using cuBLAS. More...
|
| |
| void | cuda_mlp::device_scal (CublasHandle &handle, int n, CudaScalar alpha, CudaScalar *x) |
| | Scale vector x <- alpha * x on device using cuBLAS. More...
|
| |
| CudaScalar | cuda_mlp::activation_scale (ActivationType act) |
| | Scaling factor used for weight initialization. More...
|
| |
| __global__ void | cuda_mlp::add_bias_kernel (CudaScalar *z, const CudaScalar *b, int rows, int cols) |
| | Kernel: add bias vector to column-major matrix. More...
|
| |
| __global__ void | cuda_mlp::activation_kernel (CudaScalar *a, int n, int act) |
| | Kernel: apply activation in-place. More...
|
| |
| __global__ void | cuda_mlp::activation_deriv_kernel (CudaScalar *grad, const CudaScalar *a, int n, int act) |
| | Kernel: multiply gradient by activation derivative. More...
|
| |
| __global__ void | cuda_mlp::diff_kernel (const CudaScalar *output, const CudaScalar *target, CudaScalar *diff, int n) |
| | Kernel: diff = output - target. More...
|
| |
| __global__ void | cuda_mlp::sum_rows_kernel (const CudaScalar *mat, CudaScalar *out, int rows, int cols) |
| | Kernel: sum over the rows of a (rows x cols) matrix, producing a 1 x cols row vector of per-column sums. More...
|
| |
| void | cuda_mlp::launch_add_bias (CudaScalar *z, const CudaScalar *b, int rows, int cols) |
| | Launch add-bias kernel. More...
|
| |
| void | cuda_mlp::launch_activation (CudaScalar *a, int n, ActivationType act) |
| | Launch activation kernel. More...
|
| |
| void | cuda_mlp::launch_activation_deriv (CudaScalar *grad, const CudaScalar *a, int n, ActivationType act) |
| | Launch activation-derivative kernel. More...
|
| |
| void | cuda_mlp::launch_diff (const CudaScalar *output, const CudaScalar *target, CudaScalar *diff, int n) |
| | Launch diff kernel. More...
|
| |
| void | cuda_mlp::launch_sum_rows (const CudaScalar *mat, CudaScalar *out, int rows, int cols) |
| | Launch sum-rows kernel. More...
|
| |