My Project
Namespaces | Enumerations | Functions
kernels.cuh File Reference
#include "common.cuh"
#include "cublas_handle.cuh"
#include <cmath>
Include dependency graph for kernels.cuh:
This graph shows which files directly or indirectly include this file:

Namespaces

 cuda_mlp
 

Enumerations

enum class  cuda_mlp::ActivationType : int { cuda_mlp::Linear = 0 , cuda_mlp::Tanh = 1 , cuda_mlp::ReLU = 2 , cuda_mlp::Sigmoid = 3 }
 Supported activation functions. More...
 

Functions

void cuda_mlp::device_set_zero (CudaScalar *ptr, size_t n)
 Set device memory to zero. More...
 
void cuda_mlp::device_copy (CudaScalar *dst, const CudaScalar *src, size_t n)
 Copy device-to-device. More...
 
CudaScalar cuda_mlp::device_dot (CublasHandle &handle, const CudaScalar *x, const CudaScalar *y, int n)
 Compute dot product on device using cuBLAS. More...
 
CudaScalar cuda_mlp::device_nrm2 (CublasHandle &handle, const CudaScalar *x, int n)
 Compute Euclidean norm on device using cuBLAS. More...
 
void cuda_mlp::device_axpy (CublasHandle &handle, int n, CudaScalar alpha, const CudaScalar *x, CudaScalar *y)
 y <- alpha * x + y (AXPY) on device using cuBLAS. More...
 
void cuda_mlp::device_scal (CublasHandle &handle, int n, CudaScalar alpha, CudaScalar *x)
 Scale vector x <- alpha * x on device using cuBLAS. More...
 
CudaScalar cuda_mlp::activation_scale (ActivationType act)
 Scaling factor for initialization. More...
 
__global__ void cuda_mlp::add_bias_kernel (CudaScalar *z, const CudaScalar *b, int rows, int cols)
 Kernel: add bias vector to column-major matrix. More...
 
__global__ void cuda_mlp::activation_kernel (CudaScalar *a, int n, int act)
 Kernel: apply activation in-place. More...
 
__global__ void cuda_mlp::activation_deriv_kernel (CudaScalar *grad, const CudaScalar *a, int n, int act)
 Kernel: multiply gradient by activation derivative. More...
 
__global__ void cuda_mlp::diff_kernel (const CudaScalar *output, const CudaScalar *target, CudaScalar *diff, int n)
 Kernel: diff = output - target. More...
 
__global__ void cuda_mlp::sum_rows_kernel (const CudaScalar *mat, CudaScalar *out, int rows, int cols)
 Kernel: sum the columns of a (rows x cols) matrix into a row vector. More...
 
void cuda_mlp::launch_add_bias (CudaScalar *z, const CudaScalar *b, int rows, int cols)
 Launch add-bias kernel. More...
 
void cuda_mlp::launch_activation (CudaScalar *a, int n, ActivationType act)
 Launch activation kernel. More...
 
void cuda_mlp::launch_activation_deriv (CudaScalar *grad, const CudaScalar *a, int n, ActivationType act)
 Launch activation-derivative kernel. More...
 
void cuda_mlp::launch_diff (const CudaScalar *output, const CudaScalar *target, CudaScalar *diff, int n)
 Launch diff kernel. More...
 
void cuda_mlp::launch_sum_rows (const CudaScalar *mat, CudaScalar *out, int rows, int cols)
 Launch sum-rows kernel. More...