My Project
Classes | Typedefs | Enumerations | Functions
cuda_mlp Namespace Reference

Classes

class  CublasHandle
 RAII-managed cuBLAS handle. More...
 
class  DeviceBuffer
 Owning buffer for device memory. More...
 
class  CudaGD
 Gradient descent with optional momentum. More...
 
class  CudaDenseLayer
 Fully connected layer with activation, using column-major matrices. More...
 
class  CudaLBFGS
 Limited-memory BFGS with Armijo backtracking line search. More...
 
class  CudaMinimizerBase
 Abstract base class for CUDA-based minimizers. More...
 
class  CudaNetwork
 Feed-forward dense network with GPU-backed parameters and gradients. More...
 
class  CudaSGD
 SGD with optional momentum and learning-rate decay. More...
 

Typedefs

using CudaScalar = float
 Scalar type used across CUDA kernels and optimizers. More...
 

Enumerations

enum class  ActivationType : int { Linear = 0 , Tanh = 1 , ReLU = 2 , Sigmoid = 3 }
 Supported activation functions. More...
 

Functions

void cuda_check (cudaError_t err, const char *msg)
 Check a CUDA API call and abort with a message on failure. More...
 
void cublas_check (cublasStatus_t status, const char *msg)
 Check a cuBLAS API call and abort with a message on failure. More...
 
void device_set_zero (CudaScalar *ptr, size_t n)
 Set device memory to zero. More...
 
void device_copy (CudaScalar *dst, const CudaScalar *src, size_t n)
 Copy device-to-device. More...
 
CudaScalar device_dot (CublasHandle &handle, const CudaScalar *x, const CudaScalar *y, int n)
 Compute dot product on device using cuBLAS. More...
 
CudaScalar device_nrm2 (CublasHandle &handle, const CudaScalar *x, int n)
 Compute Euclidean norm on device using cuBLAS. More...
 
void device_axpy (CublasHandle &handle, int n, CudaScalar alpha, const CudaScalar *x, CudaScalar *y)
 y <- alpha * x + y (AXPY) on device using cuBLAS. More...
 
void device_scal (CublasHandle &handle, int n, CudaScalar alpha, CudaScalar *x)
 Scale vector x <- alpha * x on device using cuBLAS. More...
 
CudaScalar activation_scale (ActivationType act)
 Scaling factor for initialization. More...
 
__global__ void add_bias_kernel (CudaScalar *z, const CudaScalar *b, int rows, int cols)
 Kernel: add bias vector to column-major matrix. More...
 
__global__ void activation_kernel (CudaScalar *a, int n, int act)
 Kernel: apply activation in-place. More...
 
__global__ void activation_deriv_kernel (CudaScalar *grad, const CudaScalar *a, int n, int act)
 Kernel: multiply gradient by activation derivative. More...
 
__global__ void diff_kernel (const CudaScalar *output, const CudaScalar *target, CudaScalar *diff, int n)
 Kernel: diff = output - target. More...
 
__global__ void sum_rows_kernel (const CudaScalar *mat, CudaScalar *out, int rows, int cols)
 Kernel: sum the columns of a (rows x cols) matrix into a row vector. More...
 
void launch_add_bias (CudaScalar *z, const CudaScalar *b, int rows, int cols)
 Launch add-bias kernel. More...
 
void launch_activation (CudaScalar *a, int n, ActivationType act)
 Launch activation kernel. More...
 
void launch_activation_deriv (CudaScalar *grad, const CudaScalar *a, int n, ActivationType act)
 Launch activation-derivative kernel. More...
 
void launch_diff (const CudaScalar *output, const CudaScalar *target, CudaScalar *diff, int n)
 Launch diff kernel. More...
 
void launch_sum_rows (const CudaScalar *mat, CudaScalar *out, int rows, int cols)
 Launch sum-rows kernel. More...
 

Typedef Documentation

◆ CudaScalar

using cuda_mlp::CudaScalar = typedef float

Scalar type used across CUDA kernels and optimizers.

Enumeration Type Documentation

◆ ActivationType

enum cuda_mlp::ActivationType : int
strong

Supported activation functions.

Enumerator
Linear 
Tanh 
ReLU 
Sigmoid 

Function Documentation

◆ activation_deriv_kernel()

__global__ void cuda_mlp::activation_deriv_kernel ( CudaScalar *  grad,
const CudaScalar *  a,
int  n,
int  act 
)

Kernel: multiply gradient by activation derivative.

◆ activation_kernel()

__global__ void cuda_mlp::activation_kernel ( CudaScalar *  a,
int  n,
int  act 
)

Kernel: apply activation in-place.

◆ activation_scale()

CudaScalar cuda_mlp::activation_scale ( ActivationType  act)
inline

Scaling factor for initialization.

Here is the caller graph for this function:

◆ add_bias_kernel()

__global__ void cuda_mlp::add_bias_kernel ( CudaScalar *  z,
const CudaScalar *  b,
int  rows,
int  cols 
)

Kernel: add bias vector to column-major matrix.

◆ cublas_check()

void cuda_mlp::cublas_check ( cublasStatus_t  status,
const char *  msg 
)
inline

Check a cuBLAS API call and abort with a message on failure.

Parameters
status — cuBLAS status code.
msg — Context string describing the operation.
Here is the caller graph for this function:

◆ cuda_check()

void cuda_mlp::cuda_check ( cudaError_t  err,
const char *  msg 
)
inline

Check a CUDA API call and abort with a message on failure.

Parameters
err — CUDA error code returned by the runtime.
msg — Context string describing the operation.
Here is the caller graph for this function:

◆ device_axpy()

void cuda_mlp::device_axpy ( CublasHandle &  handle,
int  n,
CudaScalar  alpha,
const CudaScalar *  x,
CudaScalar *  y 
)
inline

y <- alpha * x + y (AXPY) on device using cuBLAS.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ device_copy()

void cuda_mlp::device_copy ( CudaScalar *  dst,
const CudaScalar *  src,
size_t  n 
)
inline

Copy device-to-device.

Parameters
dst — Destination device pointer.
src — Source device pointer.
n — Number of elements.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ device_dot()

CudaScalar cuda_mlp::device_dot ( CublasHandle &  handle,
const CudaScalar *  x,
const CudaScalar *  y,
int  n 
)
inline

Compute dot product on device using cuBLAS.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ device_nrm2()

CudaScalar cuda_mlp::device_nrm2 ( CublasHandle &  handle,
const CudaScalar *  x,
int  n 
)
inline

Compute Euclidean norm on device using cuBLAS.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ device_scal()

void cuda_mlp::device_scal ( CublasHandle &  handle,
int  n,
CudaScalar  alpha,
CudaScalar *  x 
)
inline

Scale vector x <- alpha * x on device using cuBLAS.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ device_set_zero()

void cuda_mlp::device_set_zero ( CudaScalar *  ptr,
size_t  n 
)
inline

Set device memory to zero.

Parameters
ptr — Device pointer.
n — Number of elements.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ diff_kernel()

__global__ void cuda_mlp::diff_kernel ( const CudaScalar *  output,
const CudaScalar *  target,
CudaScalar *  diff,
int  n 
)

Kernel: diff = output - target.

◆ launch_activation()

void cuda_mlp::launch_activation ( CudaScalar *  a,
int  n,
ActivationType  act 
)
inline

Launch activation kernel.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ launch_activation_deriv()

void cuda_mlp::launch_activation_deriv ( CudaScalar *  grad,
const CudaScalar *  a,
int  n,
ActivationType  act 
)
inline

Launch activation-derivative kernel.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ launch_add_bias()

void cuda_mlp::launch_add_bias ( CudaScalar *  z,
const CudaScalar *  b,
int  rows,
int  cols 
)
inline

Launch add-bias kernel.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ launch_diff()

void cuda_mlp::launch_diff ( const CudaScalar *  output,
const CudaScalar *  target,
CudaScalar *  diff,
int  n 
)
inline

Launch diff kernel.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ launch_sum_rows()

void cuda_mlp::launch_sum_rows ( const CudaScalar *  mat,
CudaScalar *  out,
int  rows,
int  cols 
)
inline

Launch sum-rows kernel.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sum_rows_kernel()

__global__ void cuda_mlp::sum_rows_kernel ( const CudaScalar *  mat,
CudaScalar *  out,
int  rows,
int  cols 
)

Kernel: sum the columns of a (rows x cols) matrix into a row vector.