| 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::AbsoluteSum (AFloat *result, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::AdamUpdate (AFloat *A, const AFloat *M, const AFloat *V, int m, int n, AFloat alpha, AFloat eps) | 
|   | Optimizer kernel functions.  
  | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::AdamUpdateFirstMom (AFloat *A, const AFloat *B, int m, int n, AFloat beta) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::AdamUpdateSecondMom (AFloat *A, const AFloat *B, int m, int n, AFloat beta) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::AddBiases (AFloat *A, const AFloat *B, int nRows, int nCols) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::AddL1RegularizationGradients (AFloat *A, const AFloat *B, AFloat weightDecay, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::AddL2RegularizationGradients (AFloat *A, const AFloat *B, AFloat weightDecay, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::AddRowWise (AFloat *W, const AFloat *theta, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::AlmostEquals (bool *result, const AFloat *A, const AFloat *B, double epsilon, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __device__ AFloat  | TMVA::DNN::Cuda::AtomicAdd (AFloat *address, AFloat val) | 
|   | 
| template<>  | 
| __device__ double  | TMVA::DNN::Cuda::AtomicAdd (double *address, double val) | 
|   | 
| template<>  | 
| __device__ float  | TMVA::DNN::Cuda::AtomicAdd (float *address, float val) | 
|   | 
| __device__ int  | TMVA::DNN::Cuda::calculateDimension (int imgDim, int fltDim, int padding, int stride) | 
|   | Calculate the dimension of an output volume, given the sliding parameters and the input shape.  
  | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::ConstAdd (AFloat *A, AFloat beta, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::ConstMult (AFloat *A, AFloat beta, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::CrossEntropy (AFloat *result, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::CrossEntropyGradients (AFloat *dY, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Deflatten (AFloat *A, const AFloat *B, int size, int nRows, int nCols) | 
|   | Deflatten a 2D-array into an array of 2D-arrays.  
  | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::DeflattenRM (AFloat *A, const AFloat *B, int size, int nRows, int nCols) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Downsample (AFloat *output, AFloat *indexMatrix, const AFloat *input, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols) | 
|   | Downsampling kernel used as the forward propagation step of a Max-Pooling layer.  
  | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Dropout (AFloat *A, int m, int n, AFloat dropoutProbability, curandState_t *state) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Flatten (AFloat *A, const AFloat *B, int size, int nRows, int nCols) | 
|   | Flatten an array of 2D-arrays into a single 2D-array.  
  | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::FlattenRM (AFloat *A, const AFloat *B, int size, int nRows, int nCols) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Gauss (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::GaussDerivative (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Hadamard (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::IdentityDerivative (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Im2Col (AFloat *A, const AFloat *B, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols, int zeroPaddingHeight, int zeroPaddingWidth) | 
|   | A kernel that re-arranges image regions of the input matrix B into column vectors in matrix A.  
  | 
|   | 
| template<typename AFloat >  | 
| __device__ AFloat  | TMVA::DNN::Cuda::max (AFloat x, AFloat y) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::MaxPoolBackward (AFloat *activationGradientsBackward, const AFloat *activationGradients, const AFloat *indexMatrix, int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth, int strideRows, int strideCols) | 
|   | Back-propagate the gradients through a max-pooling layer.  
  | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::MeanSquaredError (AFloat *result, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::MeanSquaredErrorGradients (AFloat *dY, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::ReciprocalElementWise (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::ReduceMatrix (AFloat *result, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __device__ void  | TMVA::DNN::Cuda::ReduceSum (AFloat *result, AFloat *sdata) | 
|   | 
| template<typename AFloat >  | 
| __device__ void  | TMVA::DNN::Cuda::ReduceSumVertical (AFloat *result, AFloat *sdata, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Relu (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::ReluDerivative (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Reshape (AFloat *A, const AFloat *B, int nRowsA, int nColsA, int nRowsB, int nColsB) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::RotateWeights (AFloat *A, const AFloat *B, int filterDepth, int filterHeight, int filterWidth, int numFilters) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Sigmoid (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Sigmoid (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SigmoidDerivative (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Softmax (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SoftmaxCrossEntropy (AFloat *result, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SoftmaxCrossEntropyGradients (AFloat *dY, const AFloat *Y, const AFloat *output, const AFloat *weights, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SoftSign (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SoftSignDerivative (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SqrtElementWise (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SquaredSum (AFloat *result, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SquareElementWise (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SumColumns (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SymmetricRelu (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::SymmetricReluDerivative (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::Tanh (AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::TanhDerivative (AFloat *B, const AFloat *A, int m, int n) | 
|   | 
| template<typename AFloat >  | 
| __global__ void  | TMVA::DNN::Cuda::UpdateWeights (AFloat *A, const AFloat **B, int batchSize, int nRows, int nCols) | 
|   |