Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
TMVA::DNN::TCuda< AReal > Class Template Reference

template<typename AReal = Float_t>
class TMVA::DNN::TCuda< AReal >

The TCuda architecture class.

Low-level interface class for CUDA computing architectures. Contains as public types the declaration of the scalar, matrix and buffer types for this architecture as well as the remaining functions in the low-level interface in the form of static members.

Definition at line 64 of file Cuda.h.

Public Types

using ActivationDescriptor_t = CudaActivationDescriptor
 
using AFloat = AReal
 
using AlgorithmBackward_t = CudaConvolutionBwdDataAlgo
 
using AlgorithmDataType_t = DummyCudaDataType
 
using AlgorithmForward_t = CudaConvolutionFwdAlgo
 
using AlgorithmHelper_t = CudaConvolutionBwdFilterAlgo
 
using BNormDescriptors_t = TDNNGenDescriptors< BNormLayer_t >
 
using BNormLayer_t = TBatchNormLayer< TCuda< AReal > >
 
using ConvDescriptors_t = CNN::TCNNDescriptors< ConvLayer_t >
 
using ConvLayer_t = CNN::TConvLayer< TCuda< AReal > >
 
using ConvolutionDescriptor_t = CudaConvolutionDescriptor
 
using ConvWorkspace_t = CNN::TCNNWorkspace< ConvLayer_t >
 
using DeviceBuffer_t = TCudaDeviceBuffer< AFloat >
 
using DropoutDescriptor_t = CudaDropoutDescriptor
 
using EmptyDescriptor_t = CudaEmptyDescriptor
 
using FilterDescriptor_t = CudaFilterDescriptor
 
using GenLayer_t = VGeneralLayer< TCuda< AReal > >
 
using HostBuffer_t = TCudaHostBuffer< AFloat >
 
using Matrix_t = TCudaMatrix< AFloat >
 
using PoolingDescriptor_t = CudaPoolingDescriptor
 
using PoolingDescriptors_t = CNN::TCNNDescriptors< PoolingLayer_t >
 
using PoolingLayer_t = CNN::TMaxPoolLayer< TCuda< AReal > >
 
using PoolingWorkspace_t = CNN::TCNNWorkspace< PoolingLayer_t >
 
using RecurrentDescriptor_t = DummyCudaDataType
 
using ReduceTensorDescriptor_t = DummyCudaDataType
 
using RNNDescriptors_t = RNN::TRNNDescriptors< TCuda< AReal > >
 
using RNNWorkspace_t = RNN::TRNNWorkspace< TCuda< AReal > >
 
using Scalar_t = AFloat
 
using Tensor_t = TCudaTensor< AFloat >
 
using TensorDescriptor_t = DummyCudaDataType
 

Public Member Functions

template<typename AMatrix_t >
void CopyDiffArch (std::vector< TCudaMatrix< AFloat > > &B, const std::vector< AMatrix_t > &A)
 
template<typename AMatrix_t >
void CopyDiffArch (TCudaMatrix< AFloat > &B, const AMatrix_t &A)
 
void Multiply (TCudaMatrix< double > &C, const TCudaMatrix< double > &A, const TCudaMatrix< double > &B)
 
void Multiply (TCudaMatrix< float > &C, const TCudaMatrix< float > &A, const TCudaMatrix< float > &B)
 
void MultiplyTranspose (TCudaMatrix< double > &output, const TCudaMatrix< double > &input, const TCudaMatrix< double > &Weights)
 
void MultiplyTranspose (TCudaMatrix< float > &output, const TCudaMatrix< float > &input, const TCudaMatrix< float > &Weights)
 
void ScaleAdd (TCudaMatrix< double > &B, const TCudaMatrix< double > &A, double alpha)
 
void ScaleAdd (TCudaMatrix< float > &B, const TCudaMatrix< float > &A, float alpha)
 
void SumColumns (TCudaMatrix< double > &B, const TCudaMatrix< double > &A, double alpha, double beta)
 
void SumColumns (TCudaMatrix< float > &B, const TCudaMatrix< float > &A, float alpha, float beta)
 
void SumRows (TCudaMatrix< double > &B, const TCudaMatrix< double > &A)
 
void SumRows (TCudaMatrix< float > &B, const TCudaMatrix< float > &A)
 
void TransposeMultiply (TCudaMatrix< double > &C, const TCudaMatrix< double > &A, const TCudaMatrix< double > &B, double alpha, double beta)
 
void TransposeMultiply (TCudaMatrix< float > &C, const TCudaMatrix< float > &A, const TCudaMatrix< float > &B, float alpha, float beta)
 

Static Public Member Functions

static void ConvLayerForward (Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
 Forward propagation in the Convolutional layer.
 
static Tensor_t CreateTensor (DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
 
static Tensor_t CreateTensor (DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
 
static Tensor_t CreateTensor (size_t b, size_t t, size_t w)
 
static Tensor_t CreateTensor (size_t n, size_t c, size_t h, size_t w)
 
static void CreateWeightTensors (std::vector< Matrix_t > &newWeights, const std::vector< Matrix_t > &weights)
 
static void FreeConvWorkspace (TWorkspace *&)
 Only used for certain cudnn on-device memory.
 
static void FreePoolDropoutWorkspace (TWorkspace *&)
 
static void FreeRNNWorkspace (TWorkspace *&)
 
static TMVA::Experimental::MemoryLayout GetTensorLayout ()
 
static void InitializeActivationDescriptor (ActivationDescriptor_t &, EActivationFunction, double=0.0)
 
static void InitializeBNormDescriptors (TDescriptors *&, BNormLayer_t *)
 Initialize CNN data/operator descriptors.
 
static void InitializeConvDescriptors (TDescriptors *&, ConvLayer_t *)
 
static void InitializeConvWorkspace (TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *)
 
static void InitializeGRUDescriptors (TDescriptors *&, GenLayer_t *)
 
static void InitializeGRUTensors (GenLayer_t *)
 
static void InitializeGRUWorkspace (TWorkspace *&, TDescriptors *&, GenLayer_t *)
 
static void InitializeLSTMDescriptors (TDescriptors *&, GenLayer_t *)
 
static void InitializeLSTMTensors (GenLayer_t *)
 
static void InitializeLSTMWorkspace (TWorkspace *&, TDescriptors *&, GenLayer_t *)
 
static void InitializePoolDescriptors (TDescriptors *&, PoolingLayer_t *)
 
static void InitializePoolDropoutWorkspace (TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *)
 
static void InitializeRNNDescriptors (TDescriptors *&, GenLayer_t *)
 
static void InitializeRNNTensors (GenLayer_t *)
 
static void InitializeRNNWorkspace (TWorkspace *&, TDescriptors *&, GenLayer_t *)
 
static bool IsCudnn ()
 
static void PrepareInternals (Tensor_t &)
 Dummy placeholder - preparation is currently only required for the CUDA architecture.
 
static void ReleaseBNormDescriptors (TDescriptors *&)
 
static void ReleaseConvDescriptors (TDescriptors *&)
 Release CNN data/operator descriptors.
 
static void ReleaseDescriptor (ActivationDescriptor_t &)
 
static void ReleasePoolDescriptors (TDescriptors *&)
 
static void ReleaseRNNDescriptors (TDescriptors *&)
 
Forward Propagation

Low-level functions required for the forward propagation of activations through the network.

static void MultiplyTranspose (Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
 Matrix-multiply input with the transpose of weights and write the results into output.
 
static void MultiplyTranspose (Tensor_t &output, const Tensor_t &input, const Matrix_t &weights)
 
static void AddRowWise (Matrix_t &output, const Matrix_t &biases)
 Add the vectors biases row-wise to the matrix output.
 
static void AddRowWise (Tensor_t &output, const Matrix_t &biases)
 
Backward Propagation (Dense Layers)

Low-level functions required for the backward propagation of activation gradients through the network.

static void Backward (Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
 Perform the complete backward propagation step.
 
static void ScaleAdd (Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
 Adds the elements in matrix B scaled by beta to the elements in the matrix A.
 
static void Copy (Matrix_t &B, const Matrix_t &A)
 
template<typename AMatrix_t >
static void CopyDiffArch (Matrix_t &B, const AMatrix_t &A)
 
static void ScaleAdd (Tensor_t &A, const Tensor_t &B, Scalar_t beta=1.0)
 Above functions extended to vectors.
 
static void Copy (Tensor_t &A, const Tensor_t &B)
 
template<typename ATensor_t >
static void CopyDiffArch (Tensor_t &A, const ATensor_t &B)
 
template<typename AMatrix_t >
static void CopyDiffArch (std::vector< Matrix_t > &A, const std::vector< AMatrix_t > &B)
 
Activation Functions

For each activation function, the low-level interface contains two routines.

One that applies the activation function to a matrix and one that evaluates the derivatives of the activation function at the elements of a given matrix and writes the results into the result matrix.

static void ActivationFunctionForward (Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const AFloat alpha=1, const AFloat beta=0)
 
static void ActivationFunctionBackward (Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const AFloat alpha=1, const AFloat beta=0)
 Computes the gradient of the activation function.
 
static void IdentityDerivative (Tensor_t &B, const Tensor_t &A)
 
static void Relu (Tensor_t &B)
 
static void ReluDerivative (Tensor_t &B, const Tensor_t &A)
 
static void Sigmoid (Tensor_t &B)
 
static void SigmoidDerivative (Tensor_t &B, const Tensor_t &A)
 
static void Tanh (Tensor_t &B)
 
static void TanhDerivative (Tensor_t &B, const Tensor_t &A)
 
static void FastTanh (Tensor_t &B)
 
static void FastTanhDerivative (Tensor_t &B, const Tensor_t &A)
 
static void SymmetricRelu (Tensor_t &B)
 
static void SymmetricReluDerivative (Tensor_t &B, const Tensor_t &A)
 
static void SoftSign (Tensor_t &B)
 
static void SoftSignDerivative (Tensor_t &B, const Tensor_t &A)
 
static void Gauss (Tensor_t &B)
 
static void GaussDerivative (Tensor_t &B, const Tensor_t &A)
 
Loss Functions

Loss functions compute a scalar value given the output of the network for a given training input and the expected network prediction Y that quantifies the quality of the prediction.

For each function, a routine that computes the gradients (suffixed by Gradients) must also be provided for starting the backpropagation algorithm.

static Scalar_t MeanSquaredError (const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
 
static void MeanSquaredErrorGradients (Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
 
static Scalar_t CrossEntropy (const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
 Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net.
 
static void CrossEntropyGradients (Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
 
static Scalar_t SoftmaxCrossEntropy (const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
 Softmax transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net.
 
static void SoftmaxCrossEntropyGradients (Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
 
Output Functions

Output functions transform the activations output of the output layer in the network to a valid prediction YHat for the desired usage of the network, e.g.

the identity function for regression or the sigmoid transformation for two-class classification.

static void Sigmoid (Matrix_t &YHat, const Matrix_t &)
 
static void Softmax (Matrix_t &YHat, const Matrix_t &)
 
Regularization

For each regularization type two functions are required, one named <Type>Regularization that evaluates the corresponding regularization functional for a given weight matrix and the Add<Type>RegularizationGradients, that adds the regularization component in the gradients to the provided matrix.

static Scalar_t L1Regularization (const Matrix_t &W)
 
static void AddL1RegularizationGradients (Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
 
static Scalar_t L2Regularization (const Matrix_t &W)
 
static void AddL2RegularizationGradients (Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
 
Initialization

For each initialization method, one function in the low-level interface is provided.

The naming scheme is

Initialize<Type>

for a given initialization method Type.

static void InitializeGauss (Matrix_t &A)
 
static void InitializeUniform (Matrix_t &A)
 
static void InitializeIdentity (Matrix_t &A)
 
static void InitializeZero (Matrix_t &A)
 
static void InitializeZero (Tensor_t &A)
 
static void InitializeGlorotNormal (Matrix_t &A)
 Truncated normal initialization (Glorot, also called Xavier normal). The values are sampled from a normal distribution with stddev = sqrt(2/(N_input + N_output)), and values larger than 2 * stddev are discarded. See Glorot & Bengio, AISTATS 2010 - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf.
 
static void InitializeGlorotUniform (Matrix_t &A)
 Sample from a uniform distribution in range [ -lim,+lim] where lim = sqrt(6/(N_in+N_out)).
 
static TRandom & GetRandomGenerator ()
 
static void SetRandomSeed (size_t seed)
 
Dropout
static void DropoutForward (Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
 Apply dropout with activation probability p to the given tensor A and scale the result by reciprocal of p.
 
static void DropoutForward (Matrix_t &A, Scalar_t p)
 
static void DropoutBackward (Tensor_t &, TDescriptors *, TWorkspace *)
 
Batch Normalization Layer Propagation
static void BatchNormLayerForwardTraining (int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
 The input from each batch are normalized during training to have zero mean and unit variance and they are then scaled by two parameters, different for each input variable:
 
static void BatchNormLayerForwardInference (int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
 During inference the inputs are not normalized using the batch mean but using the previously computed running mean and variance.
 
static void BatchNormLayerBackward (int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
 
Forward Propagation in Convolutional Layer
static size_t calculateDimension (size_t imgDim, size_t fltDim, size_t padding, size_t stride)
 Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters.
 
static void Im2col (Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
 Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
 
static void Im2colIndices (std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
 
static void Im2colFast (Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
 
static void RotateWeights (Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
 Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
 
static void AddConvBiases (Matrix_t &output, const Matrix_t &biases)
 Add the biases in the Convolutional Layer.
 
Backward Propagation in Convolutional Layer
static void ConvLayerBackward (Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
 Perform the complete backward propagation step in a Convolutional Layer.
 
static void CalculateConvActivationGradients (Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
 Utility function for calculating the activation gradients of the layer before the convolutional layer.
 
static void CalculateConvWeightGradients (Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
 Utility function for calculating the weight gradients of the convolutional layer.
 
static void CalculateConvBiasGradients (Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
 Utility function for calculating the bias gradients of the convolutional layer.
 
Forward Propagation in Max Pooling Layer
static void Downsample (Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
 Downsample the matrix C to the matrix A, using max operation, such that the winning indices are stored in matrix B.
 
Backward Propagation in Max Pooling Layer
static void MaxPoolLayerBackward (Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
 Perform the complete backward propagation step in a Pooling Layer.
 
Forward and Backward Propagation in Reshape Layer
static void Reshape (Matrix_t &A, const Matrix_t &B)
 Transform the matrix B to a matrix with different dimensions A.
 
static void Flatten (Tensor_t &A, const Tensor_t &B)
 Flattens the tensor B, such that each matrix is stretched into one row, resulting in a matrix A.
 
static void Deflatten (Tensor_t &A, const Tensor_t &B)
 Transforms each row of B to a matrix and stores it in the tensor A.
 
static void Rearrange (Tensor_t &out, const Tensor_t &in)
 Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
 
static Matrix_t & RecurrentLayerBackward (Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient)
 Backward pass for Recurrent Networks.
 
static void RNNForward (const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, const RNNDescriptors_t &, RNNWorkspace_t &, bool)
 
static void RNNBackward (const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, Tensor_t &, const RNNDescriptors_t &, RNNWorkspace_t &)
 
static Matrix_t & LSTMLayerBackward (Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &)
 
static Matrix_t & GRULayerBackward (Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, bool)
 Backward pass for GRU Network.
 
Additional Arithmetic Functions

Additional arithmetic on CUDA matrices used to implement the low-level interface.

static void Multiply (Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
 Standard multiplication of two matrices A and B with the result being written into C.
 
static void TransposeMultiply (Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
 Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
 
static void Hadamard (Tensor_t &A, const Tensor_t &B)
 In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
 
static void Hadamard (Matrix_t &A, const Matrix_t &B)
 
static void SumColumns (Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
 Sum columns of (m x n) matrix A and write the results into the first m elements in B.
 
static Scalar_t Sum (const Matrix_t &A)
 Compute the sum of all elements in A.
 
static bool AlmostEquals (const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
 Check two matrices for equality, taking floating point arithmetic errors into account.
 
static void ConstAdd (Matrix_t &A, Scalar_t beta)
 Add the constant beta to all the elements of matrix A and write the result into A.
 
static void ConstMult (Matrix_t &A, Scalar_t beta)
 Multiply all the elements of matrix A by the constant beta and write the result into A.
 
static void ReciprocalElementWise (Matrix_t &A)
 Take the reciprocal of each element of the matrix A and write the result into A.
 
static void SquareElementWise (Matrix_t &A)
 Square each element of the matrix A and write the result into A.
 
static void SqrtElementWise (Matrix_t &A)
 Take the square root of each element of the matrix A and write the result into A.
 
static void AdamUpdate (Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
 Adam updates.
 
static void AdamUpdateFirstMom (Matrix_t &A, const Matrix_t &B, Scalar_t beta)
 
static void AdamUpdateSecondMom (Matrix_t &A, const Matrix_t &B, Scalar_t beta)
 
static void PrintTensor (const Tensor_t &A, const std::string name="Cuda-tensor", bool=false)
 
static void SumRows (Matrix_t &B, const Matrix_t &A)
 extra functions defined only for CPU architecture !!!
 

Static Private Attributes

static TRandom * fgRandomGen = nullptr
 

#include <TMVA/DNN/Architectures/Cuda.h>

Member Typedef Documentation

◆ ActivationDescriptor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::ActivationDescriptor_t = CudaActivationDescriptor

Definition at line 78 of file Cuda.h.

◆ AFloat

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::AFloat = AReal

Definition at line 70 of file Cuda.h.

◆ AlgorithmBackward_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::AlgorithmBackward_t = CudaConvolutionBwdDataAlgo

Definition at line 87 of file Cuda.h.

◆ AlgorithmDataType_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::AlgorithmDataType_t = DummyCudaDataType

Definition at line 89 of file Cuda.h.

◆ AlgorithmForward_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::AlgorithmForward_t = CudaConvolutionFwdAlgo

Definition at line 86 of file Cuda.h.

◆ AlgorithmHelper_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::AlgorithmHelper_t = CudaConvolutionBwdFilterAlgo

Definition at line 88 of file Cuda.h.

◆ BNormDescriptors_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::BNormDescriptors_t = TDNNGenDescriptors<BNormLayer_t>

Definition at line 97 of file Cuda.h.

◆ BNormLayer_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::BNormLayer_t = TBatchNormLayer<TCuda<AReal> >

Definition at line 96 of file Cuda.h.

◆ ConvDescriptors_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::ConvDescriptors_t = CNN::TCNNDescriptors<ConvLayer_t>

Definition at line 100 of file Cuda.h.

◆ ConvLayer_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::ConvLayer_t = CNN::TConvLayer<TCuda<AReal> >

Definition at line 99 of file Cuda.h.

◆ ConvolutionDescriptor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::ConvolutionDescriptor_t = CudaConvolutionDescriptor

Definition at line 79 of file Cuda.h.

◆ ConvWorkspace_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::ConvWorkspace_t = CNN::TCNNWorkspace<ConvLayer_t>

Definition at line 101 of file Cuda.h.

◆ DeviceBuffer_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::DeviceBuffer_t = TCudaDeviceBuffer<AFloat>

Definition at line 75 of file Cuda.h.

◆ DropoutDescriptor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::DropoutDescriptor_t = CudaDropoutDescriptor

Definition at line 81 of file Cuda.h.

◆ EmptyDescriptor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::EmptyDescriptor_t = CudaEmptyDescriptor

Definition at line 93 of file Cuda.h.

◆ FilterDescriptor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::FilterDescriptor_t = CudaFilterDescriptor

Definition at line 80 of file Cuda.h.

◆ GenLayer_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::GenLayer_t = VGeneralLayer<TCuda<AReal> >

Definition at line 95 of file Cuda.h.

◆ HostBuffer_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::HostBuffer_t = TCudaHostBuffer<AFloat>

Definition at line 76 of file Cuda.h.

◆ Matrix_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::Matrix_t = TCudaMatrix<AFloat>

Definition at line 73 of file Cuda.h.

◆ PoolingDescriptor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::PoolingDescriptor_t = CudaPoolingDescriptor

Definition at line 83 of file Cuda.h.

◆ PoolingDescriptors_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::PoolingDescriptors_t = CNN::TCNNDescriptors<PoolingLayer_t>

Definition at line 103 of file Cuda.h.

◆ PoolingLayer_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::PoolingLayer_t = CNN::TMaxPoolLayer<TCuda<AReal> >

Definition at line 102 of file Cuda.h.

◆ PoolingWorkspace_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::PoolingWorkspace_t = CNN::TCNNWorkspace<PoolingLayer_t>

Definition at line 104 of file Cuda.h.

◆ RecurrentDescriptor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::RecurrentDescriptor_t = DummyCudaDataType

Definition at line 91 of file Cuda.h.

◆ ReduceTensorDescriptor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::ReduceTensorDescriptor_t = DummyCudaDataType

Definition at line 90 of file Cuda.h.

◆ RNNDescriptors_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::RNNDescriptors_t = RNN::TRNNDescriptors<TCuda<AReal> >

Definition at line 106 of file Cuda.h.

◆ RNNWorkspace_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::RNNWorkspace_t = RNN::TRNNWorkspace<TCuda<AReal> >

Definition at line 107 of file Cuda.h.

◆ Scalar_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::Scalar_t = AFloat

Definition at line 71 of file Cuda.h.

◆ Tensor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::Tensor_t = TCudaTensor<AFloat>

Definition at line 74 of file Cuda.h.

◆ TensorDescriptor_t

template<typename AReal = Float_t>
using TMVA::DNN::TCuda< AReal >::TensorDescriptor_t = DummyCudaDataType

Definition at line 84 of file Cuda.h.

Member Function Documentation

◆ ActivationFunctionBackward()

template<typename AFloat >
void TMVA::DNN::TCuda< AFloat >::ActivationFunctionBackward ( Tensor_t dX,
const Tensor_t Y,
const Tensor_t dY,
const Tensor_t X,
EActivationFunction  activFunct,
const ActivationDescriptor_t  activationDescr,
const AFloat  alpha = 1,
const AFloat  beta = 0 
)
static

Computes the gradient of the activation function.

Definition at line 37 of file ActivationFunctions.cu.

◆ ActivationFunctionForward()

template<typename AFloat >
void TMVA::DNN::TCuda< AFloat >::ActivationFunctionForward ( Tensor_t X,
EActivationFunction  activFunct,
const ActivationDescriptor_t  activationDescr,
const double  coef = 0.0,
const AFloat  alpha = 1,
const AFloat  beta = 0 
)
static

Definition at line 28 of file ActivationFunctions.cu.

◆ AdamUpdate()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::AdamUpdate ( Matrix_t A,
const Matrix_t M,
const Matrix_t V,
Scalar_t  alpha,
Scalar_t  eps 
)
static

Adam updates.

Definition at line 425 of file Arithmetic.cu.

◆ AdamUpdateFirstMom()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::AdamUpdateFirstMom ( Matrix_t A,
const Matrix_t B,
Scalar_t  beta 
)
static

Definition at line 441 of file Arithmetic.cu.

◆ AdamUpdateSecondMom()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::AdamUpdateSecondMom ( Matrix_t A,
const Matrix_t B,
Scalar_t  beta 
)
static

Definition at line 455 of file Arithmetic.cu.

◆ AddConvBiases()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::AddConvBiases ( Matrix_t output,
const Matrix_t biases 
)
static

Add the biases in the Convolutional Layer.


Definition at line 432 of file Propagation.cu.

◆ AddL1RegularizationGradients()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::AddL1RegularizationGradients ( Matrix_t A,
const Matrix_t W,
Scalar_t  weightDecay 
)
static

Definition at line 43 of file Regularization.cu.

◆ AddL2RegularizationGradients()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::AddL2RegularizationGradients ( Matrix_t A,
const Matrix_t W,
Scalar_t  weightDecay 
)
static

Definition at line 76 of file Regularization.cu.

◆ AddRowWise() [1/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::AddRowWise ( Matrix_t output,
const Matrix_t biases 
)
static

Add the vectors biases row-wise to the matrix output.

◆ AddRowWise() [2/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::AddRowWise ( Tensor_t output,
const Matrix_t biases 
)
inline static

Definition at line 217 of file Cuda.h.

◆ AlmostEquals()

template<typename AReal = Float_t>
bool TMVA::DNN::TCuda< AFloat >::AlmostEquals ( const Matrix_t A,
const Matrix_t B,
double  epsilon = 0.1 
)
static

Check two matrices for equality, taking floating point arithmetic errors into account.

Checks two matrices for element-wise equality.

Template Parameters
AFloat: An architecture-specific floating point number type.
Parameters
A: The first matrix.
B: The second matrix.
epsilon: Equality tolerance, needed to address floating point arithmetic.
Returns
Whether the two matrices can be considered equal element-wise

Definition at line 291 of file Arithmetic.cu.

◆ Backward()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Backward ( Tensor_t activationGradientsBackward,
Matrix_t weightGradients,
Matrix_t biasGradients,
const Tensor_t df,
const Tensor_t activationGradients,
const Matrix_t weights,
const Tensor_t activationBackward 
)
static

Perform the complete backward propagation step.

If the provided activationGradientsBackward matrix is not empty, compute the gradients of the objective function with respect to the activations of the previous layer (backward direction). Also compute the weight and the bias gradients. Modifies the values in df and thus produces a valid result only if it is applied the first time after the corresponding forward propagation has been performed.

Definition at line 91 of file Propagation.cu.

◆ BatchNormLayerBackward()

template<typename AFloat >
void TMVA::DNN::TCuda< AFloat >::BatchNormLayerBackward ( int  axis,
const Tensor_t x,
const Tensor_t dy,
Tensor_t dx,
Matrix_t gamma,
Matrix_t dgamma,
Matrix_t dbeta,
const Matrix_t mean,
const Matrix_t variance,
const Matrix_t iVariance,
Scalar_t  epsilon,
const TensorDescriptor_t  
)
static

Definition at line 754 of file Propagation.cu.

◆ BatchNormLayerForwardInference()

template<typename AFloat >
void TMVA::DNN::TCuda< AFloat >::BatchNormLayerForwardInference ( int  axis,
const Tensor_t x,
Matrix_t gamma,
Matrix_t beta,
Tensor_t y,
const Matrix_t runningMeans,
const Matrix_t runningVars,
Scalar_t  epsilon,
const TensorDescriptor_t  
)
static

During inference the inputs are not normalized using the batch mean and variance, but using the previously computed running mean and variance.

Definition at line 743 of file Propagation.cu.

◆ BatchNormLayerForwardTraining()

template<typename AFloat >
void TMVA::DNN::TCuda< AFloat >::BatchNormLayerForwardTraining ( int  axis,
const Tensor_t x,
Tensor_t y,
Matrix_t gamma,
Matrix_t beta,
Matrix_t mean,
Matrix_t ,
Matrix_t iVariance,
Matrix_t runningMeans,
Matrix_t runningVars,
Scalar_t  nTrainedBatches,
Scalar_t  momentum,
Scalar_t  epsilon,
const TensorDescriptor_t bnParDescriptor 
)
static

The inputs from each batch are normalized during training to have zero mean and unit variance, and they are then scaled by two parameters, different for each input variable:

  • a scale factor $\gamma$ (gamma)
  • an offset $\beta$ (beta)

Definition at line 729 of file Propagation.cu.

◆ CalculateConvActivationGradients()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::CalculateConvActivationGradients ( Tensor_t activationGradientsBackward,
const Tensor_t df,
const Matrix_t weights,
size_t  batchSize,
size_t  inputHeight,
size_t  inputWidth,
size_t  depth,
size_t  height,
size_t  width,
size_t  filterDepth,
size_t  filterHeight,
size_t  filterWidth 
)
static

Utility function for calculating the activation gradients of the layer before the convolutional layer.

Definition at line 324 of file Propagation.cu.

◆ CalculateConvBiasGradients()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::CalculateConvBiasGradients ( Matrix_t biasGradients,
const Tensor_t df,
size_t  batchSize,
size_t  depth,
size_t  nLocalViews 
)
static

Utility function for calculating the bias gradients of the convolutional layer.

Definition at line 416 of file Propagation.cu.

◆ CalculateConvWeightGradients()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::CalculateConvWeightGradients ( Matrix_t weightGradients,
const Tensor_t df,
const Tensor_t activations_backward,
size_t  batchSize,
size_t  inputHeight,
size_t  inputWidth,
size_t  depth,
size_t  height,
size_t  width,
size_t  filterDepth,
size_t  filterHeight,
size_t  filterWidth,
size_t  nLocalViews 
)
static

Utility function for calculating the weight gradients of the convolutional layer.

Definition at line 369 of file Propagation.cu.

◆ calculateDimension()

template<typename AFloat >
size_t TMVA::DNN::TCuda< AFloat >::calculateDimension ( size_t  imgDim,
size_t  fltDim,
size_t  padding,
size_t  stride 
)
static

Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters.

Definition at line 151 of file Propagation.cu.

◆ ConstAdd()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::ConstAdd ( Matrix_t A,
Scalar_t  beta 
)
static

Add the constant beta to all the elements of matrix A and write the result into A.

Definition at line 357 of file Arithmetic.cu.

◆ ConstMult()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::ConstMult ( Matrix_t A,
Scalar_t  beta 
)
static

Multiply all the elements of matrix A by the constant beta and write the result into A.

Definition at line 371 of file Arithmetic.cu.

◆ ConvLayerBackward()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::ConvLayerBackward ( Tensor_t activationGradientsBackward,
Matrix_t weightGradients,
Matrix_t biasGradients,
Tensor_t df,
Tensor_t activationGradients,
const Matrix_t weights,
const Tensor_t activationBackward,
const Tensor_t outputTensor,
EActivationFunction  activFunc,
const ConvDescriptors_t ,
ConvWorkspace_t ,
size_t  batchSize,
size_t  inputHeight,
size_t  inputWidth,
size_t  depth,
size_t  height,
size_t  width,
size_t  filterDepth,
size_t  filterHeight,
size_t  filterWidth,
size_t  nLocalViews 
)
static

Perform the complete backward propagation step in a Convolutional Layer.

If the provided activationGradientsBackward matrix is not empty, compute the gradients of the objective function with respect to the activations of the previous layer (backward direction). Also compute the weight and the bias gradients. Modifies the values in df and thus produces a valid result only if it is applied the first time after the corresponding forward propagation has been performed.

Definition at line 276 of file Propagation.cu.

◆ ConvLayerForward()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::ConvLayerForward ( Tensor_t output,
Tensor_t inputActivationFunc,
const Tensor_t input,
const Matrix_t weights,
const Matrix_t biases,
const DNN::CNN::TConvParams params,
EActivationFunction  activFunc,
Tensor_t ,
const ConvDescriptors_t ,
ConvWorkspace_t  
)
static

Forward propagation in the Convolutional layer.

Definition at line 236 of file Propagation.cu.

◆ Copy() [1/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::Copy ( Matrix_t B,
const Matrix_t A 
)
static

◆ Copy() [2/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::Copy ( Tensor_t A,
const Tensor_t B 
)
static

◆ CopyDiffArch() [1/5]

template<typename AReal = Float_t>
template<typename AMatrix_t >
static void TMVA::DNN::TCuda< AReal >::CopyDiffArch ( Matrix_t B,
const AMatrix_t &  A 
)
static

◆ CopyDiffArch() [2/5]

template<typename AReal = Float_t>
template<typename AMatrix_t >
static void TMVA::DNN::TCuda< AReal >::CopyDiffArch ( std::vector< Matrix_t > &  A,
const std::vector< AMatrix_t > &  B 
)
static

◆ CopyDiffArch() [3/5]

template<typename AReal = Float_t>
template<typename AMatrix_t >
void TMVA::DNN::TCuda< AReal >::CopyDiffArch ( std::vector< TCudaMatrix< AFloat > > &  B,
const std::vector< AMatrix_t > &  A 
)

Definition at line 851 of file Cuda.h.

◆ CopyDiffArch() [4/5]

template<typename AReal = Float_t>
template<typename AMatrix_t >
void TMVA::DNN::TCuda< AReal >::CopyDiffArch ( TCudaMatrix< AFloat > &  B,
const AMatrix_t &  A 
)

Definition at line 839 of file Cuda.h.

◆ CopyDiffArch() [5/5]

template<typename AReal = Float_t>
template<typename ATensor_t >
static void TMVA::DNN::TCuda< AReal >::CopyDiffArch ( Tensor_t A,
const ATensor_t &  B 
)
static

◆ CreateTensor() [1/4]

template<typename AReal = Float_t>
static Tensor_t TMVA::DNN::TCuda< AReal >::CreateTensor ( DeviceBuffer_t  buffer,
size_t  b,
size_t  t,
size_t  w 
)
inlinestatic

Definition at line 121 of file Cuda.h.

◆ CreateTensor() [2/4]

template<typename AReal = Float_t>
static Tensor_t TMVA::DNN::TCuda< AReal >::CreateTensor ( DeviceBuffer_t  buffer,
size_t  n,
size_t  c,
size_t  h,
size_t  w 
)
inlinestatic

Definition at line 118 of file Cuda.h.

◆ CreateTensor() [3/4]

template<typename AReal = Float_t>
static Tensor_t TMVA::DNN::TCuda< AReal >::CreateTensor ( size_t  b,
size_t  t,
size_t  w 
)
inlinestatic

Definition at line 114 of file Cuda.h.

◆ CreateTensor() [4/4]

template<typename AReal = Float_t>
static Tensor_t TMVA::DNN::TCuda< AReal >::CreateTensor ( size_t  n,
size_t  c,
size_t  h,
size_t  w 
)
inlinestatic

Definition at line 111 of file Cuda.h.

◆ CreateWeightTensors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::CreateWeightTensors ( std::vector< Matrix_t > &  newWeights,
const std::vector< Matrix_t > &  weights 
)
inlinestatic

Definition at line 132 of file Cuda.h.

◆ CrossEntropy()

template<typename AReal = Float_t>
AFloat TMVA::DNN::TCuda< AFloat >::CrossEntropy ( const Matrix_t Y,
const Matrix_t output,
const Matrix_t weights 
)
static

Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net.

Definition at line 68 of file LossFunctions.cu.

◆ CrossEntropyGradients()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::CrossEntropyGradients ( Matrix_t dY,
const Matrix_t Y,
const Matrix_t output,
const Matrix_t weights 
)
static

Definition at line 88 of file LossFunctions.cu.

◆ Deflatten()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Deflatten ( Tensor_t A,
const Tensor_t B 
)
static

Transforms each row of B to a matrix and stores it in the tensor A.

Deflatten a matrix into a vector of matrices.

Parameters
[out]AOutput matrices. Each element will be a part of the input.
[in]BInput flat matrix.
[in]sizeNumber of matrices in the output vector.
[in]nRowsNumber of rows in each matrix of the output vector.
[in]nColsNumber of columns on each matrix of the output vector.

Each row in the input matrix is the concatenation of the same row in each of the output matrices. Passing an std::vector to a CUDA kernel is a non trivial task that requires manually allocating and copying to device memory - details in comments within the function's body. Launching one thread per input element.

Definition at line 670 of file Propagation.cu.

◆ Downsample()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Downsample ( Tensor_t A,
Tensor_t B,
const Tensor_t C,
const PoolingDescriptors_t ,
PoolingWorkspace_t ,
size_t  imgHeight,
size_t  imgWidth,
size_t  fltHeight,
size_t  fltWidth,
size_t  strideRows,
size_t  strideCols 
)
static

Downsample the matrix C to the matrix A, using max operation, such that the winning indices are stored in matrix B.

Downsampling function used as the forward propagation step of a Max-Pooling layer.

Parameters
[out]AThe output matrix. Each row corresponds to a slice and each element is the max within a receptive field.
[out]BThe winning indices matrix. Each element is the index of the max element.
[in]CThe input matrix. Each row is a slice.
[in]imgHeightThe height of the input.
[in]imgWidthThe width of the input.
[in]fltHeightHeight of the kernel.
[in]fltWidthWidth of the kernel.
[in]strideRowsstride size in the horizontal dimension.
[in]strideColsstride size in the vertical dimension.

Each output element is the maximum of the receptive field. We also save the winning indices to facilitate back-propagation - we need to know which input element influenced the output and only apply the derivative correction to this particular element. The slicing process is the same as in a convolutional layer, however padding is set to 0.

Definition at line 468 of file Propagation.cu.

◆ DropoutBackward()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::DropoutBackward ( Tensor_t ,
TDescriptors ,
TWorkspace  
)
inlinestatic

Definition at line 469 of file Cuda.h.

◆ DropoutForward() [1/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::DropoutForward ( Matrix_t A,
Scalar_t  p 
)
inlinestatic

Definition at line 464 of file Cuda.h.

◆ DropoutForward() [2/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::DropoutForward ( Tensor_t A,
TDescriptors descriptors,
TWorkspace workspace,
Scalar_t  p 
)
static

Apply dropout with activation probability p to the given tensor A and scale the result by the reciprocal of p.

◆ FastTanh()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::FastTanh ( Tensor_t B)
inlinestatic

Definition at line 325 of file Cuda.h.

◆ FastTanhDerivative()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::FastTanhDerivative ( Tensor_t B,
const Tensor_t A 
)
inlinestatic

Definition at line 326 of file Cuda.h.

◆ Flatten()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Flatten ( Tensor_t A,
const Tensor_t B 
)
static

Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.

Flatten a vector of matrices into a single matrix.

Parameters
[out]AOutput matrix.
[in]BInput vector. Each element is a matrix to be concatenated.
[in]sizeNumber of matrices in the input vector.
[in]nRowsNumber of rows in each matrix of the input vector.
[in]nColsNumber of columns on each matrix of the input vector.

Each row in the output matrix is the concatenation of the same row in each of the input matrices. Passing an std::vector to a CUDA kernel is a non trivial task that requires manually allocating and copying to device memory - details in comments within the function's body. Launching one thread per output element.

Definition at line 592 of file Propagation.cu.

◆ FreeConvWorkspace()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::FreeConvWorkspace ( TWorkspace *&  )
inlinestatic

Only used for certain cudnn on-device memory.

Definition at line 174 of file Cuda.h.

◆ FreePoolDropoutWorkspace()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::FreePoolDropoutWorkspace ( TWorkspace *&  )
inlinestatic

Definition at line 175 of file Cuda.h.

◆ FreeRNNWorkspace()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::FreeRNNWorkspace ( TWorkspace *&  )
inlinestatic

Definition at line 190 of file Cuda.h.

◆ Gauss()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Gauss ( Tensor_t B)
static

Definition at line 218 of file ActivationFunctions.cu.

◆ GaussDerivative()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::GaussDerivative ( Tensor_t B,
const Tensor_t A 
)
static

Definition at line 231 of file ActivationFunctions.cu.

◆ GetRandomGenerator()

template<typename AFloat >
TRandom & TMVA::DNN::TCuda< AFloat >::GetRandomGenerator
static

Definition at line 37 of file Initialization.cu.

◆ GetTensorLayout()

template<typename AReal = Float_t>
static TMVA::Experimental::MemoryLayout TMVA::DNN::TCuda< AReal >::GetTensorLayout ( )
inlinestatic

Definition at line 109 of file Cuda.h.

◆ GRULayerBackward()

template<typename AReal = Float_t>
static Matrix_t & TMVA::DNN::TCuda< AReal >::GRULayerBackward ( Matrix_t state_gradients_backward,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
Matrix_t ,
bool   
)
inlinestatic

Backward pass for GRU Network.

Definition at line 724 of file Cuda.h.

◆ Hadamard() [1/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::Hadamard ( Matrix_t A,
const Matrix_t B 
)
static

◆ Hadamard() [2/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::Hadamard ( Tensor_t A,
const Tensor_t B 
)
static

In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.

◆ IdentityDerivative()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::IdentityDerivative ( Tensor_t B,
const Tensor_t A 
)
static

Definition at line 53 of file ActivationFunctions.cu.

◆ Im2col()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Im2col ( Matrix_t A,
const Matrix_t B,
size_t  imgHeight,
size_t  imgWidth,
size_t  fltHeight,
size_t  fltWidth,
size_t  strideRows,
size_t  strideCols,
size_t  zeroPaddingHeight,
size_t  zeroPaddingWidth 
)
static

Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.

A helper for image operations that rearranges image regions into column vectors.

Parameters
[out]AThe output matrix. Each row corresponds to a receptive field.
[in]BThe input matrix. Each row corresponds to a row in the image view.
[in]imgHeightThe height of the input.
[in]imgWidthThe width of the input.
[in]fltHeightHeight of the kernel.
[in]fltWidthWidth of the kernel.
[in]strideRowsstride size in the horizontal dimension.
[in]strideColsstride size in the vertical dimension.
[in]zeroPaddingHeightThe padding in the horizontal dimension.
[in]zeroPaddingWidthThe padding in the vertical dimension.

This transformation allows us to express a 2D convolution as a matrix multiplication. We can therefore harness the finely tuned GEMM implementation of cuBLAS to achieve maximum performance. This function can greatly speed-up propagation in TConvLayer.

Definition at line 183 of file Propagation.cu.

◆ Im2colFast()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::Im2colFast ( Matrix_t A,
const Matrix_t B,
const std::vector< int > &  V 
)
static

◆ Im2colIndices()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::Im2colIndices ( std::vector< int > &  V,
const Matrix_t B,
size_t  nLocalViews,
size_t  imgHeight,
size_t  imgWidth,
size_t  fltHeight,
size_t  fltWidth,
size_t  strideRows,
size_t  strideCols,
size_t  zeroPaddingHeight,
size_t  zeroPaddingWidth 
)
static

◆ InitializeActivationDescriptor()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeActivationDescriptor ( ActivationDescriptor_t ,
EActivationFunction  ,
double  = 0.0 
)
inlinestatic

Definition at line 156 of file Cuda.h.

◆ InitializeBNormDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeBNormDescriptors ( TDescriptors *&  ,
BNormLayer_t  
)
inlinestatic

Initialize CNN data/operator descriptors.

Not used at the moment.

Definition at line 147 of file Cuda.h.

◆ InitializeConvDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeConvDescriptors ( TDescriptors *&  ,
ConvLayer_t  
)
inlinestatic

Definition at line 152 of file Cuda.h.

◆ InitializeConvWorkspace()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeConvWorkspace ( TWorkspace *&  ,
TDescriptors *&  ,
const DNN::CNN::TConvParams ,
ConvLayer_t  
)
inlinestatic

Definition at line 163 of file Cuda.h.

◆ InitializeGauss()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::InitializeGauss ( Matrix_t A)
static

Definition at line 44 of file Initialization.cu.

◆ InitializeGlorotNormal()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::InitializeGlorotNormal ( Matrix_t A)
static

Truncated normal initialization (Glorot, also called Xavier normal). The values are sampled from a normal distribution with stddev = sqrt(2/(N_input + N_output)), and values larger than 2 * stddev are discarded. See Glorot & Bengio, AISTATS 2010 - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf.

Definition at line 90 of file Initialization.cu.

◆ InitializeGlorotUniform()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::InitializeGlorotUniform ( Matrix_t A)
static

Sample from a uniform distribution in range [ -lim,+lim] where lim = sqrt(6/(N_in+N_out)).

This initialization is also called Xavier uniform see Glorot & Bengio, AISTATS 2010 - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf

Definition at line 119 of file Initialization.cu.

◆ InitializeGRUDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeGRUDescriptors ( TDescriptors *&  ,
GenLayer_t  
)
inlinestatic

Definition at line 179 of file Cuda.h.

◆ InitializeGRUTensors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeGRUTensors ( GenLayer_t )
inlinestatic

Definition at line 187 of file Cuda.h.

◆ InitializeGRUWorkspace()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeGRUWorkspace ( TWorkspace *&  ,
TDescriptors *&  ,
GenLayer_t  
)
inlinestatic

Definition at line 183 of file Cuda.h.

◆ InitializeIdentity()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::InitializeIdentity ( Matrix_t A)
static

Definition at line 142 of file Initialization.cu.

◆ InitializeLSTMDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeLSTMDescriptors ( TDescriptors *&  ,
GenLayer_t  
)
inlinestatic

Definition at line 178 of file Cuda.h.

◆ InitializeLSTMTensors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeLSTMTensors ( GenLayer_t )
inlinestatic

Definition at line 186 of file Cuda.h.

◆ InitializeLSTMWorkspace()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeLSTMWorkspace ( TWorkspace *&  ,
TDescriptors *&  ,
GenLayer_t  
)
inlinestatic

Definition at line 182 of file Cuda.h.

◆ InitializePoolDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializePoolDescriptors ( TDescriptors *&  ,
PoolingLayer_t  
)
inlinestatic

Definition at line 154 of file Cuda.h.

◆ InitializePoolDropoutWorkspace()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializePoolDropoutWorkspace ( TWorkspace *&  ,
TDescriptors *&  ,
const DNN::CNN::TConvParams ,
PoolingLayer_t  
)
inlinestatic

Definition at line 167 of file Cuda.h.

◆ InitializeRNNDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeRNNDescriptors ( TDescriptors *&  ,
GenLayer_t  
)
inlinestatic

Definition at line 177 of file Cuda.h.

◆ InitializeRNNTensors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeRNNTensors ( GenLayer_t )
inlinestatic

Definition at line 185 of file Cuda.h.

◆ InitializeRNNWorkspace()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeRNNWorkspace ( TWorkspace *&  ,
TDescriptors *&  ,
GenLayer_t  
)
inlinestatic

Definition at line 181 of file Cuda.h.

◆ InitializeUniform()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::InitializeUniform ( Matrix_t A)
static

Definition at line 65 of file Initialization.cu.

◆ InitializeZero() [1/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeZero ( Matrix_t A)
static

◆ InitializeZero() [2/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::InitializeZero ( Tensor_t A)
static

◆ IsCudnn()

template<typename AReal = Float_t>
static bool TMVA::DNN::TCuda< AReal >::IsCudnn ( )
inlinestatic

Definition at line 139 of file Cuda.h.

◆ L1Regularization()

template<typename AReal = Float_t>
AFloat TMVA::DNN::TCuda< AFloat >::L1Regularization ( const Matrix_t W)
static

Definition at line 27 of file Regularization.cu.

◆ L2Regularization()

template<typename AReal = Float_t>
AFloat TMVA::DNN::TCuda< AFloat >::L2Regularization ( const Matrix_t W)
static

Definition at line 60 of file Regularization.cu.

◆ LSTMLayerBackward()

template<typename AReal = Float_t>
static Matrix_t & TMVA::DNN::TCuda< AReal >::LSTMLayerBackward ( Matrix_t state_gradients_backward,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
const Matrix_t ,
Matrix_t ,
Matrix_t ,
Matrix_t  
)
inlinestatic

Definition at line 701 of file Cuda.h.

◆ MaxPoolLayerBackward()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::MaxPoolLayerBackward ( Tensor_t activationGradientsBackward,
const Tensor_t activationGradients,
const Tensor_t indexMatrix,
const Tensor_t ,
const Tensor_t ,
const PoolingDescriptors_t ,
PoolingWorkspace_t ,
size_t  imgHeight,
size_t  imgWidth,
size_t  fltHeight,
size_t  fltWidth,
size_t  strideRows,
size_t  strideCols,
size_t  nLocalViews 
)
static

Perform the complete backward propagation step in a Pooling Layer.

Based on the winning indices stored in the index matrix, it just forwards the activation gradients to the previous layer.

Definition at line 499 of file Propagation.cu.

◆ MeanSquaredError()

template<typename AReal = Float_t>
AFloat TMVA::DNN::TCuda< AFloat >::MeanSquaredError ( const Matrix_t Y,
const Matrix_t output,
const Matrix_t weights 
)
static

Definition at line 28 of file LossFunctions.cu.

◆ MeanSquaredErrorGradients()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::MeanSquaredErrorGradients ( Matrix_t dY,
const Matrix_t Y,
const Matrix_t output,
const Matrix_t weights 
)
static

Definition at line 48 of file LossFunctions.cu.

◆ Multiply() [1/3]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::Multiply ( Matrix_t C,
const Matrix_t A,
const Matrix_t B 
)
static

Standard multiplication of two matrices A and B with the result being written into C.

◆ Multiply() [2/3]

void TMVA::DNN::TCuda< double >::Multiply ( TCudaMatrix< double > &  C,
const TCudaMatrix< double > &  A,
const TCudaMatrix< double > &  B 
)

Definition at line 55 of file Arithmetic.cu.

◆ Multiply() [3/3]

void TMVA::DNN::TCuda< float >::Multiply ( TCudaMatrix< float > &  C,
const TCudaMatrix< float > &  A,
const TCudaMatrix< float > &  B 
)

Definition at line 28 of file Arithmetic.cu.

◆ MultiplyTranspose() [1/4]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::MultiplyTranspose ( Matrix_t output,
const Matrix_t input,
const Matrix_t weights 
)
static

Matrix-multiply input with the transpose of weights and write the results into output.

◆ MultiplyTranspose() [2/4]

void TMVA::DNN::TCuda< double >::MultiplyTranspose ( TCudaMatrix< double > &  output,
const TCudaMatrix< double > &  input,
const TCudaMatrix< double > &  Weights 
)

Definition at line 52 of file Propagation.cu.

◆ MultiplyTranspose() [3/4]

void TMVA::DNN::TCuda< float >::MultiplyTranspose ( TCudaMatrix< float > &  output,
const TCudaMatrix< float > &  input,
const TCudaMatrix< float > &  Weights 
)

Definition at line 28 of file Propagation.cu.

◆ MultiplyTranspose() [4/4]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::MultiplyTranspose ( Tensor_t output,
const Tensor_t input,
const Matrix_t weights 
)
inlinestatic

Definition at line 208 of file Cuda.h.

◆ PrepareInternals()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::PrepareInternals ( Tensor_t )
inlinestatic

Dummy placeholder - preparation is currently only required for the CUDA architecture.

Definition at line 545 of file Cuda.h.

◆ PrintTensor()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::PrintTensor ( const Tensor_t A,
const std::string  name = "Cuda-tensor",
bool  = false 
)
static

Definition at line 860 of file Cuda.h.

◆ Rearrange()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AReal >::Rearrange ( Tensor_t out,
const Tensor_t in 
)
static

Rearrange data according to time: fill B x T x D out with T x B x D matrix in.

Definition at line 548 of file Propagation.cu.

◆ ReciprocalElementWise()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::ReciprocalElementWise ( Matrix_t A)
static

Take the reciprocal of each element of the matrix A and write the result into A.

Definition at line 385 of file Arithmetic.cu.

◆ RecurrentLayerBackward()

template<typename AReal = Float_t>
TCudaMatrix< AFloat > & TMVA::DNN::TCuda< AFloat >::RecurrentLayerBackward ( Matrix_t state_gradients_backward,
Matrix_t input_weight_gradients,
Matrix_t state_weight_gradients,
Matrix_t bias_gradients,
Matrix_t df,
const Matrix_t state,
const Matrix_t weights_input,
const Matrix_t weights_state,
const Matrix_t input,
Matrix_t input_gradient 
)
static

Backward pass for Recurrent Networks.

Definition at line 29 of file RecurrentPropagation.cu.

◆ ReleaseBNormDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::ReleaseBNormDescriptors ( TDescriptors *&  )
inlinestatic

Definition at line 161 of file Cuda.h.

◆ ReleaseConvDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::ReleaseConvDescriptors ( TDescriptors *&  )
inlinestatic

Release CNN data/operator descriptors.

Not used at the moment.

Definition at line 159 of file Cuda.h.

◆ ReleaseDescriptor()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::ReleaseDescriptor ( ActivationDescriptor_t )
inlinestatic

Definition at line 172 of file Cuda.h.

◆ ReleasePoolDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::ReleasePoolDescriptors ( TDescriptors *&  )
inlinestatic

Definition at line 160 of file Cuda.h.

◆ ReleaseRNNDescriptors()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::ReleaseRNNDescriptors ( TDescriptors *&  )
inlinestatic

Definition at line 189 of file Cuda.h.

◆ Relu()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Relu ( Tensor_t B)
static

Definition at line 68 of file ActivationFunctions.cu.

◆ ReluDerivative()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::ReluDerivative ( Tensor_t B,
const Tensor_t A 
)
static

Definition at line 81 of file ActivationFunctions.cu.

◆ Reshape()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Reshape ( Matrix_t A,
const Matrix_t B 
)
static

Transform the matrix B to a matrix with different dimensions A.

Definition at line 535 of file Propagation.cu.

◆ RNNBackward()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::RNNBackward ( const Tensor_t ,
const Matrix_t ,
const Matrix_t ,
const Tensor_t ,
const Tensor_t ,
const Matrix_t ,
const Matrix_t ,
const Tensor_t ,
Tensor_t ,
Matrix_t ,
Matrix_t ,
Tensor_t ,
const RNNDescriptors_t ,
RNNWorkspace_t  
)
inlinestatic

Definition at line 693 of file Cuda.h.

◆ RNNForward()

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::RNNForward ( const Tensor_t ,
const Matrix_t ,
const Matrix_t ,
const Tensor_t ,
Tensor_t ,
Matrix_t ,
Matrix_t ,
const RNNDescriptors_t ,
RNNWorkspace_t ,
bool   
)
inlinestatic

Definition at line 687 of file Cuda.h.

◆ RotateWeights()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::RotateWeights ( Matrix_t A,
const Matrix_t B,
size_t  filterDepth,
size_t  filterHeight,
size_t  filterWidth,
size_t  numFilters 
)
static

Rotates the matrix B, which represents the weights, and stores the result in the matrix A.

Definition at line 207 of file Propagation.cu.

◆ ScaleAdd() [1/4]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::ScaleAdd ( Matrix_t A,
const Matrix_t B,
Scalar_t  beta = 1.0 
)
static

Adds the elements in matrix B scaled by beta to the elements in the matrix A.

This is required for the weight update in the gradient descent step.

◆ ScaleAdd() [2/4]

void TMVA::DNN::TCuda< double >::ScaleAdd ( TCudaMatrix< double > &  B,
const TCudaMatrix< double > &  A,
double  alpha 
)

Definition at line 330 of file Arithmetic.cu.

◆ ScaleAdd() [3/4]

void TMVA::DNN::TCuda< float >::ScaleAdd ( TCudaMatrix< float > &  B,
const TCudaMatrix< float > &  A,
float  alpha 
)

Definition at line 317 of file Arithmetic.cu.

◆ ScaleAdd() [4/4]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::ScaleAdd ( Tensor_t A,
const Tensor_t B,
Scalar_t  beta = 1.0 
)
static

Above functions extended to vectors.

◆ SetRandomSeed()

template<typename AFloat >
void TMVA::DNN::TCuda< AFloat >::SetRandomSeed ( size_t  seed)
static

Definition at line 31 of file Initialization.cu.

◆ Sigmoid() [1/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::Sigmoid ( Matrix_t YHat,
const Matrix_t  
)
static

◆ Sigmoid() [2/2]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::Sigmoid ( Tensor_t B)
static

◆ SigmoidDerivative()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::SigmoidDerivative ( Tensor_t B,
const Tensor_t A 
)
static

Definition at line 111 of file ActivationFunctions.cu.

◆ Softmax()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Softmax ( Matrix_t YHat,
const Matrix_t  
)
static

Definition at line 42 of file OutputFunctions.cu.

◆ SoftmaxCrossEntropy()

template<typename AReal = Float_t>
AFloat TMVA::DNN::TCuda< AFloat >::SoftmaxCrossEntropy ( const Matrix_t Y,
const Matrix_t output,
const Matrix_t weights 
)
static

Softmax transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net.

Definition at line 108 of file LossFunctions.cu.

◆ SoftmaxCrossEntropyGradients()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::SoftmaxCrossEntropyGradients ( Matrix_t dY,
const Matrix_t Y,
const Matrix_t output,
const Matrix_t weights 
)
static

Definition at line 128 of file LossFunctions.cu.

◆ SoftSign()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::SoftSign ( Tensor_t B)
static

Definition at line 188 of file ActivationFunctions.cu.

◆ SoftSignDerivative()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::SoftSignDerivative ( Tensor_t B,
const Tensor_t A 
)
static

Definition at line 201 of file ActivationFunctions.cu.

◆ SqrtElementWise()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::SqrtElementWise ( Matrix_t A)
static

Square root each element of the matrix A and write the result into A.

Definition at line 411 of file Arithmetic.cu.

◆ SquareElementWise()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::SquareElementWise ( Matrix_t A)
static

Square each element of the matrix A and write the result into A.

Definition at line 398 of file Arithmetic.cu.

◆ Sum()

template<typename AReal = Float_t>
AFloat TMVA::DNN::TCuda< AFloat >::Sum ( const Matrix_t A)
static

Compute the sum of all elements in A.

Definition at line 172 of file Arithmetic.cu.

◆ SumColumns() [1/3]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::SumColumns ( Matrix_t B,
const Matrix_t A,
Scalar_t  alpha = 1.0,
Scalar_t  beta = 0. 
)
static

Sum columns of (m x n) matrix A and write the results into the first n elements in B.

◆ SumColumns() [2/3]

void TMVA::DNN::TCuda< double >::SumColumns ( TCudaMatrix< double > &  B,
const TCudaMatrix< double > &  A,
double  alpha,
double  beta 
)

Definition at line 213 of file Arithmetic.cu.

◆ SumColumns() [3/3]

void TMVA::DNN::TCuda< float >::SumColumns ( TCudaMatrix< float > &  B,
const TCudaMatrix< float > &  A,
float  alpha,
float  beta 
)

Definition at line 189 of file Arithmetic.cu.

◆ SumRows() [1/3]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::SumRows ( Matrix_t B,
const Matrix_t A 
)
static

extra functions defined only for CPU architecture !!!

Sum rows of (m x n) matrix A and write the results into the first m elements in B.

◆ SumRows() [2/3]

void TMVA::DNN::TCuda< double >::SumRows ( TCudaMatrix< double > &  B,
const TCudaMatrix< double > &  A 
)

Definition at line 259 of file Arithmetic.cu.

◆ SumRows() [3/3]

void TMVA::DNN::TCuda< float >::SumRows ( TCudaMatrix< float > &  B,
const TCudaMatrix< float > &  A 
)

Definition at line 236 of file Arithmetic.cu.

◆ SymmetricRelu()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::SymmetricRelu ( Tensor_t B)
static

Definition at line 158 of file ActivationFunctions.cu.

◆ SymmetricReluDerivative()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::SymmetricReluDerivative ( Tensor_t B,
const Tensor_t A 
)
static

Definition at line 171 of file ActivationFunctions.cu.

◆ Tanh()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::Tanh ( Tensor_t B)
static

Definition at line 128 of file ActivationFunctions.cu.

◆ TanhDerivative()

template<typename AReal = Float_t>
void TMVA::DNN::TCuda< AFloat >::TanhDerivative ( Tensor_t B,
const Tensor_t A 
)
static

Definition at line 141 of file ActivationFunctions.cu.

◆ TransposeMultiply() [1/3]

template<typename AReal = Float_t>
static void TMVA::DNN::TCuda< AReal >::TransposeMultiply ( Matrix_t output,
const Matrix_t input,
const Matrix_t Weights,
Scalar_t  alpha = 1.0,
Scalar_t  beta = 0. 
)
static

Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C. In this overload, output, input and Weights correspond to C, A and B respectively.

◆ TransposeMultiply() [2/3]

void TMVA::DNN::TCuda< double >::TransposeMultiply ( TCudaMatrix< double > &  C,
const TCudaMatrix< double > &  A,
const TCudaMatrix< double > &  B,
double  alpha,
double  beta 
)

Definition at line 109 of file Arithmetic.cu.

◆ TransposeMultiply() [3/3]

void TMVA::DNN::TCuda< float >::TransposeMultiply ( TCudaMatrix< float > &  C,
const TCudaMatrix< float > &  A,
const TCudaMatrix< float > &  B,
float  alpha,
float  beta 
)

Definition at line 82 of file Arithmetic.cu.

Member Data Documentation

◆ fgRandomGen

template<typename AFloat >
TRandom * TMVA::DNN::TCuda< AFloat >::fgRandomGen = nullptr
static private

Definition at line 67 of file Cuda.h.

  • tmva/tmva/inc/TMVA/DNN/Architectures/Cuda.h
  • tmva/tmva/src/DNN/Architectures/Cuda/ActivationFunctions.cu
  • tmva/tmva/src/DNN/Architectures/Cuda/Arithmetic.cu
  • tmva/tmva/src/DNN/Architectures/Cuda/Initialization.cu
  • tmva/tmva/src/DNN/Architectures/Cuda/LossFunctions.cu
  • tmva/tmva/src/DNN/Architectures/Cuda/OutputFunctions.cu
  • tmva/tmva/src/DNN/Architectures/Cuda/Propagation.cu
  • tmva/tmva/src/DNN/Architectures/Cuda/RecurrentPropagation.cu
  • tmva/tmva/src/DNN/Architectures/Cuda/Regularization.cu