#ifndef TMVA_DNN_ARCHITECTURES_CUDA
#define TMVA_DNN_ARCHITECTURES_CUDA
template<typename AReal = Float_t>
class TCuda
static void CreateWeightTensors(std::vector<Matrix_t> &newWeights,
                                const std::vector<Matrix_t> &weights)
{
   if (!newWeights.empty()) newWeights.clear();
   size_t n = weights.size();
   for (size_t i = 0; i < n; ++i)
      newWeights.emplace_back(weights[i].GetNrows(), weights[i].GetNcols());
}
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
{
   Error("InitializeBNormDescriptors", "Batch normalization on GPU is supported only with Cudnn");
}
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
                     Matrix_t &biasGradients, const Tensor_t &df,
                     const Tensor_t &activationGradients, const Matrix_t &weights,
                     const Tensor_t &activationBackward);
template <typename AMatrix_t>
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A);

template <typename ATensor_t>
static void CopyDiffArch(Tensor_t &A, const ATensor_t &B);

template <typename AMatrix_t>
static void CopyDiffArch(std::vector<Matrix_t> &A, const std::vector<AMatrix_t> &B);
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct,
                                      const ActivationDescriptor_t activationDescr,
                                      const double coef = 0.0, const AFloat alpha = 1,
                                      const AFloat beta = 0);
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth,
                   size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
                   size_t zeroPaddingHeight, size_t zeroPaddingWidth);
static void Im2colIndices(std::vector<int> &V, const Matrix_t &B, size_t nLocalViews,
                          size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth,
                          size_t strideRows, size_t strideCols,
                          size_t zeroPaddingHeight, size_t zeroPaddingWidth);
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth,
                          size_t filterHeight, size_t filterWidth, size_t numFilters);
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
                              Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients,
                              const Matrix_t &weights, const Tensor_t &activationBackward,
                              const Tensor_t &outputTensor, EActivationFunction activFunc,
                              const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize,
                              size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
                              size_t width, size_t filterDepth, size_t filterHeight,
                              size_t filterWidth, size_t nLocalViews);
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward,
                                             const Tensor_t &df, const Matrix_t &weights,
                                             size_t batchSize, size_t inputHeight,
                                             size_t inputWidth, size_t depth, size_t height,
                                             size_t width, size_t filterDepth,
                                             size_t filterHeight, size_t filterWidth);
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df,
                                         const Tensor_t &activations_backward, size_t batchSize,
                                         size_t inputHeight, size_t inputWidth, size_t depth,
                                         size_t height, size_t width, size_t filterDepth,
                                         size_t filterHeight, size_t filterWidth,
                                         size_t nLocalViews);
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df,
                                       size_t batchSize, size_t depth, size_t nLocalViews);
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C,
                       const PoolingDescriptors_t &, PoolingWorkspace_t &,
                       size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth,
                       size_t strideRows, size_t strideCols);

static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward,
                                 const Tensor_t &activationGradients, const Tensor_t &indexMatrix,
                                 const Tensor_t &, const Tensor_t &,
                                 const PoolingDescriptors_t &, PoolingWorkspace_t &,
                                 size_t imgHeight, size_t imgWidth, size_t fltHeight,
                                 size_t fltWidth, size_t strideRows, size_t strideCols,
                                 size_t nLocalViews);
// body of LSTMLayerBackward: recurrent layers require the cuDNN backend
Fatal("TCuda::LSTMLayerBackward",
      "Recurrent layers are not supported in the native Cuda architecture!!!");
return state_gradients_backward;
// body of GRULayerBackward: recurrent layers require the cuDNN backend
Fatal("TCuda::GRULayerBackward",
      "Recurrent layers are not supported in the native Cuda architecture!!!");
return state_gradients_backward;
template <typename AFloat>
template <typename AMatrix_t>
void TCuda<AFloat>::CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
{
   // copy from another architecture via a host-side TMatrixT temporary
   TMatrixT<AFloat> tmp = A;
   Copy(B, Matrix_t(tmp));
}
template <typename AFloat>
template <typename AMatrix_t>
void TCuda<AFloat>::CopyDiffArch(std::vector<Matrix_t> &B, const std::vector<AMatrix_t> &A)
{
   for (size_t i = 0; i < B.size(); ++i) {
      CopyDiffArch(B[i], A[i]);
   }
}
template <typename AFloat>
void TCuda<AFloat>::PrintTensor(const Tensor_t &A, const std::string name, bool /*truncate*/)
{
   std::cout << name << " size = " << A.GetSize() << " shape = { ";
   auto shape = A.GetShape();
   for (size_t k = 0; k < shape.size() - 1; ++k)
      std::cout << shape[k] << " , ";
   std::cout << shape.back() << " } ";
   std::cout << " strides = { ";
   auto strides = A.GetStrides();
   for (size_t k = 0; k < strides.size() - 1; ++k)
      std::cout << strides[k] << " , ";
   std::cout << strides.back() << " }\n ";

   if (A.GetShape().size() == 2) {
      // print a rank-2 tensor row by row
      for (size_t i = 0; i < A.GetShape()[0]; ++i) {
         std::cout << "{ ";
         for (size_t j = 0; j < A.GetShape()[1]; ++j) {
            std::cout << A(i, j) << " ";
         }
         std::cout << " } " << std::endl;
      }
   } else if (A.GetShape().size() == 3) {
      // print a rank-3 tensor as a list of matrices
      for (size_t i = 0; i < A.GetFirstSize(); ++i) {
         std::cout << "{ ";
         for (size_t j = 0; j < A.GetHSize(); ++j) {
            std::cout << "{ ";
            for (size_t k = 0; k < A.GetWSize(); ++k) {
               std::cout << A(i, j, k) << " ";
            }
            std::cout << " } " << std::endl;
         }
         std::cout << " } " << std::endl;
      }
   } else {
      // fall back to a flat dump of the data buffer
      for (size_t l = 0; l < A.GetSize(); ++l) {
         std::cout << A.GetData()[l] << " ";
      }
      std::cout << "\n";
   }
}
void Error(const char *location, const char *msgfmt,...)
void Fatal(const char *location, const char *msgfmt,...)
Generic Max Pooling Layer class.
Layer implementing Batch Normalization.
TCudaMatrix< AFloat > GetMatrix() const
The TCuda architecture class.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void RNNBackward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, Tensor_t &, const RNNDescriptors_t &, RNNWorkspace_t &)
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
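Only the signature appears in this header; below is a plain-buffer sketch of the conventional element-wise Adam step such an updater applies (the rule itself is an assumption, not taken from the source):

   #include <cmath>
   #include <cstddef>

   // A -= alpha * M / (sqrt(V) + eps), element by element
   void adamUpdateSketch(float *A, const float *M, const float *V,
                         std::size_t n, float alpha, float eps)
   {
      for (std::size_t i = 0; i < n; ++i)
         A[i] -= alpha * M[i] / (std::sqrt(V[i]) + eps);
   }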
static void InitializeGRUWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
TCudaMatrix< AFloat > Matrix_t
static Matrix_t & LSTMLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &)
static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
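The header gives only the signature; the conventional element-wise rule for such a helper is sketched below (the factor-of-2 convention is an assumption; some implementations fold it into weightDecay):

   // gradient of weightDecay * sum(W^2), added into the existing gradients A
   for (std::size_t i = 0; i < n; ++i)
      A[i] += 2 * weightDecay * W[i];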
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void SymmetricRelu(Tensor_t &B)
static void InitializeUniform(Matrix_t &A)
static void FastTanh(Tensor_t &B)
static void ReciprocalElementWise(Matrix_t &A)
Take the reciprocal of each element of the matrix A and write the result into A.
static void Softmax(Matrix_t &YHat, const Matrix_t &)
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
static void InitializeIdentity(Matrix_t &A)
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void InitializeGlorotUniform(Matrix_t &A)
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using the max operation, such that the winning indices are store...
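A hedged sketch of the max-pooling semantics described here, written for a single plain row-major channel; the names and the index-recording convention are illustrative assumptions:

   #include <cstddef>

   // For each pooling window, keep the maximum of C and remember the winning
   // flat index in B (used later by the backward pass).
   void downsampleSketch(float *A, float *B, const float *C,
                         std::size_t imgHeight, std::size_t imgWidth,
                         std::size_t fltHeight, std::size_t fltWidth,
                         std::size_t strideRows, std::size_t strideCols)
   {
      std::size_t outH = (imgHeight - fltHeight) / strideRows + 1;
      std::size_t outW = (imgWidth - fltWidth) / strideCols + 1;
      for (std::size_t i = 0; i < outH; ++i) {
         for (std::size_t j = 0; j < outW; ++j) {
            std::size_t best = (i * strideRows) * imgWidth + j * strideCols;
            for (std::size_t di = 0; di < fltHeight; ++di)
               for (std::size_t dj = 0; dj < fltWidth; ++dj) {
                  std::size_t idx = (i * strideRows + di) * imgWidth + (j * strideCols + dj);
                  if (C[idx] > C[best]) best = idx;
               }
            A[i * outW + j] = C[best];     // pooled value
            B[i * outW + j] = (float)best; // winning index
         }
      }
   }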
static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void InitializeRNNDescriptors(TDescriptors *&, GenLayer_t *)
static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given tensor A and scale the result by reciprocal ...
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void InitializeGRUDescriptors(TDescriptors *&, GenLayer_t *)
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void InitializeGRUTensors(GenLayer_t *)
static void FreeConvWorkspace(TWorkspace *&)
Only used for certain cudnn on-device memory.
static void Sigmoid(Matrix_t &YHat, const Matrix_t &)
static void InitializeZero(Tensor_t &A)
static void InitializeRNNWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
Initialize CNN data/operator descriptors.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
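A minimal sketch of the usual output-size arithmetic such a helper computes; rounding and validity checks in the real implementation may differ:

   #include <cstddef>

   // (imgDim - fltDim + 2*padding) / stride + 1, e.g. (32 - 5 + 2*2) / 1 + 1 = 32
   std::size_t calculateDimensionSketch(std::size_t imgDim, std::size_t fltDim,
                                        std::size_t padding, std::size_t stride)
   {
      return (imgDim - fltDim + 2 * padding) / stride + 1;
   }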
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void SumRows(Matrix_t &B, const Matrix_t &A)
Extra functions defined only for the CPU architecture.
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams ¶ms, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
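The primitives listed in this reference (Im2colIndices, Im2colFast, MultiplyTranspose, AddConvBiases, ActivationFunctionForward) are the building blocks of the canonical im2col-plus-GEMM forward pass; the outline below, per batch element, is a hedged composition, not the verbatim implementation:

   // Im2colIndices(V, input_i, nLocalViews, ...);       // gather indices, computed once
   // Im2colFast(inputTr, input_i, V);                   // local views become rows of inputTr
   // MultiplyTranspose(output_i, inputTr, weights);     // GEMM against the filter matrix
   // AddConvBiases(output_i, biases);                   // one bias per filter
   // ActivationFunctionForward(output, activFunc, ...); // apply the nonlinearity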
static void Sigmoid(Tensor_t &B)
static void DropoutBackward(Tensor_t &, TDescriptors *, TWorkspace *)
static void InitializeLSTMTensors(GenLayer_t *)
static void SoftSignDerivative(Tensor_t &B, const Tensor_t &A)
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void SymmetricReluDerivative(Tensor_t &B, const Tensor_t &A)
static void FreeRNNWorkspace(TWorkspace *&)
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
static void InitializeLSTMWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
static void PrintTensor(const Tensor_t &A, const std::string name="Cuda-tensor", bool=false)
static void Im2colIndices(std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Copy(Tensor_t &A, const Tensor_t &B)
static void InitializeConvDescriptors(TDescriptors *&, ConvLayer_t *)
static void Tanh(Tensor_t &B)
static void SigmoidDerivative(Tensor_t &B, const Tensor_t &A)
static TRandom * fgRandomGen
static void InitializePoolDropoutWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *)
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void ScaleAdd(Tensor_t &A, const Tensor_t &B, Scalar_t beta=1.0)
Above functions extended to vectors.
static TMVA::Experimental::MemoryLayout GetTensorLayout()
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
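Because the sigmoid is implicit, the quantity conventionally computed is the following (the per-event weights w_i and the 1/m normalization are assumptions):

   L(Y,O) = -\frac{1}{m} \sum_{i,j} w_i \left[ Y_{ij}\,\ln\sigma(O_{ij}) + (1-Y_{ij})\,\ln\bigl(1-\sigma(O_{ij})\bigr) \right],
   \qquad \sigma(x) = \frac{1}{1+e^{-x}}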
static void Gauss(Tensor_t &B)
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const AFloat alpha=1, const AFloat beta=0)
static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Matrix_t & RecurrentLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient)
Backward pass for Recurrent Networks.
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static void InitializePoolDescriptors(TDescriptors *&, PoolingLayer_t *)
static void RNNForward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Tensor_t &, Matrix_t &, Matrix_t &, const RNNDescriptors_t &, RNNWorkspace_t &, bool)
static void Hadamard(Matrix_t &A, const Matrix_t &B)
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
TCudaTensor< AFloat > Tensor_t
static void InitializeGlorotNormal(Matrix_t &A)
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements of B.
static void ReluDerivative(Tensor_t &B, const Tensor_t &A)
static Scalar_t L2Regularization(const Matrix_t &W)
static void IdentityDerivative(Tensor_t &B, const Tensor_t &A)
static TRandom & GetRandomGenerator()
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
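On plain row-major buffers the in-place product amounts to the loop below (illustrative only, not the CUDA kernel):

   // element-wise product, written back into A
   for (std::size_t i = 0; i < n; ++i)
      A[i] *= B[i];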
static void CreateWeightTensors(std::vector< Matrix_t > &newWeights, const std::vector< Matrix_t > &weights)
static void SqrtElementWise(Matrix_t &A)
Take the square root of each element of the matrix A and write the result into A.
static void InitializeGauss(Matrix_t &A)
static void InitializeActivationDescriptor(ActivationDescriptor_t &, EActivationFunction, double=0.0)
static void GaussDerivative(Tensor_t &B, const Tensor_t &A)
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static void InitializeConvWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *)
static void InitializeLSTMDescriptors(TDescriptors *&, GenLayer_t *)
static void ReleaseBNormDescriptors(TDescriptors *&)
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply all elements of the matrix A by the constant beta and write the result into A.
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
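A hedged index-mapping sketch of the rearrangement just described, with out of shape B x T x D and in of shape T x B x D (flat row-major indexing is an assumption):

   for (std::size_t t = 0; t < T; ++t)
      for (std::size_t b = 0; b < B; ++b)
         for (std::size_t d = 0; d < D; ++d)
            out[(b * T + t) * D + d] = in[(t * B + b) * D + d];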
static void AddRowWise(Tensor_t &output, const Matrix_t &biases)
static void SoftSign(Tensor_t &B)
static void InitializeRNNTensors(GenLayer_t *)
TCudaDeviceBuffer< AFloat > DeviceBuffer_t
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights)
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
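Element by element the stated semantics read as follows (a plain-buffer sketch, not the GPU kernel actually used):

   // output(i,j) = sum_k input(i,k) * weights(j,k)
   for (std::size_t i = 0; i < nRows; ++i)
      for (std::size_t j = 0; j < nFilters; ++j) {
         float acc = 0.f;
         for (std::size_t k = 0; k < nCols; ++k)
            acc += input[i * nCols + k] * weights[j * nCols + k];
         output[i * nFilters + j] = acc;
      }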
static void ReleaseConvDescriptors(TDescriptors *&)
Release CNN data/operator descriptors.
static void FreePoolDropoutWorkspace(TWorkspace *&)
static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A)
static void SetRandomSeed(size_t seed)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void TanhDerivative(Tensor_t &B, const Tensor_t &A)
static void ReleaseDescriptor(ActivationDescriptor_t &)
static void CopyDiffArch(std::vector< Matrix_t > &A, const std::vector< AMatrix_t > &B)
static void ReleaseRNNDescriptors(TDescriptors *&)
static Matrix_t & GRULayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, bool)
Backward pass for GRU Network.
static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const AFloat alpha=1, const AFloat beta=0)
Computes the gradient of the activation function.
static void Relu(Tensor_t &B)
static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
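With the softmax implicit, the conventional form is the following (the 1/m normalization and per-event weights w_i are assumptions):

   L(Y,O) = -\frac{1}{m} \sum_{i} w_i \sum_{j} Y_{ij} \,\ln\frac{e^{O_{ij}}}{\sum_{k} e^{O_{ik}}}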
static Scalar_t L1Regularization(const Matrix_t &W)
static void InitializeZero(Matrix_t &A)
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B into the matrix A, which may have different dimensions.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B into local-view format, suitable for convolution, and store it in the matrix A.
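A worked size example, assuming the standard im2col layout with one row per local view and one column per filter element (single input channel):

   // 4x4 image, 2x2 filter, stride 2, no zero padding:
   //   views per row/column: (4 - 2)/2 + 1 = 2, so nLocalViews = 2 * 2 = 4
   //   A therefore has 4 rows and fltHeight * fltWidth = 4 columns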
static void DropoutForward(Matrix_t &A, Scalar_t p)
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements of matrix B, scaled by beta, to the elements of matrix A.
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in the matrix A.
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void CopyDiffArch(Tensor_t &A, const ATensor_t &B)
static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void ReleasePoolDescriptors(TDescriptors *&)
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Generic General Layer class.
This is the base class for the ROOT Random number generators.
double beta(double x, double y)
Calculates the beta function.
void Copy(void *source, void *dest)
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
EActivationFunction
Enum that represents layer activation functions.
MemoryLayout
Memory layout type (copy from RTensor.hxx)
create variable transformations
static void output(int code)