36 float alpha = 1.0, beta = 0.0;
39 cudaStream_t s =
output.GetComputeStream();
60 double alpha = 1.0, beta = 0.0;
63 cudaStream_t s =
output.GetComputeStream();
75template<
typename AFloat>
81 cudaStream_t s =
Weights.GetComputeStream();
82 ::TMVA::DNN::Cuda::AddRowWise<<<gridDims, blockDims, 0, s>>>(
84 theta.GetDataPointer(),
90template<
typename AFloat>
128template<
typename AFloat>
132 size_t m = B.GetNrows();
133 size_t n = B.GetNcols();
139template<
typename AFloat>
143 size_t n = B.GetSize();
150template<
typename AFloat>
155 Fatal(
"calculateDimension",
"Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride)"
182template<
typename AFloat>
194 size_t depth = B.GetNrows();
198 cudaStream_t s = A.GetComputeStream();
200 ::TMVA::DNN::Cuda::Im2Col<<<gridDims, blockDims, 0, s>>>(A.GetDataPointer(), B.GetDataPointer(),
depth,
imgHeight,
imgWidth,
206template<
typename AFloat>
216 cudaStream_t s = B.GetComputeStream();
218 ::TMVA::DNN::Cuda::RotateWeights<<<gridDims, blockDims, 0, s>>>(A.GetDataPointer(), B.GetDataPointer(),
filterDepth,
223template <
typename AFloat>
235template <
typename AFloat>
254 for(
size_t event = 0;
event <
input.GetFirstSize();
event++) {
275template<
typename AFloat>
323template<
typename AFloat>
355 R__ASSERT( df.GetFirstSize() == batchSize);
358 for(
size_t event = 0;
event < batchSize;
event++) {
368template<
typename AFloat>
386 const size_t filterSize = filterHeight * filterWidth;
390 R__ASSERT( df.GetFirstSize() == batchSize);
404 for(
size_t event = 0;
event < batchSize;
event++) {
415template<
typename AFloat>
424 for (
size_t event = 0;
event < batchSize;
event++) {
431template<
typename AFloat>
437 cudaStream_t s =
output.GetComputeStream();
438 ::TMVA::DNN::Cuda::AddBiases<<<gridDims, blockDims, 0, s>>>(
467template<
typename AFloat>
480 size_t depth = C.GetCSize();
481 size_t bsize = C.GetFirstSize();
485 cudaStream_t s = A.GetComputeStream();
487 for(
size_t event = 0;
event <
bsize;
event++) {
492 ::TMVA::DNN::Cuda::Downsample<<<gridDims, blockDims, 0, s>>>(A.GetDataPointerAt(event), B.GetDataPointerAt(event),
498template<
typename AFloat>
523 for(
size_t event = 0;
event <
bsize;
event++) {
529 strideRows, strideCols);
534template<
typename AFloat>
539 cudaStream_t s = A.GetComputeStream();
541 ::TMVA::DNN::Cuda::Reshape<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(),
542 A.GetNrows(), A.GetNcols(), B.GetNrows(), B.GetNcols());
547template <
typename AReal>
591template<
typename AFloat>
596 size_t nDepth = B.GetFirstSize();
597 size_t nRows = B.GetCSize();
598 size_t nCols = B.GetWSize();
599 if (B.GetNDim()==4)
nCols *= B.GetHSize();
604 cudaStream_t s = A.GetComputeStream();
639 if (B.GetLayout() == GetTensorLayout() )
640 ::TMVA::DNN::Cuda::Flatten<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(),
nDepth,
nRows,
nCols);
644 ::TMVA::DNN::Cuda::FlattenRM<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(),
nDepth,
nRows,
nCols);
669template<
typename AFloat>
673 size_t nDepth = A.GetFirstSize();
674 size_t nRows = A.GetCSize();
675 size_t nCols = A.GetWSize();
676 if (A.GetNDim()==4)
nCols *= A.GetHSize();
682 cudaStream_t s = B.GetComputeStream();
705 if (A.GetLayout() == GetTensorLayout() )
706 ::TMVA::DNN::Cuda::Deflatten<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(),
nDepth,
nRows,
nCols);
709 ::TMVA::DNN::Cuda::DeflattenRM<<<gridDims, blockDims>>>(A.GetDataPointer(), B.GetDataPointer(),
nDepth,
nRows,
nCols);
728template <
typename AFloat>
742template <
typename AFloat>
753template <
typename AFloat>
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
static dim3 BlockDims2D()
static dim3 GridDims2D(int nrows, int ncols)
EActivationFunction
Enum that represents layer activation functions.
create variable transformations
size_t strideRows
The number of row pixels to slide the filter each step.
size_t filterHeight
The height of the filter.
size_t inputHeight
The height of the previous layer or input.
size_t paddingWidth
The number of zero layers left and right of the input.
size_t filterWidth
The width of the filter.
size_t paddingHeight
The number of zero layers added top and bottom of the input.
size_t inputWidth
The width of the previous layer or input.
size_t strideCols
The number of column pixels to slide the filter each step.