Logo ROOT  
Reference Guide
Cuda.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 05/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ///////////////////////////////////////////////////////////////////
13 // Definition of the TCuda architecture class, which provides an //
14 // implementation of the low-level functionality for neural //
15 // networks for the CUDA computing architectures. //
16 ///////////////////////////////////////////////////////////////////
17 
18 #ifndef TMVA_DNN_ARCHITECTURES_CUDA
19 #define TMVA_DNN_ARCHITECTURES_CUDA
20 
21 #include "TMVA/DNN/Functions.h"
24 #include "TMVA/DNN/CNN/ConvLayer.h"
26 
27 
28 #include "cuda.h"
29 #include "Cuda/CudaBuffers.h"
30 #include "Cuda/CudaMatrix.h"
31 #include "Cuda/CudaTensor.h"
32 #include "TMVA/DNN/DataLoader.h"
33 #include <utility>
34 #include <vector>
35 #include <string>
36 
37 class TRandom;
38 
39 namespace TMVA
40 {
41 namespace DNN
42 {
    /// Empty tag struct (no members). NOTE(review): its role is not visible in this header; confirm against its users.
51  struct CudaDataType {};
    /// Empty tag struct (no members). NOTE(review): presumably a placeholder counterpart of CudaDataType; confirm.
52  struct DummyCudaDataType {};
53 
55 
56 /** The TCuda architecture class.
57  *
58  * Low-level interface class for CUDA computing architectures. Contains as
59  * public types the declaration of the scalar, matrix and buffer types
60  * for this architecture as well as the remaining functions in the low-level
61  * interface in the form of static members.
62  */
63 template<typename AReal = Float_t>
64 class TCuda
65 {
66 private:
67  static TRandom * fgRandomGen;
68 public:
69 
70  using AFloat = AReal;
71  using Scalar_t = AFloat;
72 
77 
82  //using OpTensorDescriptor_t = CudaOpTensorDescriptor;
85  //using ReductionDescriptor_t = CudaReduceTensorDescriptor;
92 
93  using EmptyDescriptor_t = CudaEmptyDescriptor; // Used if a descriptor is not needed in a class
94 
98 
105 
108 
110 
111  static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w) {
112  return Tensor_t( {c,h*w,n}, GetTensorLayout());
113  }
114  static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
115  {
116  return Tensor_t( {t, w, b}, GetTensorLayout());
117  }
118  static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w) {
119  return Tensor_t( buffer, {c,h*w, n}, GetTensorLayout(), 0, 0);
120  }
121  static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
122  {
123  return Tensor_t(buffer, {t, w, b}, GetTensorLayout());
124  }
125 
126  // create a weight tensor/matrix from another tensor using its shape
127  // static Matrix_t CreateWeightTensor( Matrix_t & A) {
128  // return Matrix_t( A.GetNrows(), A.GetNcols());
129  // }
130  // create a weight tensor/matrix vector from another tensor/weight vector using the given tensor shapes
131  // this function is used by the optimizers to store intermediate weight representations
132  static void CreateWeightTensors( std::vector<Matrix_t> & newWeights, const std::vector<Matrix_t> & weights) {
133  if (!newWeights.empty()) newWeights.clear();
134  size_t n = weights.size();
135  for (size_t i = 0; i < n; ++i)
136  newWeights.emplace_back( weights[i].GetNrows(), weights[i].GetNcols());
137  }
138 
     /** Always returns false. */
139  static bool IsCudnn() { return false; }
140  //____________________________________________________________________________
141  //
142  // Architecture Initialization
143  //____________________________________________________________________________
144 
145  /** Initialize CNN data/operator descriptors. Not used at the moment.*/
146 
147  static void InitializeBNormDescriptors(TDescriptors * & /*descriptors*/,
148  BNormLayer_t */*L = nullptr*/) {
149  Error("InitializeBNormDescriptrs", "Batch normalization on GPU is supported only with Cudnn");
150  }
151 
     /** No-op: the body is empty and both arguments are ignored. */
152  static void InitializeConvDescriptors(TDescriptors *& /*descriptors*/, ConvLayer_t * /*L = nullptr*/) {}
153 
     /** No-op: the body is empty and both arguments are ignored. */
154  static void InitializePoolDescriptors(TDescriptors *& /*descriptors*/, PoolingLayer_t * /*L = nullptr*/) {}
155 
     /** No-op: the body is empty and all three arguments are ignored (the last one defaults to 0.0). */
156  static void InitializeActivationDescriptor(ActivationDescriptor_t &/*descriptors*/, EActivationFunction /*activFunc */ , double /*coef*/ = 0.0) {}
157 
158  /** Release CNN data/operator descriptors. Not used at the moment.*/
     // All three release functions have empty bodies; the descriptor pointer is left untouched.
159  static void ReleaseConvDescriptors(TDescriptors * & /*descriptors*/) {}
160  static void ReleasePoolDescriptors(TDescriptors * & /*descriptors*/) {}
161  static void ReleaseBNormDescriptors(TDescriptors *& /*descriptors*/) {}
162 
     /** No-op: the body is empty; workspace, descriptors, parameters and layer are all ignored. */
163  static void InitializeConvWorkspace(TWorkspace * & /*workspace*/,
164  TDescriptors * & /*descriptors*/,
165  const DNN::CNN::TConvParams & /*params*/,
166  ConvLayer_t */*L = nullptr*/) {}
     /** No-op: the body is empty; workspace, descriptors, parameters and layer are all ignored. */
167  static void InitializePoolDropoutWorkspace(TWorkspace * & /*workspace*/,
168  TDescriptors * & /*descriptors*/,
169  const DNN::CNN::TConvParams & /*params*/,
170  PoolingLayer_t */*L = nullptr*/) {}
171 
     /** No-op: the body is empty and the activation descriptor is left untouched. */
172  static void ReleaseDescriptor(ActivationDescriptor_t & /*activationDescr*/) {}
173 
     // Both functions have empty bodies; the workspace pointer is left untouched.
174  static void FreeConvWorkspace(TWorkspace * & /*workspace*/) {} ///< Only used for certain cudnn on-device memory
175  static void FreePoolDropoutWorkspace(TWorkspace * & /*workspace*/) {}
176 
     // Recurrent-layer descriptor initializers: all three bodies are empty and ignore their arguments.
177  static void InitializeRNNDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
178  static void InitializeLSTMDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
179  static void InitializeGRUDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
180 
     // Recurrent-layer workspace initializers: all three bodies are empty and ignore their arguments.
181  static void InitializeRNNWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
182  static void InitializeLSTMWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
183  static void InitializeGRUWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
184 
     // Recurrent-layer tensor initializers: all three bodies are empty and ignore the layer pointer.
185  static void InitializeRNNTensors(GenLayer_t * /*layer*/) {}
186  static void InitializeLSTMTensors(GenLayer_t * /*layer*/) {}
187  static void InitializeGRUTensors(GenLayer_t * /*layer*/) {}
188 
     // Both functions have empty bodies; the pointer passed in is left untouched.
189  static void ReleaseRNNDescriptors(TDescriptors *& /*descriptors*/) {}
190  static void FreeRNNWorkspace(TWorkspace *& /*workspace*/) {}
191 
192  //static void InitializeRNNTensors(RNNLayer_t * /*layer*/) {}
193 
194  //____________________________________________________________________________
195  //
196  // Propagation
197  //____________________________________________________________________________
198 
199  /** @name Forward Propagation
200  * Low-level functions required for the forward propagation of activations
201  * through the network.
202  */
203  ///@{
204  /** Matrix-multiply \p input with the transpose of \p weights and
205  * write the results into \p output. */
206  static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights);
207 
208  static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights) {
209  Matrix_t output_matrix = output.GetMatrix();
210  MultiplyTranspose( output_matrix, input.GetMatrix(), weights);
211  //ensor_t::MatrixToTensor(output_matrix, output); // this maybe is not needed
212  }
213 
214  /** Add the vectors biases row-wise to the matrix output */
215  static void AddRowWise(Matrix_t &output,const Matrix_t &biases);
216 
217  static void AddRowWise(Tensor_t &output, const Matrix_t &biases) {
218  Matrix_t output_matrix = output.GetMatrix();
219  AddRowWise(output_matrix, biases);
220  //Tensor_t::MatrixToTensor(output_matrix, output); // this maybe is not needed
221  }
222 
223  /** @name Backward Propagation (Dense Layers)
224  * Low-level functions required for the backward propagation of gradients
225  * through the network.
226  */
227  ///@{
228  /** Perform the complete backward propagation step. If the provided
229  * \p activationGradientsBackward matrix is not empty, compute the
230  * gradients of the objective function with respect to the activations
231  * of the previous layer (backward direction).
232  * Also compute the weight and the bias gradients. Modifies the values
233  * in \p df and thus produces only a valid result, if it is applied the
234  * first time after the corresponding forward propagation has been per-
235  * formed. */
236  static void Backward(Tensor_t & activationGradientsBackward,
237  Matrix_t & weightGradients,
238  Matrix_t & biasGradients,
239  const Tensor_t & df,
240  const Tensor_t & activationGradients,
241  const Matrix_t & weights,
242  const Tensor_t & activationBackward);
243 
244  /** Adds the elements in matrix \p B scaled by \p beta to the elements in
245  * the matrix \p A. This is required for the weight update in the gradient
246  * descent step.*/
247  static void ScaleAdd(Matrix_t & A,
248  const Matrix_t & B,
249  Scalar_t beta = 1.0);
250 
251  static void Copy(Matrix_t & B,
252  const Matrix_t & A);
253 
254  // copy from another type of matrix
255  template<typename AMatrix_t>
256  static void CopyDiffArch(Matrix_t & B, const AMatrix_t & A);
257 
258 
259  /** Above functions extended to vectors */
260  static void ScaleAdd(Tensor_t & A,
261  const Tensor_t & B,
262  Scalar_t beta = 1.0);
263 
264  static void Copy(Tensor_t & A,
265  const Tensor_t & B);
266 
267  // copy from another tensor
268  template<typename ATensor_t>
269  static void CopyDiffArch(Tensor_t & A,
270  const ATensor_t & B);
271 
272  // copy from vector of matrices of different types
273  template<typename AMatrix_t>
274  static void CopyDiffArch(std::vector<Matrix_t> & A,
275  const std::vector<AMatrix_t> & B);
276 
277  ///@}
278 
279  //____________________________________________________________________________
280  //
281  // Activation Functions
282  //____________________________________________________________________________
283 
284  /** @name Activation Functions
285  * For each activation function, the low-level interface contains two routines.
286  * One that applies the activation function to a matrix and one that evaluates
287  * the derivatives of the activation function at the elements of a given matrix
288  * and writes the results into the result matrix.
289  */
290  ///@{
291  /* impl using Matrix */
292  /*inline void evaluate(Matrix_t &A, EActivationFunction f)
293  {
294  Tensor_t tA(A);
295  evaluate<TCuda<AReal>>(tA,f);
296  }*/
297  static void ActivationFunctionForward(Tensor_t & X, EActivationFunction activFunct,
298  const ActivationDescriptor_t activationDescr,
299  const double coef = 0.0, const AFloat alpha = 1,
300  const AFloat beta = 0);
301 
302  /** Computes the gradient of the activation function */
303  static void ActivationFunctionBackward(Tensor_t & dX, const Tensor_t & Y,
304  const Tensor_t & dY, const Tensor_t & X,
305  EActivationFunction activFunct,
306  const ActivationDescriptor_t activationDescr,
307  const AFloat alpha = 1,
308  const AFloat beta = 0);
309 
310  static void IdentityDerivative(Tensor_t & B,
311  const Tensor_t &A);
312 
313  static void Relu(Tensor_t & B);
314  static void ReluDerivative(Tensor_t & B,
315  const Tensor_t & A);
316 
317  static void Sigmoid(Tensor_t & B);
318  static void SigmoidDerivative(Tensor_t & B,
319  const Tensor_t & A);
320 
321  static void Tanh(Tensor_t & B);
322  static void TanhDerivative(Tensor_t & B,
323  const Tensor_t & A);
324 
325  static void FastTanh(Tensor_t &B) { return Tanh(B); }
326  static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A) { return TanhDerivative(B, A); }
327 
328  static void SymmetricRelu(Tensor_t & B);
329  static void SymmetricReluDerivative(Tensor_t & B,
330  const Tensor_t & A);
331 
332  static void SoftSign(Tensor_t & B);
333  static void SoftSignDerivative(Tensor_t & B,
334  const Tensor_t & A);
335 
336  static void Gauss(Tensor_t & B);
337  static void GaussDerivative(Tensor_t & B,
338  const Tensor_t & A);
339  ///@}
340 
341  //____________________________________________________________________________
342  //
343  // Loss Functions
344  //____________________________________________________________________________
345 
346  /** @name Loss Functions
347  * Loss functions compute a scalar value given the \p output of the network
348  * for a given training input and the expected network prediction \p Y that
349  * quantifies the quality of the prediction. For each function also a routine
350  * that computes the gradients (suffixed by Gradients) must be provided for
351  * the starting of the backpropagation algorithm.
352  */
353  ///@{
354 
355  static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output,
356  const Matrix_t &weights);
357  static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y,
358  const Matrix_t &output, const Matrix_t &weights);
359 
360  /** Sigmoid transformation is implicitly applied, thus \p output should
361  * hold the linear activations of the last layer in the net. */
362  static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output,
363  const Matrix_t &weights);
364 
365  static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
366  const Matrix_t &output, const Matrix_t &weights);
367 
368  /** Softmax transformation is implicitly applied, thus \p output should
369  * hold the linear activations of the last layer in the net. */
370  static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output,
371  const Matrix_t &weights);
372  static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
373  const Matrix_t &output, const Matrix_t &weights);
374  ///@}
375 
376  //____________________________________________________________________________
377  //
378  // Output Functions
379  //____________________________________________________________________________
380 
381  /** @name Output Functions
382  * Output functions transform the activations \p output of the
383  * output layer in the network to a valid prediction \p YHat for
384  * the desired usage of the network, e.g. the identity function
385  * for regression or the sigmoid transformation for two-class
386  * classification.
387  */
388  ///@{
389  static void Sigmoid(Matrix_t &YHat,
390  const Matrix_t & );
391  static void Softmax(Matrix_t &YHat,
392  const Matrix_t & );
393  ///@}
394 
395  //____________________________________________________________________________
396  //
397  // Regularization
398  //____________________________________________________________________________
399 
400  /** @name Regularization
401  * For each regularization type two functions are required, one named
402  * <tt><Type>Regularization</tt> that evaluates the corresponding
403  * regularization functional for a given weight matrix and the
404  * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
405  * component in the gradients to the provided matrix.
406  */
407  ///@{
408 
409  static Scalar_t L1Regularization(const Matrix_t & W);
411  const Matrix_t & W,
413 
414  static Scalar_t L2Regularization(const Matrix_t & W);
416  const Matrix_t & W,
418  ///@}
419 
420  //____________________________________________________________________________
421  //
422  // Initialization
423  //____________________________________________________________________________
424 
425  /** @name Initialization
426  * For each initialization method, one function in the low-level interface
427  * is provided. The naming scheme is <tt>Initialize<Type></tt> for a given
428  * initialization method Type.
429  */
430  ///@{
431 
432  static void InitializeGauss(Matrix_t & A);
433  static void InitializeUniform(Matrix_t & A);
434  static void InitializeIdentity(Matrix_t & A);
435  static void InitializeZero(Matrix_t & A);
436  static void InitializeZero(Tensor_t &A);
437  static void InitializeGlorotNormal(Matrix_t & A);
438  static void InitializeGlorotUniform(Matrix_t & A);
439 
440  // return static instance of random generator used for initialization
441  // if generator does not exist it is created the first time with a random seed (e.g. seed = 0)
442  static TRandom & GetRandomGenerator();
443  // set random seed for the static generator
444  // if the static generator does not exist it is created
445  static void SetRandomSeed(size_t seed);
446  ///@}
447 
448  //____________________________________________________________________________
449  //
450  // Dropout
451  //____________________________________________________________________________
452 
453  /** @name Dropout
454  */
455  ///@{
456 
457  /** Apply dropout with activation probability \p p to the given
458  * tensor \p A and scale the result by reciprocal of \p p. */
459  static void DropoutForward(Tensor_t & A,
460  TDescriptors * descriptors,
461  TWorkspace * workspace,
462  Scalar_t p);
463 
464  static void DropoutForward(Matrix_t & A, Scalar_t p) {
465  Tensor_t tA(A);
466  DropoutForward( tA, static_cast<TDescriptors *> (nullptr), static_cast<TWorkspace *> (nullptr), p );
467  }
468 
     /** No-op: the body is empty and all three arguments are ignored. */
469  static void DropoutBackward(Tensor_t & /* A */,
470  TDescriptors * /*descriptors */,
471  TWorkspace * /* workspace */ ) {}
472  ///@}
473 
474  //____________________________________________________________________________
475  //
476  // Batch Normalization
477  //____________________________________________________________________________
478 
479  /** @name Batch Normalization Layer Propagation
480  */
481  ///@{
482 
483  /** The inputs from each batch are normalized during training to have zero mean and unit variance
484  * and they are then scaled by two parameters, different for each input variable:
485  * - a scale factor \f$\gamma\f$ (gamma)
486  * - an offset \f$\beta\f$ (beta) */
487 
488  static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta,
489  Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans,
490  Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum,
491  Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor);
492 
493  /** During inference the inputs are not normalized using the batch mean but using the previously
494  * computed running mean and variance */
495 
496  static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y,
497  const Matrix_t &runningMeans, const Matrix_t &runningVars,
499 
500  static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx,
501  Matrix_t &gamma, // Matrix_t &beta, (not needed)
502  Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance,
503  const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &);
504 
505  //____________________________________________________________________________
506  //
507  // Convolutional Layer Propagation
508  //____________________________________________________________________________
509 
510  /** @name Forward Propagation in Convolutional Layer
511  */
512  ///@{
513 
514  /** Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters. */
515  static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride);
516 
517  /** Transform the matrix B in local view format, suitable for
518  * convolution, and store it in matrix A */
519  static void Im2col(Matrix_t &A,
520  const Matrix_t &B,
521  size_t imgHeight,
522  size_t imgWidth,
523  size_t fltHeight,
524  size_t fltWidth,
525  size_t strideRows,
526  size_t strideCols,
527  size_t zeroPaddingHeight,
528  size_t zeroPaddingWidth);
529 
530  static void Im2colIndices(std::vector<int> &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight,
531  size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight,
532  size_t zeroPaddingWidth);
533  static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector<int> & V);
534 
535  /** Rotates the matrix \p B, which represents the weights,
536  * and stores them in the matrix \p A. */
537  static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight,
538  size_t filterWidth, size_t numFilters);
539 
540  /** Add the biases in the Convolutional Layer. */
541  static void AddConvBiases(Matrix_t &output, const Matrix_t &biases);
542  ///@}
543 
544  /** Dummy placeholder - preparation is currently only required for the CUDA architecture. */
     // No-op: empty body, the tensor argument is ignored.
     // NOTE(review): the comment above says preparation is only required for the CUDA architecture,
     // yet this TCuda implementation does nothing; confirm which backend actually needs it.
545  static void PrepareInternals(Tensor_t &) {}
546 
547  /** Forward propagation in the Convolutional layer */
548  static void ConvLayerForward(Tensor_t & output,
549  Tensor_t & inputActivationFunc,
550  const Tensor_t &input,
551  const Matrix_t &weights, const Matrix_t & biases,
552  const DNN::CNN::TConvParams & params, EActivationFunction activFunc,
553  Tensor_t & /* inputPrime */,
554  const ConvDescriptors_t & /*descriptors*/, // Empty struct for cuda architecture
555  ConvWorkspace_t & /*workspace*/); // Empty struct for cuda architecture
556  //void * cudnnWorkspace = nullptr); // Remains nullptr for cuda architecture
557  /** @name Backward Propagation in Convolutional Layer
558  */
559  ///@{
560 
561  /** Perform the complete backward propagation step in a Convolutional Layer.
562  * If the provided \p activationGradientsBackward matrix is not empty, compute the
563  * gradients of the objective function with respect to the activations
564  * of the previous layer (backward direction).
565  * Also compute the weight and the bias gradients. Modifies the values
566  * in \p df and thus produces only a valid result, if it is applied the
567  * first time after the corresponding forward propagation has been per-
568  * formed. */
569  static void ConvLayerBackward(Tensor_t &activationGradientsBackward,
570  Matrix_t &weightGradients, Matrix_t &biasGradients,
571  Tensor_t &df,
572  Tensor_t &activationGradients,
573  const Matrix_t &weights,
574  const Tensor_t &activationBackward,
575  const Tensor_t & outputTensor,
576  EActivationFunction activFunc,
577  const ConvDescriptors_t & /*descriptors*/,
578  ConvWorkspace_t & /*workspace*/,
579  size_t batchSize, size_t inputHeight,
580  size_t inputWidth, size_t depth,
581  size_t height, size_t width,
582  size_t filterDepth, size_t filterHeight,
583  size_t filterWidth, size_t nLocalViews );
584 
585  /** Utility function for calculating the activation gradients of the layer
586  * before the convolutional layer. */
587  static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward,
588  const Tensor_t &df,
589  const Matrix_t &weights, size_t batchSize,
590  size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
591  size_t width, size_t filterDepth, size_t filterHeight,
592  size_t filterWidth);
593 
594  /** Utility function for calculating the weight gradients of the convolutional
595  * layer. */
596  static void CalculateConvWeightGradients(Matrix_t &weightGradients,
597  const Tensor_t &df,
598  const Tensor_t &activations_backward,
599  size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
600  size_t height, size_t width, size_t filterDepth, size_t filterHeight,
601  size_t filterWidth, size_t nLocalViews);
602 
603  /** Utility function for calculating the bias gradients of the convolutional
604  * layer */
605  static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df,
606  size_t batchSize, size_t depth, size_t nLocalViews);
607  ///@}
608 
609  //____________________________________________________________________________
610  //
611  // Max Pooling Layer Propagation
612  //____________________________________________________________________________
613  /** @name Forward Propagation in Max Pooling Layer
614  */
615  ///@{
616 
617  /** Downsample the matrix \p C to the matrix \p A, using max
618  * operation, such that the winning indices are stored in matrix
619  * \p B. */
620  static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C,
621  const PoolingDescriptors_t & /*descriptors*/,
622  PoolingWorkspace_t & /*workspace*/,
623  size_t imgHeight, size_t imgWidth, size_t fltHeight,
624  size_t fltWidth, size_t strideRows, size_t strideCols);
625 
626  ///@}
627 
628  /** @name Backward Propagation in Max Pooling Layer
629  */
630  ///@{
631  /** Perform the complete backward propagation step in a Pooling Layer. Based on the
632  * winning indices stored in the index matrix, it just forwards the activation
633  * gradients to the previous layer. */
634  static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward,
635  const Tensor_t &activationGradients,
636  const Tensor_t &indexMatrix,
637  const Tensor_t & /*inputActivation*/,
638  const Tensor_t & /*outputTensor*/,
639  const PoolingDescriptors_t & /*descriptors*/,
640  PoolingWorkspace_t & /*workspace*/,
641  size_t imgHeight,
642  size_t imgWidth,
643  size_t fltHeight,
644  size_t fltWidth,
645  size_t strideRows,
646  size_t strideCols,
647  size_t nLocalViews);
648 
649  ///@}
650 
651  //____________________________________________________________________________
652  //
653  // Reshape Layer Propagation
654  //____________________________________________________________________________
655  /** @name Forward and Backward Propagation in Reshape Layer
656  */
657  ///@{
658 
659  /** Transform the matrix \p B to a matrix with different dimensions \p A */
660  static void Reshape(Matrix_t &A, const Matrix_t &B);
661 
662  /** Flattens the tensor \p B, such that each matrix is stretched into
663  * one row, resulting in a matrix \p A. */
664  static void Flatten(Tensor_t &A, const Tensor_t &B); // size_t size, size_t nRows, size_t nCols);
665 
666  /** Transforms each row of \p B to a matrix and stores it in the
667  * tensor \p A. */
668  static void Deflatten(Tensor_t &A, const Tensor_t &B); // size_t index, size_t nRows,size_t nCols);
669 
670  /** Rearrange data according to time: fill the B x T x D tensor \p out from the T x B x D tensor \p in. */
671  static void Rearrange(Tensor_t &out, const Tensor_t &in);
672 
673 
674  /** Backward pass for Recurrent Networks */
675  static Matrix_t & RecurrentLayerBackward(Matrix_t & state_gradients_backward, // BxH
676  Matrix_t & input_weight_gradients,
677  Matrix_t & state_weight_gradients,
678  Matrix_t & bias_gradients,
679  Matrix_t & df, //DxH
680  const Matrix_t & state, // BxH
681  const Matrix_t & weights_input, // HxD
682  const Matrix_t & weights_state, // HxH
683  const Matrix_t & input, // BxD
684  Matrix_t & input_gradient);
685 
686  // dummy RNN functions
     /** No-op placeholder: the body is empty, every argument is ignored and no error is reported. */
687  static void RNNForward(const Tensor_t & /* x */, const Matrix_t & /* hx */, const Matrix_t & /* cx */,
688  const Matrix_t & /* weights */, Tensor_t & /* y */, Matrix_t & /* hy */, Matrix_t & /* cy */,
689  const RNNDescriptors_t & /* descr */, RNNWorkspace_t & /* workspace */, bool /* isTraining */)
690  {
691  }
692 
     /** No-op placeholder: the body is empty, every argument is ignored and no error is reported. */
693  static void RNNBackward(const Tensor_t & /* x */, const Matrix_t & /* hx */, const Matrix_t & /* cx */,
694  const Tensor_t & /* y */, const Tensor_t & /* dy */, const Matrix_t & /* dhy */,
695  const Matrix_t & /* dcy */, const Tensor_t & /* weights */, Tensor_t & /* dx */,
696  Matrix_t & /* dhx */, Matrix_t & /* dcx */, Tensor_t & /* dw */,
697  const RNNDescriptors_t & /* desc */, RNNWorkspace_t & /* workspace */)
698  {
699  }
     /** Placeholder for the LSTM backward pass: reports a fatal error through Fatal() and then
      *  returns \p state_gradients_backward without modifying it. Every other parameter is unnamed
      *  and ignored. */
700  static Matrix_t &
701  LSTMLayerBackward(Matrix_t &state_gradients_backward, Matrix_t & /*cell_gradients_backward*/,
702  Matrix_t & /*input_weight_gradients*/, Matrix_t & /*forget_weight_gradients*/,
703  Matrix_t & /*candidate_weight_gradients*/, Matrix_t & /*output_weight_gradients*/,
704  Matrix_t & /*input_state_weight_gradients*/, Matrix_t & /*forget_state_weight_gradients*/,
705  Matrix_t & /*candidate_state_weight_gradients*/, Matrix_t & /*output_state_weight_gradients*/,
706  Matrix_t & /*input_bias_gradients*/, Matrix_t & /*forget_bias_gradients*/,
707  Matrix_t & /*candidate_bias_gradients*/, Matrix_t & /*output_bias_gradients*/, Matrix_t & /*di*/,
708  Matrix_t & /*df*/, Matrix_t & /*dc*/, Matrix_t & /*dout*/,
709  const Matrix_t & /*precStateActivations*/, const Matrix_t & /*precCellActivations*/,
710  const Matrix_t & /*fInput*/, const Matrix_t & /*fForget*/, const Matrix_t & /*fCandidate*/,
711  const Matrix_t & /*fOutput*/, const Matrix_t & /*weights_input*/,
712  const Matrix_t & /*weights_forget*/, const Matrix_t & /*weights_candidate*/,
713  const Matrix_t & /*weights_output*/, const Matrix_t & /*weights_input_state*/,
714  const Matrix_t & /*weights_forget_state*/, const Matrix_t & /*weights_candidate_state*/,
715  const Matrix_t & /*weights_output_state*/, const Matrix_t & /*input*/,
716  Matrix_t & /*input_gradient*/, Matrix_t & /*cell_gradient*/, Matrix_t & /*cell_tanh*/)
717  {
718  Fatal("TCuda::LSTMLayerBackward", "Recurrent layers are not supported in the native Cuda architecture!!!");
     // the return only satisfies the signature; the first argument is handed back as received
719  return state_gradients_backward;
720  }
721 
722  /** Backward pass for GRU Network */
     // Placeholder: reports a fatal error through Fatal() and then returns state_gradients_backward
     // without modifying it. Every other parameter is unnamed and ignored.
723  static Matrix_t &
724  GRULayerBackward(Matrix_t &state_gradients_backward, Matrix_t & /*reset_weight_gradients*/,
725  Matrix_t & /*update_weight_gradients*/, Matrix_t & /*candidate_weight_gradients*/,
726  Matrix_t & /*reset_state_weight_gradients*/, Matrix_t & /*update_state_weight_gradients*/,
727  Matrix_t & /*candidate_state_weight_gradients*/, Matrix_t & /*reset_bias_gradients*/,
728  Matrix_t & /*update_bias_gradients*/, Matrix_t & /*candidate_bias_gradients*/, Matrix_t & /*dr*/,
729  Matrix_t & /*du*/, Matrix_t & /*dc*/, const Matrix_t & /*precStateActivations*/,
730  const Matrix_t & /*fReset*/, const Matrix_t & /*fUpdate*/, const Matrix_t & /*fCandidate*/,
731  const Matrix_t & /*weights_reset*/, const Matrix_t & /*weights_update*/,
732  const Matrix_t & /*weights_candidate*/, const Matrix_t & /*weights_reset_state*/,
733  const Matrix_t & /*weights_update_state*/, const Matrix_t & /*weights_candidate_state*/,
734  const Matrix_t & /*input*/, Matrix_t & /*input_gradient*/, bool)
735  {
736  Fatal("TCuda::GRULayerBackward", "Recurrent layers are not supported in the native Cuda architecture!!!");
     // the return only satisfies the signature; the first argument is handed back as received
737  return state_gradients_backward;
738  }
739  ///@}
740 
741  //____________________________________________________________________________
742  //
743  // Additional Arithmetic Functions
744  //____________________________________________________________________________
745 
746  /** @name Additional Arithmetic Functions
747  *
748  * Additional arithmetic on CUDA matrices used to implement the low-level
749  * interface.
750  */
751  ///@{
752 
753  /** Standard multiplication of two matrices \p A and \p B with the result being
754  * written into C.
755  */
756  static void Multiply(Matrix_t &C,
757  const Matrix_t &A,
758  const Matrix_t &B);
759  /** Matrix multiplication of two matrices \p A and \p B^T (transposed) with the
760  * result being written into C.
761  */
762  static void TransposeMultiply(Matrix_t &output,
763  const Matrix_t &input,
764  const Matrix_t &Weights,
765  Scalar_t alpha = 1.0, Scalar_t beta = 0.);
766  /** In-place Hadamard (element-wise) product of matrices \p A and \p B
767  * with the result being written into \p A.
768  */
769  static void Hadamard(Tensor_t &A,
770  const Tensor_t &B);
771  static void Hadamard(Matrix_t &A,
772  const Matrix_t &B);
773  // {
774  // Tensor_t tA(A);
775  // Hadamard( tA, Tensor_t(B));
776  // }
777 
778  /** Sum columns of (m x n) matrix \p A and write the results into the first
779  * m elements in \p B.
780  */
781  static void SumColumns(Matrix_t &B,
782  const Matrix_t &A,
783  Scalar_t alpha = 1.0, Scalar_t beta = 0.);
784 
785  /** Compute the sum of all elements in \p A */
786  static Scalar_t Sum(const Matrix_t &A);
787 
788  /** Check two matrices for equality, taking floating point arithmetic errors into account. */
789  static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon = 0.1);
790 
791  /** Add the constant \p beta to all the elements of matrix \p A and write the
792  * result into \p A.
793  */
794  static void ConstAdd(Matrix_t &A, Scalar_t beta);
795 
796  /** Multiply the constant \p beta to all the elements of matrix \p A and write the
797  * result into \p A.
798  */
799  static void ConstMult(Matrix_t &A, Scalar_t beta);
800 
801  /** Reciprocal each element of the matrix \p A and write the result into
802  * \p A
803  */
804  static void ReciprocalElementWise(Matrix_t &A);
805 
806  /** Square each element of the matrix \p A and write the result into
807  * \p A
808  */
809  static void SquareElementWise(Matrix_t &A);
810 
811  /** Square root each element of the matrix \p A and write the result into
812  * \p A
813  */
814  static void SqrtElementWise(Matrix_t &A);
815 
816  // optimizer functions
817  static void AdamUpdate(Matrix_t & A, const Matrix_t & M, const Matrix_t & V, Scalar_t alpha, Scalar_t eps);
818  static void AdamUpdateFirstMom(Matrix_t & A, const Matrix_t & B, Scalar_t beta);
819  static void AdamUpdateSecondMom(Matrix_t & A, const Matrix_t & B, Scalar_t beta);
820 
821  // printing of tensor
822  static void PrintTensor( const Tensor_t & A, const std::string name = "Cuda-tensor", bool = false);
823 
824  ///////////////////////////////////////////////////////////////////////////////
825  /// extra functions defined only for CPU architecture !!!
826  //////////////////////////////////////////////////////////////////////////////
827 
828  /** Sum rows of (m x n) matrix \p A and write the results into the first
829  * m elements in \p B.
830  */
831  static void SumRows(Matrix_t & B, const Matrix_t & A);
832 
833 
834 };
835 
836 //____________________________________________________________________________
837 template <typename AFloat>
838 template <typename AMatrix_t>
840  const AMatrix_t &A)
841 {
842  // copy from another architecture using the reference one
843  // this is not very efficient since creates temporary objects
844  TMatrixT<AFloat> tmp = A;
845  Copy(B, TCudaMatrix<AFloat>(tmp) );
846 }
847 
848 //____________________________________________________________________________
849 template <typename AFloat>
850 template <typename AMatrix_t>
852  const std::vector<AMatrix_t> &A)
853 {
854  for (size_t i = 0; i < B.size(); ++i) {
855  CopyDiffArch(B[i], A[i]);
856  }
857 }
858 
859 template <typename AFloat>
860 void TCuda<AFloat>::PrintTensor(const typename TCuda<AFloat>::Tensor_t & A, const std::string name, bool )
861 {
862  std::cout << name << " size = " << A.GetSize() << " shape = { ";
863  auto shape = A.GetShape();
864  for (size_t k = 0; k < shape.size()-1; ++k)
865  std::cout << shape[k] << " , ";
866  std::cout << shape.back() << " } ";
867  std::cout << " strides = { ";
868  auto strides = A.GetStrides();
869  for (size_t k = 0; k < strides.size()-1; ++k)
870  std::cout << strides[k] << " , ";
871  std::cout << strides.back() << " }\n ";
872 
873  if (A.GetShape().size() == 2 ) {
874  for (size_t i = 0; i < A.GetShape()[0]; ++i) {
875  std::cout << "{ ";
876  for (size_t j = 0; j < A.GetShape()[1]; ++j) {
877  std::cout << A(i,j) << " ";
878  }
879  std::cout << " } " << std::endl;
880  }
881  } else if (A.GetShape().size() == 3 ) {
882  for (size_t i = 0; i < A.GetFirstSize(); ++i) {
883  std::cout << "{ ";
884  for (size_t j = 0; j < A.GetHSize(); ++j) {
885  std::cout << "{ ";
886  for (size_t k = 0; k < A.GetWSize(); ++k) {
887  std::cout << A(i,j,k) << " ";
888  }
889  std::cout << " } " << std::endl;
890  }
891  std::cout << " } " << std::endl;
892  }
893  }
894  else {
895  for (size_t l = 0; l < A.GetSize(); ++l) {
896  std::cout << A.GetData()[l] << " ";
897  }
898  std::cout << "\n";
899  }
900 }
901 
902 
903 } // namespace DNN
904 } // namespace TMVA
905 
906 #endif
TMVA::Experimental::MemoryLayout::ColumnMajor
@ ColumnMajor
TMVA::DNN::TCuda::CreateTensor
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
Definition: Cuda.h:121
c
#define c(i)
Definition: RSha256.hxx:119
l
auto * l
Definition: textangle.C:4
TMVA::DNN::CudaDataType
Definition: Cuda.h:51
ROOT::Math::Cephes::gamma
double gamma(double x)
Definition: SpecFuncCephes.cxx:339
TMVA::DNN::TCudaDeviceBuffer
TCudaDeviceBuffer.
Definition: CudaBuffers.h:40
TMVA::DNN::TCuda::InitializeZero
static void InitializeZero(Matrix_t &A)
n
const Int_t n
Definition: legend1.C:16
TMVA::DNN::TCuda::CreateTensor
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition: Cuda.h:111
TMVA::DNN::TCuda::Im2col
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
TMVA::DNN::TCuda::Multiply
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
TMVA::DNN::TCuda::SqrtElementWise
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
ContextHandles.h
TMVA::DNN::CNN::TCNNWorkspace
Definition: ContextHandles.h:74
TMVA::DNN::TCuda::InitializeGRUTensors
static void InitializeGRUTensors(GenLayer_t *)
Definition: Cuda.h:187
TMVA::DNN::TCuda::Sigmoid
static void Sigmoid(Tensor_t &B)
TMVA::DNN::TCuda::Deflatten
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
TMVA::DNN::TCuda::AlmostEquals
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
TMVA::DNN::TCuda::ActivationFunctionForward
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const AFloat alpha=1, const AFloat beta=0)
TMVA::DNN::CudaConvolutionDescriptor
Definition: Cuda.h:45
TMVA::DNN::TCuda::GRULayerBackward
static Matrix_t & GRULayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, bool)
Backward pass for GRU Network.
Definition: Cuda.h:724
TMVA::DNN::CNN::TMaxPoolLayer
Definition: MaxPoolLayer.h:122
TMVA::DNN::TCuda::DeviceBuffer_t
TCudaDeviceBuffer< AFloat > DeviceBuffer_t
Definition: Cuda.h:75
TMVA::DNN::TCuda::InitializeIdentity
static void InitializeIdentity(Matrix_t &A)
TMVA::DNN::TCuda::ConvWorkspace_t
CNN::TCNNWorkspace< ConvLayer_t > ConvWorkspace_t
Definition: Cuda.h:101
TMVA::DNN::TCuda::AFloat
AReal AFloat
Definition: Cuda.h:70
TMVA::DNN::TCuda::MultiplyTranspose
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
TMVA::DNN::TCuda::AddL2RegularizationGradients
static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
TMVA::DNN::TDNNGenDescriptors
Definition: ContextHandles.h:49
ROOT::Math::GSLSimAn::Copy
void Copy(void *source, void *dest)
Definition: GSLSimAnnealing.cxx:159
TMVA::DNN::CNN::TConvParams
Definition: ConvLayer.h:107
TMVA::DNN::CudaFilterDescriptor
Definition: Cuda.h:44
TMVA::DNN::TCuda
The TCuda architecture class.
Definition: Cuda.h:64
TMVA::DNN::TCuda::DropoutBackward
static void DropoutBackward(Tensor_t &, TDescriptors *, TWorkspace *)
Definition: Cuda.h:469
TMVA::DNN::TCuda::RNNForward
static void RNNForward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Tensor_t &, Matrix_t &, Matrix_t &, const RNNDescriptors_t &, RNNWorkspace_t &, bool)
Definition: Cuda.h:687
TMVA::DNN::TCuda::IsCudnn
static bool IsCudnn()
Definition: Cuda.h:139
TMVA::DNN::TCuda::InitializeConvWorkspace
static void InitializeConvWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *)
Definition: Cuda.h:163
TMVA::DNN::TCuda::TensorDescriptor_t
DummyCudaDataType TensorDescriptor_t
Definition: Cuda.h:84
output
static void output(int code)
Definition: gifencode.c:226
TMVA::DNN::TCuda::InitializeUniform
static void InitializeUniform(Matrix_t &A)
TMVA::DNN::TCuda::L1Regularization
static Scalar_t L1Regularization(const Matrix_t &W)
TMVA::DNN::TCuda::DropoutForward
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given tensor A and scale the result by reciprocal ...
TMVA::DNN::TCuda::GaussDerivative
static void GaussDerivative(Tensor_t &B, const Tensor_t &A)
TMVA::DNN::TCuda::AdamUpdateSecondMom
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
TMVA::DNN::TCuda::InitializeConvDescriptors
static void InitializeConvDescriptors(TDescriptors *&, ConvLayer_t *)
Definition: Cuda.h:152
width
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
TMVA::DNN::TCuda::Im2colIndices
static void Im2colIndices(std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
TMVA::DNN::CNN::TConvLayer
Definition: ConvLayer.h:138
TMVA::DNN::TCuda::TanhDerivative
static void TanhDerivative(Tensor_t &B, const Tensor_t &A)
TMVA::Experimental::MemoryLayout
MemoryLayout
Memory layout type (copy from RTensor.hxx)
Definition: CudaTensor.h:59
x
Double_t x[n]
Definition: legend1.C:17
TMVA::DNN::CudaEmptyDescriptor
Definition: Cuda.h:54
TMVA::DNN::TCuda::Reshape
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
TMVA::DNN::TCuda::SymmetricRelu
static void SymmetricRelu(Tensor_t &B)
TMVA::DNN::RNN::TRNNDescriptors
Definition: ContextHandles.h:107
TMVA::DNN::TCuda::BatchNormLayerBackward
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170
TMVA::DNN::TCuda::ConvLayerForward
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
TMVA::DNN::TCuda::Rearrange
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill B x T x D out with T x B x D matrix in.
TMVA::DNN::CNN::TCNNDescriptors
Definition: ContextHandles.h:63
TMVA::DNN::weightDecay
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
TMVA::DNN::TCuda::MultiplyTranspose
static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights)
Definition: Cuda.h:208
TMatrixT
Definition: TMatrixDfwd.h:22
TMVA::DNN::TCuda::RecurrentLayerBackward
static Matrix_t & RecurrentLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient)
Backward pass for Recurrent Networks.
TMVA::DNN::TCuda::CreateTensor
static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
Definition: Cuda.h:114
TMVA::DNN::TCuda::Scalar_t
AFloat Scalar_t
Definition: Cuda.h:71
ROOT::Math::beta
double beta(double x, double y)
Calculates the beta function.
Definition: SpecFuncMathCore.cxx:111
TMVA::DNN::TCuda::CreateWeightTensors
static void CreateWeightTensors(std::vector< Matrix_t > &newWeights, const std::vector< Matrix_t > &weights)
Definition: Cuda.h:132
b
#define b(i)
Definition: RSha256.hxx:118
TMVA::DNN::TCuda::CrossEntropy
static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
TMVA::DNN::TCuda::SigmoidDerivative
static void SigmoidDerivative(Tensor_t &B, const Tensor_t &A)
TMVA::DNN::TCuda::AddRowWise
static void AddRowWise(Tensor_t &output, const Matrix_t &biases)
Definition: Cuda.h:217
TMVA::DNN::TCuda::RNNBackward
static void RNNBackward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, Tensor_t &, const RNNDescriptors_t &, RNNWorkspace_t &)
Definition: Cuda.h:693
TMVA::DNN::TCuda::InitializeLSTMDescriptors
static void InitializeLSTMDescriptors(TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:178
TMVA::DNN::TCuda::Flatten
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A.
ROOT::Math::Cephes::C
static double C[]
Definition: SpecFuncCephes.cxx:187
TMVA::DNN::TCuda::SumColumns
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
TMVA::DNN::TCuda::GetRandomGenerator
static TRandom & GetRandomGenerator()
TMVA::DNN::TCuda::FreePoolDropoutWorkspace
static void FreePoolDropoutWorkspace(TWorkspace *&)
Definition: Cuda.h:175
TMVA::DNN::VGeneralLayer
Definition: GeneralLayer.h:93
TMVA::DNN::TCuda::ConstMult
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
TMVA::DNN::TCuda::ReleasePoolDescriptors
static void ReleasePoolDescriptors(TDescriptors *&)
Definition: Cuda.h:160
TMVA::DNN::TCuda::CopyDiffArch
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
TMVA::DNN::CudaConvolutionFwdAlgo
Definition: Cuda.h:48
TMVA::DNN::TCuda::SquareElementWise
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
TMVA::DNN::TCuda::CalculateConvWeightGradients
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
TMVA::DNN::TCuda::InitializeGRUWorkspace
static void InitializeGRUWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:183
TMVA::DNN::TWorkspace
Definition: ContextHandles.h:44
TMVA::DNN::TCuda::Sum
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
TMVA::DNN::TCuda::AdamUpdate
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
TMVA::DNN::TCuda::Hadamard
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
TMVA::DNN::TCuda::InitializeLSTMWorkspace
static void InitializeLSTMWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:182
TMVA::DNN::TCuda::InitializeRNNTensors
static void InitializeRNNTensors(GenLayer_t *)
Definition: Cuda.h:185
TRandom
Definition: TRandom.h:27
TMVA::DNN::TCuda::ReleaseBNormDescriptors
static void ReleaseBNormDescriptors(TDescriptors *&)
Definition: Cuda.h:161
TMVA::DNN::TCuda::SoftmaxCrossEntropyGradients
static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
TMVA::DNN::TCuda::PoolingDescriptors_t
CNN::TCNNDescriptors< PoolingLayer_t > PoolingDescriptors_t
Definition: Cuda.h:103
h
#define h(i)
Definition: RSha256.hxx:124
TMVA::DNN::TCuda::InitializePoolDropoutWorkspace
static void InitializePoolDropoutWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *)
Definition: Cuda.h:167
TMVA::DNN::TCuda::DropoutForward
static void DropoutForward(Matrix_t &A, Scalar_t p)
Definition: Cuda.h:464
TMVA::DNN::TCuda::SymmetricReluDerivative
static void SymmetricReluDerivative(Tensor_t &B, const Tensor_t &A)
TMVA::DNN::TCuda::CalculateConvActivationGradients
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
epsilon
REAL epsilon
Definition: triangle.c:617
TMVA::DNN::TCuda::AddL1RegularizationGradients
static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
TMVA::DNN::TCuda::Copy
static void Copy(Matrix_t &B, const Matrix_t &A)
Fatal
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Definition: TError.cxx:246
TMVA::DNN::TCuda::CreateTensor
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
Definition: Cuda.h:118
TMVA::DNN::TCuda::BatchNormLayerForwardTraining
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
TMVA::DNN::TCuda::Tensor_t
TCudaTensor< AFloat > Tensor_t
Definition: Cuda.h:74
ConvLayer.h
TMVA::DNN::TCuda::ReleaseRNNDescriptors
static void ReleaseRNNDescriptors(TDescriptors *&)
Definition: Cuda.h:189
TMVA::DNN::TCuda::CalculateConvBiasGradients
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
TMVA::DNN::TCuda::SetRandomSeed
static void SetRandomSeed(size_t seed)
TMVA::DNN::TCuda::InitializePoolDescriptors
static void InitializePoolDescriptors(TDescriptors *&, PoolingLayer_t *)
Definition: Cuda.h:154
BatchNormLayer.h
TMVA::DNN::TCuda::SoftSignDerivative
static void SoftSignDerivative(Tensor_t &B, const Tensor_t &A)
TMVA::DNN::RNN::TRNNWorkspace
Definition: ContextHandles.h:128
TMVA::DNN::TCuda::PoolingWorkspace_t
CNN::TCNNWorkspace< PoolingLayer_t > PoolingWorkspace_t
Definition: Cuda.h:104
TMVA::DNN::TCuda::InitializeRNNDescriptors
static void InitializeRNNDescriptors(TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:177
TMVA::DNN::TCuda::MeanSquaredErrorGradients
static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
y
Double_t y[n]
Definition: legend1.C:17
TMVA::DNN::TCuda::ActivationFunctionBackward
static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const AFloat alpha=1, const AFloat beta=0)
Computes the gradient of the activation function.
TMVA::DNN::TCuda::FastTanhDerivative
static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A)
Definition: Cuda.h:326
TMVA::DNN::TCuda::calculateDimension
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
CudaMatrix.h
TMVA::DNN::TCuda::LSTMLayerBackward
static Matrix_t & LSTMLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &)
Definition: Cuda.h:701
TMVA::DNN::TCuda::Gauss
static void Gauss(Tensor_t &B)
MaxPoolLayer.h
TMVA::DNN::TDescriptors
Definition: ContextHandles.h:41
TMVA::DNN::TCuda::InitializeGauss
static void InitializeGauss(Matrix_t &A)
TMVA::DNN::CudaConvolutionBwdDataAlgo
Definition: Cuda.h:49
TMVA::DNN::CudaDropoutDescriptor
Definition: Cuda.h:46
TMVA::DNN::TCuda::AdamUpdateFirstMom
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
CudaTensor.h
TMVA::DNN::TCuda::AddConvBiases
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
TMVA::DNN::TCuda::GetTensorLayout
static TMVA::Experimental::MemoryLayout GetTensorLayout()
Definition: Cuda.h:109
TMVA::DNN::TCuda::Relu
static void Relu(Tensor_t &B)
TMVA::DNN::TCuda::FreeRNNWorkspace
static void FreeRNNWorkspace(TWorkspace *&)
Definition: Cuda.h:190
TMVA::DNN::TCudaTensor::GetMatrix
TCudaMatrix< AFloat > GetMatrix() const
Definition: CudaTensor.h:310
TMVA::DNN::CudaConvolutionBwdFilterAlgo
Definition: Cuda.h:50
TMVA::DNN::TCuda::RotateWeights
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
Functions.h
TMVA::DNN::CudaPoolingDescriptor
Definition: Cuda.h:47
TMVA::DNN::TCuda::InitializeGlorotNormal
static void InitializeGlorotNormal(Matrix_t &A)
TMVA::DNN::TCuda::CrossEntropyGradients
static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
TMVA::DNN::TCuda::Im2colFast
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
TMVA::DNN::TCuda::PrepareInternals
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
Definition: Cuda.h:545
TMVA::DNN::TCudaTensor
TCudaTensor Class.
Definition: CudaTensor.h:89
TMVA::DNN::TCuda::InitializeLSTMTensors
static void InitializeLSTMTensors(GenLayer_t *)
Definition: Cuda.h:186
TMVA::DNN::TCuda::PrintTensor
static void PrintTensor(const Tensor_t &A, const std::string name="Cuda-tensor", bool=false)
Definition: Cuda.h:860
TMVA::DNN::TCuda::fgRandomGen
static TRandom * fgRandomGen
Definition: Cuda.h:67
TMVA::DNN::TCuda::ConstAdd
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
name
char name[80]
Definition: TGX11.cxx:110
TMVA::DNN::TCuda::BatchNormLayerForwardInference
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
TMVA::DNN::TCuda::InitializeGRUDescriptors
static void InitializeGRUDescriptors(TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:179
TMVA::DNN::TCuda::ReluDerivative
static void ReluDerivative(Tensor_t &B, const Tensor_t &A)
TMVA::DNN::TCuda::Softmax
static void Softmax(Matrix_t &YHat, const Matrix_t &)
TMVA::DNN::TBatchNormLayer
Definition: BatchNormLayer.h:106
TMVA::DNN::TCuda::MaxPoolLayerBackward
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
TMVA::DNN::TCuda::ScaleAdd
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
DataLoader.h
TMVA::DNN::TCuda::InitializeGlorotUniform
static void InitializeGlorotUniform(Matrix_t &A)
TMVA::DNN::TCuda::Matrix_t
TCudaMatrix< AFloat > Matrix_t
Definition: Cuda.h:73
TMVA::DNN::TCuda::ReciprocalElementWise
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
TMVA::DNN::TCuda::InitializeRNNWorkspace
static void InitializeRNNWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:181
TMVA::DNN::TCuda::TransposeMultiply
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
TMVA::DNN::DummyCudaDataType
Definition: Cuda.h:52
TMVA::DNN::TCuda::InitializeActivationDescriptor
static void InitializeActivationDescriptor(ActivationDescriptor_t &, EActivationFunction, double=0.0)
Definition: Cuda.h:156
ROOT::Math::Cephes::B
static double B[]
Definition: SpecFuncCephes.cxx:178
TMVA::DNN::TCudaMatrix
TCudaMatrix Class.
Definition: CudaMatrix.h:105
TMVA::DNN::TCuda::Tanh
static void Tanh(Tensor_t &B)
TMVA::DNN::TCuda::InitializeBNormDescriptors
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
Initialize CNN data/operator descriptors.
Definition: Cuda.h:147
TMVA::DNN::TCuda::ConvDescriptors_t
CNN::TCNNDescriptors< ConvLayer_t > ConvDescriptors_t
Definition: Cuda.h:100
TMVA::DNN::TCuda::SoftSign
static void SoftSign(Tensor_t &B)
TMVA::DNN::TCuda::FastTanh
static void FastTanh(Tensor_t &B)
Definition: Cuda.h:325
TMVA::DNN::TCuda::ReleaseConvDescriptors
static void ReleaseConvDescriptors(TDescriptors *&)
Release CNN data/operator descriptors.
Definition: Cuda.h:159
TMVA::DNN::TCuda::Backward
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
TMVA::DNN::TCudaHostBuffer
TCudaHostBuffer.
Definition: CudaBuffers.h:54
TMVA::DNN::EActivationFunction
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:43
TMVA::DNN::TCuda::SumRows
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
TMVA::DNN::CudaActivationDescriptor
Definition: Cuda.h:43
TMVA::DNN::TCuda::ReleaseDescriptor
static void ReleaseDescriptor(ActivationDescriptor_t &)
Definition: Cuda.h:172
TMVA::DNN::TCuda::ActivationDescriptor_t
CudaActivationDescriptor ActivationDescriptor_t
Definition: Cuda.h:78
TMVA::DNN::TCuda::ConvLayerBackward
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
TMVA::DNN::TCuda::SoftmaxCrossEntropy
static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
TMVA::DNN::TCuda::AddRowWise
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
TMVA::DNN::TCuda::IdentityDerivative
static void IdentityDerivative(Tensor_t &B, const Tensor_t &A)
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
TMVA::DNN::TCuda::Downsample
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
TMVA::DNN::TCuda::MeanSquaredError
static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
CudaBuffers.h
TMVA::DNN::TCuda::L2Regularization
static Scalar_t L2Regularization(const Matrix_t &W)
Error
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition: TError.cxx:188