Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
Cuda.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 05/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12///////////////////////////////////////////////////////////////////
13// Definition of the TCuda architecture class, which provides an //
14// implementation of the low-level functionality for neural //
15// networks for the CUDA computing architectures. //
16///////////////////////////////////////////////////////////////////
17
18#ifndef TMVA_DNN_ARCHITECTURES_CUDA
19#define TMVA_DNN_ARCHITECTURES_CUDA
20
21#include "TMVA/DNN/Functions.h"
26
27
28#include "cuda.h"
29#include "Cuda/CudaBuffers.h"
30#include "Cuda/CudaMatrix.h"
31#include "Cuda/CudaTensor.h"
32#include "TMVA/DNN/DataLoader.h"
33#include <utility>
34#include <vector>
35#include <string>
36
37class TRandom;
38
39namespace TMVA
40{
41namespace DNN
42{
51 struct CudaDataType {};
53
55
56/** The TCuda architecture class.
57 *
58 * Low-level interface class for CUDA computing architectures. Contains as
59 * public types the declaration of the scalar, matrix and buffer types
60 * for this architecture as well as the remaining functions in the low-level
61 * interface in the form of static members.
62 */
63template<typename AReal = Float_t>
64class TCuda
65{
66private:
68public:
69
70 using AFloat = AReal;
72
77
82 //using OpTensorDescriptor_t = CudaOpTensorDescriptor;
85 //using ReductionDescriptor_t = CudaReduceTensorDescriptor;
92
93 using EmptyDescriptor_t = CudaEmptyDescriptor; // Used if a descriptor is not needed in a class
94
98
105
108
109 static TMVA::Experimental::MemoryLayout GetTensorLayout() { return TMVA::Experimental::MemoryLayout::ColumnMajor; }
110
/** Build a tensor whose shape is {c, h*w, n}, using the layout returned by
 * GetTensorLayout().
 * NOTE(review): presumably n = batch size, c = channels, h/w = image height and
 * width -- confirm against the callers. */
111 static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w) {
112 return Tensor_t( {c,h*w,n}, GetTensorLayout());
113 }
/** Build a tensor whose shape is {t, w, b}, using the layout returned by
 * GetTensorLayout().
 * NOTE(review): presumably b = batch size, t = time steps, w = input width --
 * confirm against the callers. */
114 static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
115 {
116 return Tensor_t( {t, w, b}, GetTensorLayout());
117 }
/** Same shape as the (n, c, h, w) overload, i.e. {c, h*w, n}, but constructed
 * from the given \p buffer. The two trailing zero arguments are forwarded to the
 * Tensor_t constructor; their meaning is defined there and is not visible here. */
118 static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w) {
119 return Tensor_t( buffer, {c,h*w, n}, GetTensorLayout(), 0, 0);
120 }
/** Same shape as the (b, t, w) overload, i.e. {t, w, b}, but constructed from
 * the given \p buffer. Unlike the four-dimensional buffer overload, no trailing
 * arguments are passed to the Tensor_t constructor. */
121 static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
122 {
123 return Tensor_t(buffer, {t, w, b}, GetTensorLayout());
124 }
125
126 // create a weight tensor/matrix from another tensor using its shape
127 // static Matrix_t CreateWeightTensor( Matrix_t & A) {
128 // return Matrix_t( A.GetNrows(), A.GetNcols());
129 // }
130 // create a weight tensor/matrix vector from another tensor/weight vector using the given tensor shapes
131 // this function is used by the optimizers to store intermediate weights representations
132 static void CreateWeightTensors( std::vector<Matrix_t> & newWeights, const std::vector<Matrix_t> & weights) {
133 if (!newWeights.empty()) newWeights.clear();
134 size_t n = weights.size();
135 for (size_t i = 0; i < n; ++i)
136 newWeights.emplace_back( weights[i].GetNrows(), weights[i].GetNcols());
137 }
138
139 static bool IsCudnn() { return false; }
140 //____________________________________________________________________________
141 //
142 // Architecture Initialization
143 //____________________________________________________________________________
144
145 /** Initialize CNN data/operator descriptors. Not used at the moment.*/
146
147 static void InitializeBNormDescriptors(TDescriptors * & /*descriptors*/,
148 BNormLayer_t */*L = nullptr*/) {
149 Error("InitializeBNormDescriptrs", "Batch normalization on GPU is supported only with Cudnn");
150 }
151
152 static void InitializeConvDescriptors(TDescriptors *& /*descriptors*/, ConvLayer_t * /*L = nullptr*/) {}
153
154 static void InitializePoolDescriptors(TDescriptors *& /*descriptors*/, PoolingLayer_t * /*L = nullptr*/) {}
155
156 static void InitializeActivationDescriptor(ActivationDescriptor_t &/*descriptors*/, EActivationFunction /*activFunc */ , double /*coef*/ = 0.0) {}
157
158 /** Release CNN data/operator descriptors. Not used at the moment.*/
159 static void ReleaseConvDescriptors(TDescriptors * & /*descriptors*/) {}
160 static void ReleasePoolDescriptors(TDescriptors * & /*descriptors*/) {}
161 static void ReleaseBNormDescriptors(TDescriptors *& /*descriptors*/) {}
162
163 static void InitializeConvWorkspace(TWorkspace * & /*workspace*/,
164 TDescriptors * & /*descriptors*/,
165 const DNN::CNN::TConvParams & /*params*/,
166 ConvLayer_t */*L = nullptr*/) {}
167 static void InitializePoolDropoutWorkspace(TWorkspace * & /*workspace*/,
168 TDescriptors * & /*descriptors*/,
169 const DNN::CNN::TConvParams & /*params*/,
170 PoolingLayer_t */*L = nullptr*/) {}
171
172 static void ReleaseDescriptor(ActivationDescriptor_t & /*activationDescr*/) {}
173
174 static void FreeConvWorkspace(TWorkspace * & /*workspace*/) {} ///< Only used for certain cudnn on-device memory
175 static void FreePoolDropoutWorkspace(TWorkspace * & /*workspace*/) {}
176
177 static void InitializeRNNDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
178 static void InitializeLSTMDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
179 static void InitializeGRUDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
180
181 static void InitializeRNNWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
182 static void InitializeLSTMWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
183 static void InitializeGRUWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
184
185 static void InitializeRNNTensors(GenLayer_t * /*layer*/) {}
186 static void InitializeLSTMTensors(GenLayer_t * /*layer*/) {}
187 static void InitializeGRUTensors(GenLayer_t * /*layer*/) {}
188
189 static void ReleaseRNNDescriptors(TDescriptors *& /*descriptors*/) {}
190 static void FreeRNNWorkspace(TWorkspace *& /*workspace*/) {}
191
192 //static void InitializeRNNTensors(RNNLayer_t * /*layer*/) {}
193
194 //____________________________________________________________________________
195 //
196 // Propagation
197 //____________________________________________________________________________
198
199 /** @name Forward Propagation
200 * Low-level functions required for the forward propagation of activations
201 * through the network.
202 */
203 ///@{
204 /** Matrix-multiply \p input with the transpose of \p weights and
205 * write the results into \p output. */
206 static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights);
207
208 static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights) {
209 Matrix_t output_matrix = output.GetMatrix();
210 MultiplyTranspose( output_matrix, input.GetMatrix(), weights);
211 //ensor_t::MatrixToTensor(output_matrix, output); // this maybe is not needed
212 }
213
214 /** Add the vectors biases row-wise to the matrix output */
215 static void AddRowWise(Matrix_t &output,const Matrix_t &biases);
216
217 static void AddRowWise(Tensor_t &output, const Matrix_t &biases) {
218 Matrix_t output_matrix = output.GetMatrix();
219 AddRowWise(output_matrix, biases);
220 //Tensor_t::MatrixToTensor(output_matrix, output); // this maybe is not needed
221 }
222
223 /** @name Backward Propagation (Dense Layers)
224 * Low-level functions required for the backward propagation of gradients
225 * through the network.
226 */
227 ///@{
228 /** Perform the complete backward propagation step. If the provided
229 * \p activationGradientsBackward matrix is not empty, compute the
230 * gradients of the objective function with respect to the activations
231 * of the previous layer (backward direction).
232 * Also compute the weight and the bias gradients. Modifies the values
233 * in \p df and thus produces only a valid result, if it is applied the
234 * first time after the corresponding forward propagation has been per-
235 * formed. */
236 static void Backward(Tensor_t & activationGradientsBackward,
237 Matrix_t & weightGradients,
238 Matrix_t & biasGradients,
239 const Tensor_t & df,
240 const Tensor_t & activationGradients,
241 const Matrix_t & weights,
242 const Tensor_t & activationBackward);
243
244 /** Adds the elements in matrix \p B scaled by \p beta to the elements in
245 * the matrix \p A. This is required for the weight update in the gradient
246 * descent step. */
247 static void ScaleAdd(Matrix_t & A,
248 const Matrix_t & B,
249 Scalar_t beta = 1.0);
250
251 static void Copy(Matrix_t & B,
252 const Matrix_t & A);
253
254 // copy from another type of matrix
255 template<typename AMatrix_t>
256 static void CopyDiffArch(Matrix_t & B, const AMatrix_t & A);
257
258
259 /** Above functions extended to vectors */
260 static void ScaleAdd(Tensor_t & A,
261 const Tensor_t & B,
262 Scalar_t beta = 1.0);
263
264 static void Copy(Tensor_t & A,
265 const Tensor_t & B);
266
267 // copy from another tensor
268 template<typename ATensor_t>
269 static void CopyDiffArch(Tensor_t & A,
270 const ATensor_t & B);
271
272 // copy from vector of matrices of different types
273 template<typename AMatrix_t>
274 static void CopyDiffArch(std::vector<Matrix_t> & A,
275 const std::vector<AMatrix_t> & B);
276
277 ///@}
278
279 //____________________________________________________________________________
280 //
281 // Activation Functions
282 //____________________________________________________________________________
283
284 /** @name Activation Functions
285 * For each activation function, the low-level interface contains two routines.
286 * One that applies the activation function to a matrix and one that evaluate
287 * the derivatives of the activation function at the elements of a given matrix
288 * and writes the results into the result matrix.
289 */
290 ///@{
291 /* impl using Matrix */
292 /*inline void evaluate(Matrix_t &A, EActivationFunction f)
293 {
294 Tensor_t tA(A);
295 evaluate<TCuda<AReal>>(tA,f);
296 }*/
298 const ActivationDescriptor_t activationDescr,
299 const double coef = 0.0, const AFloat alpha = 1,
300 const AFloat beta = 0);
301
302 /** Computes the gradient of the activation function */
303 static void ActivationFunctionBackward(Tensor_t & dX, const Tensor_t & Y,
304 const Tensor_t & dY, const Tensor_t & X,
305 EActivationFunction activFunct,
306 const ActivationDescriptor_t activationDescr,
307 const AFloat alpha = 1,
308 const AFloat beta = 0);
309
310 static void IdentityDerivative(Tensor_t & B,
311 const Tensor_t &A);
312
313 static void Relu(Tensor_t & B);
314 static void ReluDerivative(Tensor_t & B,
315 const Tensor_t & A);
316
317 static void Sigmoid(Tensor_t & B);
318 static void SigmoidDerivative(Tensor_t & B,
319 const Tensor_t & A);
320
321 static void Tanh(Tensor_t & B);
322 static void TanhDerivative(Tensor_t & B,
323 const Tensor_t & A);
324
325 static void FastTanh(Tensor_t &B) { return Tanh(B); }
326 static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A) { return TanhDerivative(B, A); }
327
328 static void SymmetricRelu(Tensor_t & B);
329 static void SymmetricReluDerivative(Tensor_t & B,
330 const Tensor_t & A);
331
332 static void SoftSign(Tensor_t & B);
333 static void SoftSignDerivative(Tensor_t & B,
334 const Tensor_t & A);
335
336 static void Gauss(Tensor_t & B);
337 static void GaussDerivative(Tensor_t & B,
338 const Tensor_t & A);
339 ///@}
340
341 //____________________________________________________________________________
342 //
343 // Loss Functions
344 //____________________________________________________________________________
345
346 /** @name Loss Functions
347 * Loss functions compute a scalar value given the \p output of the network
348 * for a given training input and the expected network prediction \p Y that
349 * quantifies the quality of the prediction. For each function also a routine
350 * that computes the gradients (suffixed by Gradients) must be provided for
351 * the starting of the backpropagation algorithm.
352 */
353 ///@{
354
355 static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output,
356 const Matrix_t &weights);
357 static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y,
358 const Matrix_t &output, const Matrix_t &weights);
359
360 /** Sigmoid transformation is implicitly applied, thus \p output should
361 * hold the linear activations of the last layer in the net. */
362 static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output,
363 const Matrix_t &weights);
364
365 static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
366 const Matrix_t &output, const Matrix_t &weights);
367
368 /** Softmax transformation is implicitly applied, thus \p output should
369 * hold the linear activations of the last layer in the net. */
370 static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output,
371 const Matrix_t &weights);
372 static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
373 const Matrix_t &output, const Matrix_t &weights);
374 ///@}
375
376 //____________________________________________________________________________
377 //
378 // Output Functions
379 //____________________________________________________________________________
380
381 /** @name Output Functions
382 * Output functions transform the activations \p output of the
383 * output layer in the network to a valid prediction \p YHat for
384 * the desired usage of the network, e.g. the identity function
385 * for regression or the sigmoid transformation for two-class
386 * classification.
387 */
388 ///@{
389 static void Sigmoid(Matrix_t &YHat,
390 const Matrix_t & );
391 static void Softmax(Matrix_t &YHat,
392 const Matrix_t & );
393 ///@}
394
395 //____________________________________________________________________________
396 //
397 // Regularization
398 //____________________________________________________________________________
399
400 /** @name Regularization
401 * For each regularization type two functions are required, one named
402 * <tt>`<Type>`Regularization</tt> that evaluates the corresponding
403 * regularization functional for a given weight matrix and the
404 * <tt>Add`<Type>`RegularizationGradients</tt>, that adds the regularization
405 * component in the gradients to the provided matrix.
406 */
407 ///@{
408
409 static Scalar_t L1Regularization(const Matrix_t & W);
411 const Matrix_t & W,
413
414 static Scalar_t L2Regularization(const Matrix_t & W);
416 const Matrix_t & W,
418 ///@}
419
420 //____________________________________________________________________________
421 //
422 // Initialization
423 //____________________________________________________________________________
424
425 /** @name Initialization
426 * For each initialization method, one function in the low-level interface
427 * is provided. The naming scheme is <p>Initialize`<Type>`</p> for a given
428 * initialization method Type.
429 */
430 ///@{
431
432 static void InitializeGauss(Matrix_t & A);
433 static void InitializeUniform(Matrix_t & A);
434 static void InitializeIdentity(Matrix_t & A);
435 static void InitializeZero(Matrix_t & A);
436 static void InitializeZero(Tensor_t &A);
437 static void InitializeGlorotNormal(Matrix_t & A);
438 static void InitializeGlorotUniform(Matrix_t & A);
439
440 // return static instance of random generator used for initialization
441 // if generator does not exist it is created the first time with a random seed (e.g. seed = 0)
442 static TRandom & GetRandomGenerator();
443 // set random seed for the static generator
444 // if the static generator does not exist it is created
445 static void SetRandomSeed(size_t seed);
446 ///@}
447
448 //____________________________________________________________________________
449 //
450 // Dropout
451 //____________________________________________________________________________
452
453 /** @name Dropout
454 */
455 ///@{
456
457 /** Apply dropout with activation probability \p p to the given
458 * tensor \p A and scale the result by reciprocal of \p p. */
459 static void DropoutForward(Tensor_t & A,
460 TDescriptors * descriptors,
461 TWorkspace * workspace,
462 Scalar_t p);
463
464 static void DropoutForward(Matrix_t & A, Scalar_t p) {
465 Tensor_t tA(A);
466 DropoutForward( tA, static_cast<TDescriptors *> (nullptr), static_cast<TWorkspace *> (nullptr), p );
467 }
468
/** No-op in this class: the body is empty and all three arguments are ignored. */
469 static void DropoutBackward(Tensor_t & /* A */,
470 TDescriptors * /*descriptors */,
471 TWorkspace * /* workspace */ ) {}
472 ///@}
473
474 //____________________________________________________________________________
475 //
476 // Batch Normalization
477 //____________________________________________________________________________
478
479 /** @name Batch Normalization Layer Propagation
480 */
481 ///@{
482
483 /** The input from each batch are normalized during training to have zero mean and unit variance
484 * and they are then scaled by two parameters, different for each input variable:
485 * - a scale factor `\gamma` gamma
486 * - an offset `\beta` beta */
487
488 static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta,
489 Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans,
490 Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum,
491 Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor);
492
493 /** During inference the inputs are not normalized using the batch mean but using the previously computed
494 * running mean and variance */
495
496 static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y,
497 const Matrix_t &runningMeans, const Matrix_t &runningVars,
498 Scalar_t epsilon, const TensorDescriptor_t &);
499
500 static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx,
501 Matrix_t &gamma, // Matrix_t &beta, (not needed)
502 Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance,
503 const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &);
504
505 //____________________________________________________________________________
506 //
507 // Convolutional Layer Propagation
508 //____________________________________________________________________________
509
510 /** @name Forward Propagation in Convolutional Layer
511 */
512 ///@{
513
514 /** Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters. */
515 static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride);
516
517 /** Transform the matrix B in local view format, suitable for
518 * convolution, and store it in matrix A */
519 static void Im2col(Matrix_t &A,
520 const Matrix_t &B,
521 size_t imgHeight,
522 size_t imgWidth,
523 size_t fltHeight,
524 size_t fltWidth,
525 size_t strideRows,
526 size_t strideCols,
527 size_t zeroPaddingHeight,
528 size_t zeroPaddingWidth);
529
530 static void Im2colIndices(std::vector<int> &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight,
531 size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight,
532 size_t zeroPaddingWidth);
533 static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector<int> & V);
534
535 /** Rotates the matrix \p B, which represents the weights,
536 * and stores the result in the matrix \p A. */
537 static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight,
538 size_t filterWidth, size_t numFilters);
539
540 /** Add the biases in the Convolutional Layer. */
541 static void AddConvBiases(Matrix_t &output, const Matrix_t &biases);
542 ///@}
543
544 /** Dummy placeholder - preparation is currently only required for the CUDA architecture. */
545 static void PrepareInternals(Tensor_t &) {}
546
547 /** Forward propagation in the Convolutional layer */
548 static void ConvLayerForward(Tensor_t & output,
549 Tensor_t & inputActivationFunc,
550 const Tensor_t &input,
551 const Matrix_t &weights, const Matrix_t & biases,
552 const DNN::CNN::TConvParams & params, EActivationFunction activFunc,
553 Tensor_t & /* inputPrime */,
554 const ConvDescriptors_t & /*descriptors*/, // Empty struct for cuda architecture
555 ConvWorkspace_t & /*workspace*/); // Empty struct for cuda architecture
556 //void * cudnnWorkspace = nullptr); // Remains nullptr for cuda architecture
557 /** @name Backward Propagation in Convolutional Layer
558 */
559 ///@{
560
561 /** Perform the complete backward propagation step in a Convolutional Layer.
562 * If the provided \p activationGradientsBackward matrix is not empty, compute the
563 * gradients of the objective function with respect to the activations
564 * of the previous layer (backward direction).
565 * Also compute the weight and the bias gradients. Modifies the values
566 * in \p df and thus produces only a valid result, if it is applied the
567 * first time after the corresponding forward propagation has been per-
568 * formed. */
569 static void ConvLayerBackward(Tensor_t &activationGradientsBackward,
570 Matrix_t &weightGradients, Matrix_t &biasGradients,
571 Tensor_t &df,
572 Tensor_t &activationGradients,
573 const Matrix_t &weights,
574 const Tensor_t &activationBackward,
575 const Tensor_t & outputTensor,
576 EActivationFunction activFunc,
577 const ConvDescriptors_t & /*descriptors*/,
578 ConvWorkspace_t & /*workspace*/,
579 size_t batchSize, size_t inputHeight,
580 size_t inputWidth, size_t depth,
581 size_t height, size_t width,
582 size_t filterDepth, size_t filterHeight,
583 size_t filterWidth, size_t nLocalViews );
584
585 /** Utility function for calculating the activation gradients of the layer
586 * before the convolutional layer. */
587 static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward,
588 const Tensor_t &df,
589 const Matrix_t &weights, size_t batchSize,
590 size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
591 size_t width, size_t filterDepth, size_t filterHeight,
592 size_t filterWidth);
593
594 /** Utility function for calculating the weight gradients of the convolutional
595 * layer. */
596 static void CalculateConvWeightGradients(Matrix_t &weightGradients,
597 const Tensor_t &df,
598 const Tensor_t &activations_backward,
599 size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
600 size_t height, size_t width, size_t filterDepth, size_t filterHeight,
601 size_t filterWidth, size_t nLocalViews);
602
603 /** Utility function for calculating the bias gradients of the convolutional
604 * layer */
605 static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df,
606 size_t batchSize, size_t depth, size_t nLocalViews);
607 ///@}
608
609 //____________________________________________________________________________
610 //
611 // Max Pooling Layer Propagation
612 //____________________________________________________________________________
613 /** @name Forward Propagation in Max Pooling Layer
614 */
615 ///@{
616
617 /** Downsample the matrix \p C to the matrix \p A, using max
618 * operation, such that the winning indices are stored in matrix
619 * \p B. */
620 static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C,
621 const PoolingDescriptors_t & /*descriptors*/,
622 PoolingWorkspace_t & /*workspace*/,
623 size_t imgHeight, size_t imgWidth, size_t fltHeight,
624 size_t fltWidth, size_t strideRows, size_t strideCols);
625
626 ///@}
627
628 /** @name Backward Propagation in Max Pooling Layer
629 */
630 ///@{
631 /** Perform the complete backward propagation step in a Pooling Layer. Based on the
632 * winning indices stored in the index matrix, it just forwards the activation
633 * gradients to the previous layer. */
634 static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward,
635 const Tensor_t &activationGradients,
636 const Tensor_t &indexMatrix,
637 const Tensor_t & /*inputActivation*/,
638 const Tensor_t & /*outputTensor*/,
639 const PoolingDescriptors_t & /*descriptors*/,
640 PoolingWorkspace_t & /*workspace*/,
641 size_t imgHeight,
642 size_t imgWidth,
643 size_t fltHeight,
644 size_t fltWidth,
645 size_t strideRows,
646 size_t strideCols,
647 size_t nLocalViews);
648
649 ///@}
650
651 //____________________________________________________________________________
652 //
653 // Reshape Layer Propagation
654 //____________________________________________________________________________
655 /** @name Forward and Backward Propagation in Reshape Layer
656 */
657 ///@{
658
659 /** Transform the matrix \p B to a matrix with different dimensions \p A */
660 static void Reshape(Matrix_t &A, const Matrix_t &B);
661
662 /** Flattens the tensor \p B, such that each matrix is stretched into
663 * one row, resulting in a matrix \p A. */
664 static void Flatten(Tensor_t &A, const Tensor_t &B); // size_t size, size_t nRows, size_t nCols);
665
666 /** Transforms each row of \p B to a matrix and stores it in the
667 * tensor \p A. */
668 static void Deflatten(Tensor_t &A, const Tensor_t &B); // size_t index, size_t nRows,size_t nCols);
669
670 /** Rearrange data according to time: fill the B x T x D tensor \p out from the T x B x D tensor \p in. */
671 static void Rearrange(Tensor_t &out, const Tensor_t &in);
672
673
674 /** Backward pass for Recurrent Networks */
675 static Matrix_t & RecurrentLayerBackward(Matrix_t & state_gradients_backward, // BxH
676 Matrix_t & input_weight_gradients,
677 Matrix_t & state_weight_gradients,
678 Matrix_t & bias_gradients,
679 Matrix_t & df, //DxH
680 const Matrix_t & state, // BxH
681 const Matrix_t & weights_input, // HxD
682 const Matrix_t & weights_state, // HxH
683 const Matrix_t & input, // BxD
684 Matrix_t & input_gradient);
685
686 // dummy RNN functions
687 static void RNNForward(const Tensor_t & /* x */, const Matrix_t & /* hx */, const Matrix_t & /* cx */,
688 const Tensor_t & /* weights */, Tensor_t & /* y */, Matrix_t & /* hy */, Matrix_t & /* cy */,
689 const RNNDescriptors_t & /* descr */, RNNWorkspace_t & /* workspace */, bool /* isTraining */)
690 {
691 }
692
693 static void RNNBackward(const Tensor_t & /* x */, const Matrix_t & /* hx */, const Matrix_t & /* cx */,
694 const Tensor_t & /* y */, const Tensor_t & /* dy */, const Matrix_t & /* dhy */,
695 const Matrix_t & /* dcy */, const Tensor_t & /* weights */, Tensor_t & /* dx */,
696 Matrix_t & /* dhx */, Matrix_t & /* dcx */, Tensor_t & /* dw */,
697 const RNNDescriptors_t & /* desc */, RNNWorkspace_t & /* workspace */)
698 {
699 }
/** Backward pass for an LSTM layer -- not implemented in this class.
 * Every argument except \p state_gradients_backward is ignored. The function
 * calls Fatal(), which is described as aborting the program, and then returns
 * \p state_gradients_backward unchanged so that the declared return type is
 * satisfied. */
700 static Matrix_t &
701 LSTMLayerBackward(Matrix_t &state_gradients_backward, Matrix_t & /*cell_gradients_backward*/,
702 Matrix_t & /*input_weight_gradients*/, Matrix_t & /*forget_weight_gradients*/,
703 Matrix_t & /*candidate_weight_gradients*/, Matrix_t & /*output_weight_gradients*/,
704 Matrix_t & /*input_state_weight_gradients*/, Matrix_t & /*forget_state_weight_gradients*/,
705 Matrix_t & /*candidate_state_weight_gradients*/, Matrix_t & /*output_state_weight_gradients*/,
706 Matrix_t & /*input_bias_gradients*/, Matrix_t & /*forget_bias_gradients*/,
707 Matrix_t & /*candidate_bias_gradients*/, Matrix_t & /*output_bias_gradients*/, Matrix_t & /*di*/,
708 Matrix_t & /*df*/, Matrix_t & /*dc*/, Matrix_t & /*dout*/,
709 const Matrix_t & /*precStateActivations*/, const Matrix_t & /*precCellActivations*/,
710 const Matrix_t & /*fInput*/, const Matrix_t & /*fForget*/, const Matrix_t & /*fCandidate*/,
711 const Matrix_t & /*fOutput*/, const Matrix_t & /*weights_input*/,
712 const Matrix_t & /*weights_forget*/, const Matrix_t & /*weights_candidate*/,
713 const Matrix_t & /*weights_output*/, const Matrix_t & /*weights_input_state*/,
714 const Matrix_t & /*weights_forget_state*/, const Matrix_t & /*weights_candidate_state*/,
715 const Matrix_t & /*weights_output_state*/, const Matrix_t & /*input*/,
716 Matrix_t & /*input_gradient*/, Matrix_t & /*cell_gradient*/, Matrix_t & /*cell_tanh*/)
717 {
718 Fatal("TCuda::LSTMLayerBackward", "Recurrent layers are not supported in the native Cuda architecture!!!");
719 return state_gradients_backward;
720 }
721
722 /** Backward pass for a GRU layer -- not implemented in this class.
 * Every argument except \p state_gradients_backward is ignored. The function
 * calls Fatal(), which is described as aborting the program, and then returns
 * \p state_gradients_backward unchanged so that the declared return type is
 * satisfied. */
723 static Matrix_t &
724 GRULayerBackward(Matrix_t &state_gradients_backward, Matrix_t & /*reset_weight_gradients*/,
725 Matrix_t & /*update_weight_gradients*/, Matrix_t & /*candidate_weight_gradients*/,
726 Matrix_t & /*reset_state_weight_gradients*/, Matrix_t & /*update_state_weight_gradients*/,
727 Matrix_t & /*candidate_state_weight_gradients*/, Matrix_t & /*reset_bias_gradients*/,
728 Matrix_t & /*update_bias_gradients*/, Matrix_t & /*candidate_bias_gradients*/, Matrix_t & /*dr*/,
729 Matrix_t & /*du*/, Matrix_t & /*dc*/, const Matrix_t & /*precStateActivations*/,
730 const Matrix_t & /*fReset*/, const Matrix_t & /*fUpdate*/, const Matrix_t & /*fCandidate*/,
731 const Matrix_t & /*weights_reset*/, const Matrix_t & /*weights_update*/,
732 const Matrix_t & /*weights_candidate*/, const Matrix_t & /*weights_reset_state*/,
733 const Matrix_t & /*weights_update_state*/, const Matrix_t & /*weights_candidate_state*/,
734 const Matrix_t & /*input*/, Matrix_t & /*input_gradient*/, bool)
735 {
736 Fatal("TCuda::GRULayerBackward", "Recurrent layers are not supported in the native Cuda architecture!!!");
737 return state_gradients_backward;
738 }
739 ///@}
740
741 //____________________________________________________________________________
742 //
743 // Additional Arithmetic Functions
744 //____________________________________________________________________________
745
746 /** @name Additional Arithmetic Functions
747 *
748 * Additional arithmetic on CUDA matrices used to implement the low-level
749 * interface.
750 */
751 ///@{
752
753 /** Standard multiplication of two matrices \p A and \p B with the result being
754 * written into C.
755 */
756 static void Multiply(Matrix_t &C,
757 const Matrix_t &A,
758 const Matrix_t &B);
759 /** Matrix multiplication of two matrices \p A and \p B^T (transposed) with the
760 * result being written into C.
761 */
763 const Matrix_t &input,
764 const Matrix_t &Weights,
765 Scalar_t alpha = 1.0, Scalar_t beta = 0.);
766 /** In-place Hadamard (element-wise) product of matrices \p A and \p B
767 * with the result being written into \p A.
768 */
769 static void Hadamard(Tensor_t &A,
770 const Tensor_t &B);
771 static void Hadamard(Matrix_t &A,
772 const Matrix_t &B);
773 // {
774 // Tensor_t tA(A);
775 // Hadamard( tA, Tensor_t(B));
776 // }
777
778 /** Sum columns of (m x n) matrix \p A and write the results into the first
779 * m elements in \p B.
780 */
781 static void SumColumns(Matrix_t &B,
782 const Matrix_t &A,
783 Scalar_t alpha = 1.0, Scalar_t beta = 0.);
784
785 /** Compute the sum of all elements in \p A */
786 static Scalar_t Sum(const Matrix_t &A);
787
788 /** Check two matrices for equality, taking floating point arithmetic errors into account. */
789 static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon = 0.1);
790
791 /** Add the constant \p beta to all the elements of matrix \p A and write the
792 * result into \p A.
793 */
794 static void ConstAdd(Matrix_t &A, Scalar_t beta);
795
796 /** Multiply the constant \p beta to all the elements of matrix \p A and write the
797 * result into \p A.
798 */
799 static void ConstMult(Matrix_t &A, Scalar_t beta);
800
801 /** Reciprocal each element of the matrix \p A and write the result into
802 * \p A
803 */
804 static void ReciprocalElementWise(Matrix_t &A);
805
806 /** Square each element of the matrix \p A and write the result into
807 * \p A
808 */
809 static void SquareElementWise(Matrix_t &A);
810
811 /** Square root each element of the matrix \p A and write the result into
812 * \p A
813 */
814 static void SqrtElementWise(Matrix_t &A);
815
816 // optimizer functions
817 static void AdamUpdate(Matrix_t & A, const Matrix_t & M, const Matrix_t & V, Scalar_t alpha, Scalar_t eps);
818 static void AdamUpdateFirstMom(Matrix_t & A, const Matrix_t & B, Scalar_t beta);
819 static void AdamUpdateSecondMom(Matrix_t & A, const Matrix_t & B, Scalar_t beta);
820
821 // printing of tensor
822 static void PrintTensor( const Tensor_t & A, const std::string name = "Cuda-tensor", bool = false);
823
824 ///////////////////////////////////////////////////////////////////////////////
825 /// extra functions defined only for CPU architecture !!!
826 //////////////////////////////////////////////////////////////////////////////
827
828 /** Sum rows of (m x n) matrix \p A and write the results into the first
829 * m elements in \p B.
830 */
831 static void SumRows(Matrix_t & B, const Matrix_t & A);
832
833
834};
835
836//____________________________________________________________________________
837template <typename AFloat>
838template <typename AMatrix_t>
840 const AMatrix_t &A)
841{
842 // copy from another architecture using the reference one
843 // this is not very efficient since creates temporary objects
844 TMatrixT<AFloat> tmp = A;
845 Copy(B, TCudaMatrix<AFloat>(tmp) );
846}
847
848//____________________________________________________________________________
849template <typename AFloat>
850template <typename AMatrix_t>
852 const std::vector<AMatrix_t> &A)
853{
854 for (size_t i = 0; i < B.size(); ++i) {
855 CopyDiffArch(B[i], A[i]);
856 }
857}
858
859template <typename AFloat>
860void TCuda<AFloat>::PrintTensor(const typename TCuda<AFloat>::Tensor_t & A, const std::string name, bool )
861{
862 std::cout << name << " size = " << A.GetSize() << " shape = { ";
863 auto shape = A.GetShape();
864 for (size_t k = 0; k < shape.size()-1; ++k)
865 std::cout << shape[k] << " , ";
866 std::cout << shape.back() << " } ";
867 std::cout << " strides = { ";
868 auto strides = A.GetStrides();
869 for (size_t k = 0; k < strides.size()-1; ++k)
870 std::cout << strides[k] << " , ";
871 std::cout << strides.back() << " }\n ";
872
873 if (A.GetShape().size() == 2 ) {
874 for (size_t i = 0; i < A.GetShape()[0]; ++i) {
875 std::cout << "{ ";
876 for (size_t j = 0; j < A.GetShape()[1]; ++j) {
877 std::cout << A(i,j) << " ";
878 }
879 std::cout << " } " << std::endl;
880 }
881 } else if (A.GetShape().size() == 3 ) {
882 for (size_t i = 0; i < A.GetFirstSize(); ++i) {
883 std::cout << "{ ";
884 for (size_t j = 0; j < A.GetHSize(); ++j) {
885 std::cout << "{ ";
886 for (size_t k = 0; k < A.GetWSize(); ++k) {
887 std::cout << A(i,j,k) << " ";
888 }
889 std::cout << " } " << std::endl;
890 }
891 std::cout << " } " << std::endl;
892 }
893 }
894 else {
895 for (size_t l = 0; l < A.GetSize(); ++l) {
896 std::cout << A.GetData()[l] << " ";
897 }
898 std::cout << "\n";
899 }
900}
901
902
903} // namespace DNN
904} // namespace TMVA
905
906#endif
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define h(i)
Definition RSha256.hxx:106
#define X(type, name)
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:185
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Definition TError.cxx:244
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t width
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
char name[80]
Definition TGX11.cxx:110
Implementation of the CrossEntropy as separation criterion.
Generic Max Pooling Layer class.
Layer implementing Batch Normalization.
TCudaMatrix Class.
Definition CudaMatrix.h:103
TCudaTensor Class.
Definition CudaTensor.h:84
const Shape_t & GetShape() const
Definition CudaTensor.h:188
size_t GetWSize() const
Definition CudaTensor.h:289
const AFloat * GetData() const
Definition CudaTensor.h:196
const Shape_t & GetStrides() const
Definition CudaTensor.h:189
size_t GetHSize() const
Definition CudaTensor.h:283
size_t GetFirstSize() const
Definition CudaTensor.h:274
size_t GetSize() const
Definition CudaTensor.h:192
The TCuda architecture class.
Definition Cuda.h:65
static void RNNBackward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, Tensor_t &, const RNNDescriptors_t &, RNNWorkspace_t &)
Definition Cuda.h:693
static void SetRandomSeed(size_t seed)
CNN::TCNNDescriptors< ConvLayer_t > ConvDescriptors_t
Definition Cuda.h:100
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static void InitializeGRUWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition Cuda.h:183
TCudaMatrix< AFloat > Matrix_t
Definition Cuda.h:73
static Matrix_t & LSTMLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &)
Definition Cuda.h:701
static void FastTanh(Tensor_t &B)
Definition Cuda.h:325
static void InitializeGlorotUniform(Matrix_t &A)
Sample from a uniform distribution in range [ -lim,+lim] where lim = sqrt(6/(N_in+N_out)).
static void SoftSignDerivative(Tensor_t &B, const Tensor_t &A)
static Scalar_t L1Regularization(const Matrix_t &W)
static void SymmetricReluDerivative(Tensor_t &B, const Tensor_t &A)
static void InitializeGlorotNormal(Matrix_t &A)
Truncated normal initialization (Glorot, called also Xavier normal) The values are sample with a norm...
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
static Matrix_t & RecurrentLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient)
Backward pass for Recurrent Networks.
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static void IdentityDerivative(Tensor_t &B, const Tensor_t &A)
static void InitializeRNNDescriptors(TDescriptors *&, GenLayer_t *)
Definition Cuda.h:177
static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
Definition Cuda.h:114
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
CNN::TCNNWorkspace< ConvLayer_t > ConvWorkspace_t
Definition Cuda.h:101
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given tensor A and scale the result by reciprocal ...
static void InitializeGRUDescriptors(TDescriptors *&, GenLayer_t *)
Definition Cuda.h:179
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const AFloat alpha=1, const AFloat beta=0)
static void InitializeGRUTensors(GenLayer_t *)
Definition Cuda.h:187
static void FreeConvWorkspace(TWorkspace *&)
Only used for certain cudnn on-device memory.
Definition Cuda.h:174
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void Sigmoid(Matrix_t &YHat, const Matrix_t &)
static void InitializeZero(Tensor_t &A)
static void InitializeRNNWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition Cuda.h:181
static void InitializeUniform(Matrix_t &A)
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
Initialize CNN data/operator descriptors.
Definition Cuda.h:147
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
AFloat Scalar_t
Definition Cuda.h:71
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.
static void Sigmoid(Tensor_t &B)
static void DropoutBackward(Tensor_t &, TDescriptors *, TWorkspace *)
Definition Cuda.h:469
static void InitializeLSTMTensors(GenLayer_t *)
Definition Cuda.h:186
static void FreeRNNWorkspace(TWorkspace *&)
Definition Cuda.h:190
static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
Definition Cuda.h:121
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void InitializeLSTMWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition Cuda.h:182
static void PrintTensor(const Tensor_t &A, const std::string name="Cuda-tensor", bool=false)
Definition Cuda.h:860
static void Im2colIndices(std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const AFloat alpha=1, const AFloat beta=0)
Computes the gradient of the activation function.
static void Copy(Tensor_t &A, const Tensor_t &B)
static void InitializeConvDescriptors(TDescriptors *&, ConvLayer_t *)
Definition Cuda.h:152
static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void InitializeGauss(Matrix_t &A)
static void InitializePoolDropoutWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *)
Definition Cuda.h:167
static Scalar_t L2Regularization(const Matrix_t &W)
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void ScaleAdd(Tensor_t &A, const Tensor_t &B, Scalar_t beta=1.0)
Above functions extended to vectors.
static TMVA::Experimental::MemoryLayout GetTensorLayout()
Definition Cuda.h:109
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static TRandom * fgRandomGen
Definition Cuda.h:67
CNN::TCNNDescriptors< PoolingLayer_t > PoolingDescriptors_t
Definition Cuda.h:103
static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
static void InitializeIdentity(Matrix_t &A)
static void RNNForward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, const RNNDescriptors_t &, RNNWorkspace_t &, bool)
Definition Cuda.h:687
static void InitializePoolDescriptors(TDescriptors *&, PoolingLayer_t *)
Definition Cuda.h:154
static void Hadamard(Matrix_t &A, const Matrix_t &B)
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
Definition Cuda.h:118
TCudaTensor< AFloat > Tensor_t
Definition Cuda.h:74
CNN::TCNNWorkspace< PoolingLayer_t > PoolingWorkspace_t
Definition Cuda.h:104
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in A.
AReal AFloat
Definition Cuda.h:70
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
static void ReluDerivative(Tensor_t &B, const Tensor_t &A)
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void CreateWeightTensors(std::vector< Matrix_t > &newWeights, const std::vector< Matrix_t > &weights)
Definition Cuda.h:132
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void Softmax(Matrix_t &YHat, const Matrix_t &)
static void InitializeActivationDescriptor(ActivationDescriptor_t &, EActivationFunction, double=0.0)
Definition Cuda.h:156
static void InitializeConvWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *)
Definition Cuda.h:163
static void InitializeLSTMDescriptors(TDescriptors *&, GenLayer_t *)
Definition Cuda.h:178
static void ReleaseBNormDescriptors(TDescriptors *&)
Definition Cuda.h:161
static void GaussDerivative(Tensor_t &B, const Tensor_t &A)
static void Relu(Tensor_t &B)
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
Definition Cuda.h:545
static void AddRowWise(Tensor_t &output, const Matrix_t &biases)
Definition Cuda.h:217
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void InitializeRNNTensors(GenLayer_t *)
Definition Cuda.h:185
TCudaDeviceBuffer< AFloat > DeviceBuffer_t
Definition Cuda.h:75
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights)
Definition Cuda.h:208
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void SymmetricRelu(Tensor_t &B)
DummyCudaDataType TensorDescriptor_t
Definition Cuda.h:84
static void ReleaseConvDescriptors(TDescriptors *&)
Release CNN data/operator descriptors.
Definition Cuda.h:159
static void FreePoolDropoutWorkspace(TWorkspace *&)
Definition Cuda.h:175
static TRandom & GetRandomGenerator()
static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A)
Definition Cuda.h:326
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void SigmoidDerivative(Tensor_t &B, const Tensor_t &A)
static void ReleaseDescriptor(ActivationDescriptor_t &)
Definition Cuda.h:172
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void CopyDiffArch(std::vector< Matrix_t > &A, const std::vector< AMatrix_t > &B)
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out from the T x B x D tensor in.
static bool IsCudnn()
Definition Cuda.h:139
static void ReleaseRNNDescriptors(TDescriptors *&)
Definition Cuda.h:189
static Matrix_t & GRULayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, bool)
Backward pass for GRU Network.
Definition Cuda.h:724
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
static void InitializeZero(Matrix_t &A)
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void DropoutForward(Matrix_t &A, Scalar_t p)
Definition Cuda.h:464
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void TanhDerivative(Tensor_t &B, const Tensor_t &A)
static void CopyDiffArch(Tensor_t &A, const ATensor_t &B)
static void ReleasePoolDescriptors(TDescriptors *&)
Definition Cuda.h:160
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition Cuda.h:111
Generic General Layer class.
TMatrixT.
Definition TMatrixT.h:40
This is the base class for the ROOT Random number generators.
Definition TRandom.h:27
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
std::shared_ptr< std::function< double(double)> > Tanh
Definition NeuralNet.cxx:29
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
EActivationFunction
Enum that represents layer activation functions.
Definition Functions.h:32
std::shared_ptr< std::function< double(double)> > Gauss
Definition NeuralNet.cxx:12
std::shared_ptr< std::function< double(double)> > SoftSign
Definition NeuralNet.cxx:32
MemoryLayout
Memory layout type (copy from RTensor.hxx)
Definition CudaTensor.h:47
create variable transformations
TLine l
Definition textangle.C:4
static void output()