Loading [MathJax]/jax/input/TeX/config.js
Logo ROOT  
Reference Guide
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Cuda.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 05/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12///////////////////////////////////////////////////////////////////
13// Definition of the TCuda architecture class, which provides an //
14// implementation of the low-level functionality for neural //
15// networks for the CUDA computing architectures. //
16///////////////////////////////////////////////////////////////////
17
18#ifndef TMVA_DNN_ARCHITECTURES_CUDA
19#define TMVA_DNN_ARCHITECTURES_CUDA
20
21#include "TMVA/DNN/Functions.h"
26
27
28#include "cuda.h"
29#include "Cuda/CudaBuffers.h"
30#include "Cuda/CudaMatrix.h"
31#include "Cuda/CudaTensor.h"
32#include "TMVA/DNN/DataLoader.h"
33#include <utility>
34#include <vector>
35
36class TRandom;
37
38namespace TMVA
39{
40namespace DNN
41{
50 struct CudaDataType {};
52
54
55/** The TCuda architecture class.
56 *
57 * Low-level interface class for CUDA computing architectures. Contains as
58 * public types the declaration of the scalar, matrix and buffer types
59 * for this architecture as well as the remaining functions in the low-level
60 * interface in the form of static members.
61 */
62template<typename AReal = Float_t>
63class TCuda
64{
65private:
67public:
68
69 using AFloat = AReal;
71
76
81 //using OpTensorDescriptor_t = CudaOpTensorDescriptor;
84 //using ReductionDescriptor_t = CudaReduceTensorDescriptor;
91
92 using EmptyDescriptor_t = CudaEmptyDescriptor; // Used if a descriptor is not needed in a class
93
97
104
107
109
/** Build a tensor from (n, c, h, w) sizes. The shape handed to Tensor_t is
 *  {c, h*w, n}: height and width are merged into one dimension and \p n comes last. */
110 static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w) {
111 return Tensor_t( {c,h*w,n}, GetTensorLayout());
112 }
/** Build a tensor from (b, t, w) sizes. The shape handed to Tensor_t is {t, w, b}. */
113 static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
114 {
115 return Tensor_t( {t, w, b}, GetTensorLayout());
116 }
/** Same shape mapping as the (n, c, h, w) overload, but \p buffer is passed to the
 *  Tensor_t constructor as well.
 *  NOTE(review): the meaning of the trailing `0, 0` arguments is not visible here
 *  (presumably device/stream indices) - confirm against the Tensor_t constructor. */
117 static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w) {
118 return Tensor_t( buffer, {c,h*w, n}, GetTensorLayout(), 0, 0);
119 }
/** Same shape mapping as the (b, t, w) overload, but \p buffer is passed to the
 *  Tensor_t constructor as well. */
120 static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
121 {
122 return Tensor_t(buffer, {t, w, b}, GetTensorLayout());
123 }
124
125 // create a weight tensor/matrix from another tensor using its shape
126 // static Matrix_t CreateWeightTensor( Matrix_t & A) {
127 // return Matrix_t( A.GetNrows(), A.GetNcols());
128 // }
129 // create a weight tensor/matrix vector from another tensor/weight vector using the given tensor shapes
130 // this function is used by the optimizers to store intermediate weight representations
131 static void CreateWeightTensors( std::vector<Matrix_t> & newWeights, const std::vector<Matrix_t> & weights) {
132 if (!newWeights.empty()) newWeights.clear();
133 size_t n = weights.size();
134 for (size_t i = 0; i < n; ++i)
135 newWeights.emplace_back( weights[i].GetNrows(), weights[i].GetNcols());
136 }
137
/** Always returns false for this architecture. */
138 static bool IsCudnn() { return false; }
139 //____________________________________________________________________________
140 //
141 // Architecture Initialization
142 //____________________________________________________________________________
143
144 /** Initialize CNN data/operator descriptors. Not used at the moment.*/
145
146 static void InitializeBNormDescriptors(TDescriptors * & /*descriptors*/,
147 BNormLayer_t */*L = nullptr*/) {
148 Error("InitializeBNormDescriptrs", "Batch normalization on GPU is supported only with Cudnn");
149 }
150
// Every function from here up to FreeRNNWorkspace has an empty body and ignores
// all of its arguments (the parameter names are commented out).
// NOTE(review): presumably these exist only so that generic layer code can call
// the same interface on every architecture - confirm against the callers.
151 static void InitializeConvDescriptors(TDescriptors *& /*descriptors*/, ConvLayer_t * /*L = nullptr*/) {}
152
153 static void InitializePoolDescriptors(TDescriptors *& /*descriptors*/, PoolingLayer_t * /*L = nullptr*/) {}
154
155 static void InitializeActivationDescriptor(ActivationDescriptor_t &/*descriptors*/, EActivationFunction /*activFunc */ , double /*coef*/ = 0.0) {}
156
157 /** Release CNN data/operator descriptors. Not used at the moment.*/
158 static void ReleaseConvDescriptors(TDescriptors * & /*descriptors*/) {}
159 static void ReleasePoolDescriptors(TDescriptors * & /*descriptors*/) {}
160 static void ReleaseBNormDescriptors(TDescriptors *& /*descriptors*/) {}
161
// Workspace set-up hooks for convolution and pooling/dropout layers: no-ops here.
162 static void InitializeConvWorkspace(TWorkspace * & /*workspace*/,
163 TDescriptors * & /*descriptors*/,
164 const DNN::CNN::TConvParams & /*params*/,
165 ConvLayer_t */*L = nullptr*/) {}
166 static void InitializePoolDropoutWorkspace(TWorkspace * & /*workspace*/,
167 TDescriptors * & /*descriptors*/,
168 const DNN::CNN::TConvParams & /*params*/,
169 PoolingLayer_t */*L = nullptr*/) {}
170
171 static void ReleaseDescriptor(ActivationDescriptor_t & /*activationDescr*/) {}
172
173 static void FreeConvWorkspace(TWorkspace * & /*workspace*/) {} ///< Only used for certain cudnn on-device memory
174 static void FreePoolDropoutWorkspace(TWorkspace * & /*workspace*/) {}
175
// Recurrent-layer (RNN / LSTM / GRU) descriptor, workspace and tensor hooks: no-ops here.
176 static void InitializeRNNDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
177 static void InitializeLSTMDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
178 static void InitializeGRUDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
179
180 static void InitializeRNNWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
181 static void InitializeLSTMWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
182 static void InitializeGRUWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
183
184 static void InitializeRNNTensors(GenLayer_t * /*layer*/) {}
185 static void InitializeLSTMTensors(GenLayer_t * /*layer*/) {}
186 static void InitializeGRUTensors(GenLayer_t * /*layer*/) {}
187
188 static void ReleaseRNNDescriptors(TDescriptors *& /*descriptors*/) {}
189 static void FreeRNNWorkspace(TWorkspace *& /*workspace*/) {}
190
191 //static void InitializeRNNTensors(RNNLayer_t * /*layer*/) {}
192
193 //____________________________________________________________________________
194 //
195 // Propagation
196 //____________________________________________________________________________
197
198 /** @name Forward Propagation
199 * Low-level functions required for the forward propagation of activations
200 * through the network.
201 */
202 ///@{
203 /** Matrix-multiply \p input with the transpose of \p weights and
204 * write the results into \p output. */
205 static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights);
206
207 static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights) {
208 Matrix_t output_matrix = output.GetMatrix();
209 MultiplyTranspose( output_matrix, input.GetMatrix(), weights);
210 //ensor_t::MatrixToTensor(output_matrix, output); // this maybe is not needed
211 }
212
213 /** Add the vectors biases row-wise to the matrix output */
214 static void AddRowWise(Matrix_t &output,const Matrix_t &biases);
215
216 static void AddRowWise(Tensor_t &output, const Matrix_t &biases) {
217 Matrix_t output_matrix = output.GetMatrix();
218 AddRowWise(output_matrix, biases);
219 //Tensor_t::MatrixToTensor(output_matrix, output); // this maybe is not needed
220 }
221
222 /** @name Backward Propagation (Dense Layers)
223 * Low-level functions required for the backward propagation of gradients
224 * through the network.
225 */
226 ///@{
227 /** Perform the complete backward propagation step. If the provided
228 * \p activationGradientsBackward matrix is not empty, compute the
229 * gradients of the objective function with respect to the activations
230 * of the previous layer (backward direction).
231 * Also compute the weight and the bias gradients. Modifies the values
232 * in \p df and thus produces only a valid result, if it is applied the
233 * first time after the corresponding forward propagation has been per-
234 * formed. */
235 static void Backward(Tensor_t & activationGradientsBackward,
236 Matrix_t & weightGradients,
237 Matrix_t & biasGradients,
238 const Tensor_t & df,
239 const Tensor_t & activationGradients,
240 const Matrix_t & weights,
241 const Tensor_t & activationBackward);
242
243 /** Adds the elements in matrix B scaled by \p beta to the elements in
244 * the matrix A. This is required for the weight update in the gradient
245 * descent step.*/
246 static void ScaleAdd(Matrix_t & A,
247 const Matrix_t & B,
248 Scalar_t beta = 1.0);
249
250 static void Copy(Matrix_t & B,
251 const Matrix_t & A);
252
253 // copy from another type of matrix
254 template<typename AMatrix_t>
255 static void CopyDiffArch(Matrix_t & B, const AMatrix_t & A);
256
257
258 /** Above functions extended to vectors */
259 static void ScaleAdd(Tensor_t & A,
260 const Tensor_t & B,
261 Scalar_t beta = 1.0);
262
263 static void Copy(Tensor_t & A,
264 const Tensor_t & B);
265
266 // copy from another tensor
267 template<typename ATensor_t>
268 static void CopyDiffArch(Tensor_t & A,
269 const ATensor_t & B);
270
271 // copy from vector of matrices of different types
272 template<typename AMatrix_t>
273 static void CopyDiffArch(std::vector<Matrix_t> & A,
274 const std::vector<AMatrix_t> & B);
275
276 ///@}
277
278 //____________________________________________________________________________
279 //
280 // Activation Functions
281 //____________________________________________________________________________
282
283 /** @name Activation Functions
284 * For each activation function, the low-level interface contains two routines.
285 * One that applies the activation function to a matrix and one that evaluates
286 * the derivatives of the activation function at the elements of a given matrix
287 * and writes the results into the result matrix.
288 */
289 ///@{
290 /* impl using Matrix */
291 /*inline void evaluate(Matrix_t &A, EActivationFunction f)
292 {
293 Tensor_t tA(A);
294 evaluate<TCuda<AReal>>(tA,f);
295 }*/
297 const ActivationDescriptor_t activationDescr,
298 const double coef = 0.0, const AFloat alpha = 1,
299 const AFloat beta = 0);
300
301 /** Computes the gradient of the activation function */
302 static void ActivationFunctionBackward(Tensor_t & dX, const Tensor_t & Y,
303 const Tensor_t & dY, const Tensor_t & X,
304 EActivationFunction activFunct,
305 const ActivationDescriptor_t activationDescr,
306 const AFloat alpha = 1,
307 const AFloat beta = 0);
308
310 const Tensor_t &A);
311
312 static void Relu(Tensor_t & B);
313 static void ReluDerivative(Tensor_t & B,
314 const Tensor_t & A);
315
316 static void Sigmoid(Tensor_t & B);
318 const Tensor_t & A);
319
320 static void Tanh(Tensor_t & B);
321 static void TanhDerivative(Tensor_t & B,
322 const Tensor_t & A);
323
324 static void FastTanh(Tensor_t &B) { return Tanh(B); }
325 static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A) { return TanhDerivative(B, A); }
326
327 static void SymmetricRelu(Tensor_t & B);
329 const Tensor_t & A);
330
331 static void SoftSign(Tensor_t & B);
333 const Tensor_t & A);
334
335 static void Gauss(Tensor_t & B);
337 const Tensor_t & A);
338 ///@}
339
340 //____________________________________________________________________________
341 //
342 // Loss Functions
343 //____________________________________________________________________________
344
345 /** @name Loss Functions
346 * Loss functions compute a scalar value given the \p output of the network
347 * for a given training input and the expected network prediction \p Y that
348 * quantifies the quality of the prediction. For each function, a routine
349 * that computes the gradients (suffixed by Gradients) must also be provided for
350 * the starting of the backpropagation algorithm.
351 */
352 ///@{
353
355 const Matrix_t &weights);
356 static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y,
357 const Matrix_t &output, const Matrix_t &weights);
358
359 /** Sigmoid transformation is implicitly applied, thus \p output should
360 * hold the linear activations of the last layer in the net. */
362 const Matrix_t &weights);
363
364 static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
365 const Matrix_t &output, const Matrix_t &weights);
366
367 /** Softmax transformation is implicitly applied, thus \p output should
368 * hold the linear activations of the last layer in the net. */
370 const Matrix_t &weights);
372 const Matrix_t &output, const Matrix_t &weights);
373 ///@}
374
375 //____________________________________________________________________________
376 //
377 // Output Functions
378 //____________________________________________________________________________
379
380 /** @name Output Functions
381 * Output functions transform the activations \p output of the
382 * output layer in the network to a valid prediction \p YHat for
383 * the desired usage of the network, e.g. the identity function
384 * for regression or the sigmoid transformation for two-class
385 * classification.
386 */
387 ///@{
388 static void Sigmoid(Matrix_t &YHat,
389 const Matrix_t & );
390 static void Softmax(Matrix_t &YHat,
391 const Matrix_t & );
392 ///@}
393
394 //____________________________________________________________________________
395 //
396 // Regularization
397 //____________________________________________________________________________
398
399 /** @name Regularization
400 * For each regularization type two functions are required, one named
401 * <tt><Type>Regularization</tt> that evaluates the corresponding
402 * regularization functional for a given weight matrix and the
403 * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
404 * component in the gradients to the provided matrix.
405 */
406 ///@{
407
410 const Matrix_t & W,
412
415 const Matrix_t & W,
417 ///@}
418
419 //____________________________________________________________________________
420 //
421 // Initialization
422 //____________________________________________________________________________
423
424 /** @name Initialization
425 * For each initialization method, one function in the low-level interface
426 * is provided. The naming scheme is <p>Initialize<Type></p> for a given
427 * initialization method Type.
428 */
429 ///@{
430
431 static void InitializeGauss(Matrix_t & A);
434 static void InitializeZero(Matrix_t & A);
435 static void InitializeZero(Tensor_t &A);
438
439 // return static instance of random generator used for initialization
440 // if generator does not exist it is created the first time with a random seed (e.g. seed = 0)
442 // set random seed for the static generator
443 // if the static generator does not exist it is created
444 static void SetRandomSeed(size_t seed);
445 ///@}
446
447 //____________________________________________________________________________
448 //
449 // Dropout
450 //____________________________________________________________________________
451
452 /** @name Dropout
453 */
454 ///@{
455
456 /** Apply dropout with activation probability \p p to the given
457 * tensor \p A and scale the result by reciprocal of \p p. */
458 static void DropoutForward(Tensor_t & A,
459 TDescriptors * descriptors,
460 TWorkspace * workspace,
461 Scalar_t p);
462
463 static void DropoutForward(Matrix_t & A, Scalar_t p) {
464 Tensor_t tA(A);
465 DropoutForward( tA, static_cast<TDescriptors *> (nullptr), static_cast<TWorkspace *> (nullptr), p );
466 }
467
/** Backward step of dropout: empty in this architecture; all arguments are ignored. */
468 static void DropoutBackward(Tensor_t & /* A */,
469 TDescriptors * /*descriptors */,
470 TWorkspace * /* workspace */ ) {}
471 ///@}
472
473 //____________________________________________________________________________
474 //
475 // Batch Normalization
476 //____________________________________________________________________________
477
478 /** @name Batch Normalization Layer Propagation
479 */
480 ///@{
481
482 /** The inputs from each batch are normalized during training to have zero mean and unit variance
483 * and they are then scaled by two parameters, different for each input variable:
484 * - a scale factor \gamma gamma
485 * - an offset \beta beta */
486
488 Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans,
489 Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum,
490 Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor);
491
492 /** During inference the inputs are not normalized using the batch mean but using the previously
493 * computed running mean and variance */
494
496 const Matrix_t &runningMeans, const Matrix_t &runningVars,
498
499 static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx,
500 Matrix_t &gamma, // Matrix_t &beta, (not needed)
501 Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance,
502 const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &);
503
504 //____________________________________________________________________________
505 //
506 // Convolutional Layer Propagation
507 //____________________________________________________________________________
508
509 /** @name Forward Propagation in Convolutional Layer
510 */
511 ///@{
512
513 /** Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters. */
514 static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride);
515
516 /** Transform the matrix B in local view format, suitable for
517 * convolution, and store it in matrix A */
518 static void Im2col(Matrix_t &A,
519 const Matrix_t &B,
520 size_t imgHeight,
521 size_t imgWidth,
522 size_t fltHeight,
523 size_t fltWidth,
524 size_t strideRows,
525 size_t strideCols,
526 size_t zeroPaddingHeight,
527 size_t zeroPaddingWidth);
528
529 static void Im2colIndices(std::vector<int> &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight,
530 size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight,
531 size_t zeroPaddingWidth);
532 static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector<int> & V);
533
534 /** Rotates the matrix \p B, which is representing a weights,
535 * and stores them in the matrix \p A. */
536 static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight,
537 size_t filterWidth, size_t numFilters);
538
539 /** Add the biases in the Convolutional Layer. */
540 static void AddConvBiases(Matrix_t &output, const Matrix_t &biases);
541 ///@}
542
543 /** Dummy placeholder - preparation is currently only required for the CUDA architecture. */
544 static void PrepareInternals(Tensor_t &) {}
545
546 /** Forward propagation in the Convolutional layer */
548 Tensor_t & inputActivationFunc,
549 const Tensor_t &input,
550 const Matrix_t &weights, const Matrix_t & biases,
551 const DNN::CNN::TConvParams & params, EActivationFunction activFunc,
552 Tensor_t & /* inputPrime */,
553 const ConvDescriptors_t & /*descriptors*/, // Empty struct for cuda architecture
554 ConvWorkspace_t & /*workspace*/); // Empty struct for cuda architecture
555 //void * cudnnWorkspace = nullptr); // Remains nullptr for cuda architecture
556 /** @name Backward Propagation in Convolutional Layer
557 */
558 ///@{
559
560 /** Perform the complete backward propagation step in a Convolutional Layer.
561 * If the provided \p activationGradientsBackward matrix is not empty, compute the
562 * gradients of the objective function with respect to the activations
563 * of the previous layer (backward direction).
564 * Also compute the weight and the bias gradients. Modifies the values
565 * in \p df and thus produces only a valid result, if it is applied the
566 * first time after the corresponding forward propagation has been per-
567 * formed. */
568 static void ConvLayerBackward(Tensor_t &activationGradientsBackward,
569 Matrix_t &weightGradients, Matrix_t &biasGradients,
570 Tensor_t &df,
571 Tensor_t &activationGradients,
572 const Matrix_t &weights,
573 const Tensor_t &activationBackward,
574 const Tensor_t & outputTensor,
575 EActivationFunction activFunc,
576 const ConvDescriptors_t & /*descriptors*/,
577 ConvWorkspace_t & /*workspace*/,
578 size_t batchSize, size_t inputHeight,
579 size_t inputWidth, size_t depth,
580 size_t height, size_t width,
581 size_t filterDepth, size_t filterHeight,
582 size_t filterWidth, size_t nLocalViews );
583
584 /** Utility function for calculating the activation gradients of the layer
585 * before the convolutional layer. */
586 static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward,
587 const Tensor_t &df,
588 const Matrix_t &weights, size_t batchSize,
589 size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
590 size_t width, size_t filterDepth, size_t filterHeight,
591 size_t filterWidth);
592
593 /** Utility function for calculating the weight gradients of the convolutional
594 * layer. */
595 static void CalculateConvWeightGradients(Matrix_t &weightGradients,
596 const Tensor_t &df,
597 const Tensor_t &activations_backward,
598 size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
599 size_t height, size_t width, size_t filterDepth, size_t filterHeight,
600 size_t filterWidth, size_t nLocalViews);
601
602 /** Utility function for calculating the bias gradients of the convolutional
603 * layer */
604 static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df,
605 size_t batchSize, size_t depth, size_t nLocalViews);
606 ///@}
607
608 //____________________________________________________________________________
609 //
610 // Max Pooling Layer Propagation
611 //____________________________________________________________________________
612 /** @name Forward Propagation in Max Pooling Layer
613 */
614 ///@{
615
616 /** Downsample the matrix \p C to the matrix \p A, using max
617 * operation, such that the winning indices are stored in matrix
618 * \p B. */
619 static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C,
620 const PoolingDescriptors_t & /*descriptors*/,
621 PoolingWorkspace_t & /*workspace*/,
622 size_t imgHeight, size_t imgWidth, size_t fltHeight,
623 size_t fltWidth, size_t strideRows, size_t strideCols);
624
625 ///@}
626
627 /** @name Backward Propagation in Max Pooling Layer
628 */
629 ///@{
630 /** Perform the complete backward propagation step in a Pooling Layer. Based on the
631 * winning indices stored in the index matrix, it just forwards the activation
632 * gradients to the previous layer. */
633 static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward,
634 const Tensor_t &activationGradients,
635 const Tensor_t &indexMatrix,
636 const Tensor_t & /*inputActivation*/,
637 const Tensor_t & /*outputTensor*/,
638 const PoolingDescriptors_t & /*descriptors*/,
639 PoolingWorkspace_t & /*workspace*/,
640 size_t imgHeight,
641 size_t imgWidth,
642 size_t fltHeight,
643 size_t fltWidth,
644 size_t strideRows,
645 size_t strideCols,
646 size_t nLocalViews);
647
648 ///@}
649
650 //____________________________________________________________________________
651 //
652 // Reshape Layer Propagation
653 //____________________________________________________________________________
654 /** @name Forward and Backward Propagation in Reshape Layer
655 */
656 ///@{
657
658 /** Transform the matrix \p B to a matrix with different dimensions \p A */
659 static void Reshape(Matrix_t &A, const Matrix_t &B);
660
661 /** Flattens the tensor \p B, such that each matrix, is stretched in
662 * one row, resulting with a matrix \p A. */
663 static void Flatten(Tensor_t &A, const Tensor_t &B); // size_t size, size_t nRows, size_t nCols);
664
665 /** Transforms each row of \p B to a matrix and stores it in the
666 * tensor \p A. */
667 static void Deflatten(Tensor_t &A, const Tensor_t &B); // size_t index, size_t nRows,size_t nCols);
668
669 /** Rearrange data according to time: fill the B x T x D tensor \p out from the T x B x D tensor \p in */
670 static void Rearrange(Tensor_t &out, const Tensor_t &in);
671
672
673 /** Backward pass for Recurrent Networks */
674 static Matrix_t & RecurrentLayerBackward(Matrix_t & state_gradients_backward, // BxH
675 Matrix_t & input_weight_gradients,
676 Matrix_t & state_weight_gradients,
677 Matrix_t & bias_gradients,
678 Matrix_t & df, //DxH
679 const Matrix_t & state, // BxH
680 const Matrix_t & weights_input, // HxD
681 const Matrix_t & weights_state, // HxH
682 const Matrix_t & input, // BxD
683 Matrix_t & input_gradient);
684
685 // dummy RNN functions
/** Placeholder with an empty body: every argument is ignored and nothing is computed. */
686 static void RNNForward(const Tensor_t & /* x */, const Matrix_t & /* hx */, const Matrix_t & /* cx */,
687 const Matrix_t & /* weights */, Tensor_t & /* y */, Matrix_t & /* hy */, Matrix_t & /* cy */,
688 const RNNDescriptors_t & /* descr */, RNNWorkspace_t & /* workspace */, bool /* isTraining */)
689 {
690 }
691
/** Placeholder with an empty body: every argument is ignored and nothing is computed. */
692 static void RNNBackward(const Tensor_t & /* x */, const Matrix_t & /* hx */, const Matrix_t & /* cx */,
693 const Tensor_t & /* y */, const Tensor_t & /* dy */, const Matrix_t & /* dhy */,
694 const Matrix_t & /* dcy */, const Tensor_t & /* weights */, Tensor_t & /* dx */,
695 Matrix_t & /* dhx */, Matrix_t & /* dcx */, Tensor_t & /* dw */,
696 const RNNDescriptors_t & /* desc */, RNNWorkspace_t & /* workspace */)
697 {
698 }
/** LSTM backward pass placeholder for this architecture.
 *
 *  Performs no computation: it calls Fatal() with a message stating that recurrent
 *  layers are not supported in the native Cuda architecture. Only
 *  \p state_gradients_backward is named; all other parameters are unused.
 *
 *  \return \p state_gradients_backward, unchanged.
 */
699 static Matrix_t &
700 LSTMLayerBackward(Matrix_t &state_gradients_backward, Matrix_t & /*cell_gradients_backward*/,
701 Matrix_t & /*input_weight_gradients*/, Matrix_t & /*forget_weight_gradients*/,
702 Matrix_t & /*candidate_weight_gradients*/, Matrix_t & /*output_weight_gradients*/,
703 Matrix_t & /*input_state_weight_gradients*/, Matrix_t & /*forget_state_weight_gradients*/,
704 Matrix_t & /*candidate_state_weight_gradients*/, Matrix_t & /*output_state_weight_gradients*/,
705 Matrix_t & /*input_bias_gradients*/, Matrix_t & /*forget_bias_gradients*/,
706 Matrix_t & /*candidate_bias_gradients*/, Matrix_t & /*output_bias_gradients*/, Matrix_t & /*di*/,
707 Matrix_t & /*df*/, Matrix_t & /*dc*/, Matrix_t & /*dout*/,
708 const Matrix_t & /*precStateActivations*/, const Matrix_t & /*precCellActivations*/,
709 const Matrix_t & /*fInput*/, const Matrix_t & /*fForget*/, const Matrix_t & /*fCandidate*/,
710 const Matrix_t & /*fOutput*/, const Matrix_t & /*weights_input*/,
711 const Matrix_t & /*weights_forget*/, const Matrix_t & /*weights_candidate*/,
712 const Matrix_t & /*weights_output*/, const Matrix_t & /*weights_input_state*/,
713 const Matrix_t & /*weights_forget_state*/, const Matrix_t & /*weights_candidate_state*/,
714 const Matrix_t & /*weights_output_state*/, const Matrix_t & /*input*/,
715 Matrix_t & /*input_gradient*/, Matrix_t & /*cell_gradient*/, Matrix_t & /*cell_tanh*/)
716 {
717 Fatal("TCuda::LSTMLayerBackward", "Recurrent layers are not supported in the native Cuda architecture!!!");
// NOTE(review): reached only if Fatal() returns (presumably it aborts - confirm);
// the return statement satisfies the declared reference return type.
718 return state_gradients_backward;
719 }
720
721 /** Backward pass for GRU Network */
// Performs no computation: calls Fatal() with a message stating that recurrent
// layers are not supported in the native Cuda architecture, then returns
// state_gradients_backward unchanged. All other parameters are unused.
722 static Matrix_t &
723 GRULayerBackward(Matrix_t &state_gradients_backward, Matrix_t & /*reset_weight_gradients*/,
724 Matrix_t & /*update_weight_gradients*/, Matrix_t & /*candidate_weight_gradients*/,
725 Matrix_t & /*reset_state_weight_gradients*/, Matrix_t & /*update_state_weight_gradients*/,
726 Matrix_t & /*candidate_state_weight_gradients*/, Matrix_t & /*reset_bias_gradients*/,
727 Matrix_t & /*update_bias_gradients*/, Matrix_t & /*candidate_bias_gradients*/, Matrix_t & /*dr*/,
728 Matrix_t & /*du*/, Matrix_t & /*dc*/, const Matrix_t & /*precStateActivations*/,
729 const Matrix_t & /*fReset*/, const Matrix_t & /*fUpdate*/, const Matrix_t & /*fCandidate*/,
730 const Matrix_t & /*weights_reset*/, const Matrix_t & /*weights_update*/,
731 const Matrix_t & /*weights_candidate*/, const Matrix_t & /*weights_reset_state*/,
732 const Matrix_t & /*weights_update_state*/, const Matrix_t & /*weights_candidate_state*/,
733 const Matrix_t & /*input*/, Matrix_t & /*input_gradient*/, bool)
734 {
735 Fatal("TCuda::GRULayerBackward", "Recurrent layers are not supported in the native Cuda architecture!!!");
// NOTE(review): reached only if Fatal() returns (presumably it aborts - confirm).
736 return state_gradients_backward;
737 }
738 ///@}
739
740 //____________________________________________________________________________
741 //
742 // Additional Arithmetic Functions
743 //____________________________________________________________________________
744
745 /** @name Additional Arithmetic Functions
746 *
747 * Additional arithmetic on CUDA matrices used to implement the low-level
748 * interface.
749 */
750 ///@{
751
752 /** Standard multiplication of two matrices \p A and \p B with the result being
753 * written into C.
754 */
755 static void Multiply(Matrix_t &C,
756 const Matrix_t &A,
757 const Matrix_t &B);
758 /** Matrix multiplication of two matrices \p A and \p B^T (transposed) with the
759 * result being written into C.
760 */
762 const Matrix_t &input,
763 const Matrix_t &Weights,
764 Scalar_t alpha = 1.0, Scalar_t beta = 0.);
765 /** In-place Hadamard (element-wise) product of matrices \p A and \p B
766 * with the result being written into \p A.
767 */
768 static void Hadamard(Tensor_t &A,
769 const Tensor_t &B);
770 static void Hadamard(Matrix_t &A,
771 const Matrix_t &B);
772 // {
773 // Tensor_t tA(A);
774 // Hadamard( tA, Tensor_t(B));
775 // }
776
777 /** Sum columns of (m x n) matrix \p A and write the results into the first
778 * m elements in \p B.
779 */
780 static void SumColumns(Matrix_t &B,
781 const Matrix_t &A,
782 Scalar_t alpha = 1.0, Scalar_t beta = 0.);
783
784 /** Compute the sum of all elements in \p A */
785 static Scalar_t Sum(const Matrix_t &A);
786
787 /** Check two matrices for equality, taking floating point arithmetic errors into account. */
788 static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon = 0.1);
789
790 /** Add the constant \p beta to all the elements of matrix \p A and write the
791 * result into \p A.
792 */
794
795 /** Multiply the constant \p beta to all the elements of matrix \p A and write the
796 * result into \p A.
797 */
799
800 /** Reciprocal each element of the matrix \p A and write the result into
801 * \p A
802 */
804
805 /** Square each element of the matrix \p A and write the result into
806 * \p A
807 */
809
810 /** Square root each element of the matrix \p A and write the result into
811 * \p A
812 */
814
815 // optimizer functions
816 static void AdamUpdate(Matrix_t & A, const Matrix_t & M, const Matrix_t & V, Scalar_t alpha, Scalar_t eps);
819
820 // printing of tensor
821 static void PrintTensor( const Tensor_t & A, const std::string name = "Cuda-tensor", bool = false);
822
823 ///////////////////////////////////////////////////////////////////////////////
824 /// extra functions defined only for CPU architecture !!!
825 //////////////////////////////////////////////////////////////////////////////
826
827 /** Sum rows of (m x n) matrix \p A and write the results into the first
828 * m elements in \p B.
829 */
830 static void SumRows(Matrix_t & B, const Matrix_t & A);
831
832
833};
834
835//____________________________________________________________________________
836template <typename AFloat>
837template <typename AMatrix_t>
839 const AMatrix_t &A)
840{
841 // copy from another architecture using the reference one
842 // this is not very efficient since creates temporary objects
843 TMatrixT<AFloat> tmp = A;
844 Copy(B, TCudaMatrix<AFloat>(tmp) );
845}
846
847//____________________________________________________________________________
848template <typename AFloat>
849template <typename AMatrix_t>
851 const std::vector<AMatrix_t> &A)
852{
853 for (size_t i = 0; i < B.size(); ++i) {
854 CopyDiffArch(B[i], A[i]);
855 }
856}
857
858template <typename AFloat>
859void TCuda<AFloat>::PrintTensor(const typename TCuda<AFloat>::Tensor_t & A, const std::string name, bool )
860{
861 std::cout << name << " size = " << A.GetSize() << " shape = { ";
862 auto shape = A.GetShape();
863 for (size_t k = 0; k < shape.size()-1; ++k)
864 std::cout << shape[k] << " , ";
865 std::cout << shape.back() << " } ";
866 std::cout << " strides = { ";
867 auto strides = A.GetStrides();
868 for (size_t k = 0; k < strides.size()-1; ++k)
869 std::cout << strides[k] << " , ";
870 std::cout << strides.back() << " }\n ";
871
872 if (A.GetShape().size() == 2 ) {
873 for (size_t i = 0; i < A.GetShape()[0]; ++i) {
874 std::cout << "{ ";
875 for (size_t j = 0; j < A.GetShape()[1]; ++j) {
876 std::cout << A(i,j) << " ";
877 }
878 std::cout << " } " << std::endl;
879 }
880 } else if (A.GetShape().size() == 3 ) {
881 for (size_t i = 0; i < A.GetFirstSize(); ++i) {
882 std::cout << "{ ";
883 for (size_t j = 0; j < A.GetHSize(); ++j) {
884 std::cout << "{ ";
885 for (size_t k = 0; k < A.GetWSize(); ++k) {
886 std::cout << A(i,j,k) << " ";
887 }
888 std::cout << " } " << std::endl;
889 }
890 std::cout << " } " << std::endl;
891 }
892 }
893 else {
894 for (size_t l = 0; l < A.GetSize(); ++l) {
895 std::cout << A.GetData()[l] << " ";
896 }
897 std::cout << "\n";
898 }
899}
900
901
902} // namespace DNN
903} // namespace TMVA
904
905#endif
#define b(i)
Definition: RSha256.hxx:100
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
void Error(const char *location, const char *msgfmt,...)
void Fatal(const char *location, const char *msgfmt,...)
char name[80]
Definition: TGX11.cxx:109
Generic Max Pooling Layer class.
Definition: MaxPoolLayer.h:59
Layer implementing Batch Normalization.
TCudaDeviceBuffer.
Definition: CudaBuffers.h:100
TCudaHostBuffer.
Definition: CudaBuffers.h:43
TCudaMatrix Class.
Definition: CudaMatrix.h:109
TCudaTensor Class.
Definition: CudaTensor.h:84
TCudaMatrix< AFloat > GetMatrix() const
Definition: CudaTensor.h:304
The TCuda architecture class.
Definition: Cuda.h:64
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void RNNBackward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, Tensor_t &, const RNNDescriptors_t &, RNNWorkspace_t &)
Definition: Cuda.h:692
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
static void InitializeGRUWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:182
TCudaMatrix< AFloat > Matrix_t
Definition: Cuda.h:72
static Matrix_t & LSTMLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &)
Definition: Cuda.h:700
static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void SymmetricRelu(Tensor_t &B)
static void InitializeUniform(Matrix_t &A)
static void FastTanh(Tensor_t &B)
Definition: Cuda.h:324
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
static void Softmax(Matrix_t &YHat, const Matrix_t &)
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
static void InitializeIdentity(Matrix_t &A)
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void InitializeGlorotUniform(Matrix_t &A)
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void InitializeRNNDescriptors(TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:176
static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
Definition: Cuda.h:113
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given tensor A and scale the result by reciprocal ...
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void InitializeGRUDescriptors(TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:178
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void InitializeGRUTensors(GenLayer_t *)
Definition: Cuda.h:186
static void FreeConvWorkspace(TWorkspace *&)
Only used for certain cudnn on-device memory.
Definition: Cuda.h:173
static void Sigmoid(Matrix_t &YHat, const Matrix_t &)
static void InitializeZero(Tensor_t &A)
static void InitializeRNNWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:180
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
Initialize CNN data/operator descriptors.
Definition: Cuda.h:146
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
AFloat Scalar_t
Definition: Cuda.h:70
static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void Sigmoid(Tensor_t &B)
static void DropoutBackward(Tensor_t &, TDescriptors *, TWorkspace *)
Definition: Cuda.h:468
static void InitializeLSTMTensors(GenLayer_t *)
Definition: Cuda.h:185
static void SoftSignDerivative(Tensor_t &B, const Tensor_t &A)
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void SymmetricReluDerivative(Tensor_t &B, const Tensor_t &A)
static void FreeRNNWorkspace(TWorkspace *&)
Definition: Cuda.h:189
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
Definition: Cuda.h:120
static void InitializeLSTMWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:181
static void PrintTensor(const Tensor_t &A, const std::string name="Cuda-tensor", bool=false)
Definition: Cuda.h:859
static void Im2colIndices(std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Copy(Tensor_t &A, const Tensor_t &B)
static void InitializeConvDescriptors(TDescriptors *&, ConvLayer_t *)
Definition: Cuda.h:151
static void Tanh(Tensor_t &B)
static void SigmoidDerivative(Tensor_t &B, const Tensor_t &A)
static TRandom * fgRandomGen
Definition: Cuda.h:66
static void InitializePoolDropoutWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *)
Definition: Cuda.h:166
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void ScaleAdd(Tensor_t &A, const Tensor_t &B, Scalar_t beta=1.0)
Above functions extended to vectors.
static TMVA::Experimental::MemoryLayout GetTensorLayout()
Definition: Cuda.h:108
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
static void Gauss(Tensor_t &B)
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const AFloat alpha=1, const AFloat beta=0)
static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Matrix_t & RecurrentLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient)
Backward pass for Recurrent Networks.
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static void InitializePoolDescriptors(TDescriptors *&, PoolingLayer_t *)
Definition: Cuda.h:153
static void RNNForward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Tensor_t &, Matrix_t &, Matrix_t &, const RNNDescriptors_t &, RNNWorkspace_t &, bool)
Definition: Cuda.h:686
static void Hadamard(Matrix_t &A, const Matrix_t &B)
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
Definition: Cuda.h:117
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
TCudaTensor< AFloat > Tensor_t
Definition: Cuda.h:73
static void InitializeGlorotNormal(Matrix_t &A)
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in A.
static void ReluDerivative(Tensor_t &B, const Tensor_t &A)
AReal AFloat
Definition: Cuda.h:69
static Scalar_t L2Regularization(const Matrix_t &W)
static void IdentityDerivative(Tensor_t &B, const Tensor_t &A)
static TRandom & GetRandomGenerator()
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void CreateWeightTensors(std::vector< Matrix_t > &newWeights, const std::vector< Matrix_t > &weights)
Definition: Cuda.h:131
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
static void InitializeGauss(Matrix_t &A)
static void InitializeActivationDescriptor(ActivationDescriptor_t &, EActivationFunction, double=0.0)
Definition: Cuda.h:155
static void GaussDerivative(Tensor_t &B, const Tensor_t &A)
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static void InitializeConvWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *)
Definition: Cuda.h:162
static void InitializeLSTMDescriptors(TDescriptors *&, GenLayer_t *)
Definition: Cuda.h:177
static void ReleaseBNormDescriptors(TDescriptors *&)
Definition: Cuda.h:160
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
Definition: Cuda.h:544
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill B x T x D out with T x B x D matrix in.
static void AddRowWise(Tensor_t &output, const Matrix_t &biases)
Definition: Cuda.h:216
static void SoftSign(Tensor_t &B)
static void InitializeRNNTensors(GenLayer_t *)
Definition: Cuda.h:184
TCudaDeviceBuffer< AFloat > DeviceBuffer_t
Definition: Cuda.h:74
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights)
Definition: Cuda.h:207
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void ReleaseConvDescriptors(TDescriptors *&)
Release CNN data/operator descriptors.
Definition: Cuda.h:158
static void FreePoolDropoutWorkspace(TWorkspace *&)
Definition: Cuda.h:174
static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A)
Definition: Cuda.h:325
static void SetRandomSeed(size_t seed)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void TanhDerivative(Tensor_t &B, const Tensor_t &A)
static void ReleaseDescriptor(ActivationDescriptor_t &)
Definition: Cuda.h:171
static void CopyDiffArch(std::vector< Matrix_t > &A, const std::vector< AMatrix_t > &B)
static bool IsCudnn()
Definition: Cuda.h:138
static void ReleaseRNNDescriptors(TDescriptors *&)
Definition: Cuda.h:188
static Matrix_t & GRULayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, bool)
Backward pass for GRU Network.
Definition: Cuda.h:723
static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const AFloat alpha=1, const AFloat beta=0)
Computes the gradient of the activation function.
static void Relu(Tensor_t &B)
static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
static Scalar_t L1Regularization(const Matrix_t &W)
static void InitializeZero(Matrix_t &A)
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void DropoutForward(Matrix_t &A, Scalar_t p)
Definition: Cuda.h:463
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A.
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void CopyDiffArch(Tensor_t &A, const ATensor_t &B)
static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void ReleasePoolDescriptors(TDescriptors *&)
Definition: Cuda.h:159
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition: Cuda.h:110
Generic General Layer class.
Definition: GeneralLayer.h:49
TMatrixT.
Definition: TMatrixT.h:39
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:27
double beta(double x, double y)
Calculates the beta function.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
static double B[]
static double A[]
static double C[]
double gamma(double x)
void Copy(void *source, void *dest)
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
MemoryLayout
Memory layout type (copy from RTensor.hxx)
Definition: CudaTensor.h:47
create variable transformations
auto * l
Definition: textangle.C:4
REAL epsilon
Definition: triangle.c:617
static void output(int code)
Definition: gifencode.c:226