Cpu.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 05/07/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

//////////////////////////////////////////////////////////////////////
// Definition of the TCpu architecture class, which provides a      //
// multi-threaded CPU implementation of the low-level interface     //
// for networks on CPUs, using BLAS and ROOT's TThreadExecutor.     //
//////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_ARCHITECTURES_CPU
#define TMVA_DNN_ARCHITECTURES_CPU

#include "TMVA/DNN/Functions.h"

#include "Cpu/CpuBuffer.h"
#include "Cpu/CpuMatrix.h"
#include <vector>

class TRandom;

namespace TMVA
{
namespace DNN
{
 //class EActivationFunction;

/** The TCpu architecture class.
 *
 * Low-level interface class for multi-threaded CPU architectures. Contains as
 * public types the declaration of the scalar, matrix and data loader types
 * for this architecture, as well as the remaining functions in the low-level
 * interface in the form of static members.
 */
template<typename AReal = Real_t>
class TCpu
{
private:
   static TRandom * fgRandomGen;
public:

   using Scalar_t       = AReal;
   using Matrix_t       = TCpuMatrix<AReal>;
   using HostBuffer_t   = TCpuBuffer<AReal>;
   using DeviceBuffer_t = TCpuBuffer<AReal>;

   //____________________________________________________________________________
   //
   // Propagation
   //____________________________________________________________________________

   /** @name Forward Propagation
    * Low-level functions required for the forward propagation of activations
    * through the network.
    */
   ///@{
   /** Matrix-multiply \p input with the transpose of \p weights and
    * write the results into \p output. */
   static void MultiplyTranspose(TCpuMatrix<Scalar_t> &output,
                                 const TCpuMatrix<Scalar_t> &input,
                                 const TCpuMatrix<Scalar_t> &weights);
   /** Add the vectors \p biases row-wise to the matrix \p output. */
   static void AddRowWise(TCpuMatrix<Scalar_t> &output,
                          const TCpuMatrix<Scalar_t> &biases);
   ///@}
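
   // Usage sketch (illustrative only; the matrix shapes are assumptions, with
   // input (batch x nIn), weights (nOut x nIn) and biases (nOut x 1)):
   //
   //   TCpuMatrix<Real_t> input(batch, nIn), weights(nOut, nIn);
   //   TCpuMatrix<Real_t> biases(nOut, 1), output(batch, nOut);
   //   TCpu<Real_t>::MultiplyTranspose(output, input, weights); // output = input * weights^T
   //   TCpu<Real_t>::AddRowWise(output, biases);                // add the bias to every row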

   /** @name Backward Propagation
    * Low-level functions required for the backward propagation of activations
    * through the network.
    */
   ///@{
   /** Perform the complete backward propagation step. If the provided
    * \p activationGradientsBackward matrix is not empty, compute the
    * gradients of the objective function with respect to the activations
    * of the previous layer (backward direction).
    * Also compute the weight and the bias gradients. Modifies the values
    * in \p df and thus produces a valid result only the first time it is
    * applied after the corresponding forward propagation has been
    * performed. */
   static void Backward(TCpuMatrix<Scalar_t> & activationGradientsBackward,
                        TCpuMatrix<Scalar_t> & weightGradients,
                        TCpuMatrix<Scalar_t> & biasGradients,
                        TCpuMatrix<Scalar_t> & df,
                        const TCpuMatrix<Scalar_t> & activationGradients,
                        const TCpuMatrix<Scalar_t> & weights,
                        const TCpuMatrix<Scalar_t> & activationBackward);
   /** Backward pass for Recurrent Networks */
   static Matrix_t & RecurrentLayerBackward(TCpuMatrix<Scalar_t> & state_gradients_backward, // BxH
                                            TCpuMatrix<Scalar_t> & input_weight_gradients,
                                            TCpuMatrix<Scalar_t> & state_weight_gradients,
                                            TCpuMatrix<Scalar_t> & bias_gradients,
                                            TCpuMatrix<Scalar_t> & df,                  // DxH
                                            const TCpuMatrix<Scalar_t> & state,         // BxH
                                            const TCpuMatrix<Scalar_t> & weights_input, // HxD
                                            const TCpuMatrix<Scalar_t> & weights_state, // HxH
                                            const TCpuMatrix<Scalar_t> & input,         // BxD
                                            TCpuMatrix<Scalar_t> & input_gradient);
   /** Adds the elements in matrix \p B, scaled by \p beta, to the elements in
    * the matrix \p A. This is required for the weight update in the gradient
    * descent step. */
   static void ScaleAdd(TCpuMatrix<Scalar_t> & A,
                        const TCpuMatrix<Scalar_t> & B,
                        Scalar_t beta = 1.0);

   static void Copy(TCpuMatrix<Scalar_t> & B,
                    const TCpuMatrix<Scalar_t> & A);

   // copy from another type of matrix
   template<typename AMatrix_t>
   static void CopyDiffArch(TCpuMatrix<Scalar_t> & B, const AMatrix_t & A);


   /** Above functions extended to vectors */
   static void ScaleAdd(std::vector<TCpuMatrix<Scalar_t>> & A,
                        const std::vector<TCpuMatrix<Scalar_t>> & B,
                        Scalar_t beta = 1.0);

   static void Copy(std::vector<TCpuMatrix<Scalar_t>> & A,
                    const std::vector<TCpuMatrix<Scalar_t>> & B);

   // copy from another architecture
   template<typename AMatrix_t>
   static void CopyDiffArch(std::vector<TCpuMatrix<Scalar_t>> & A,
                            const std::vector<AMatrix_t> & B);

   ///@}
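
   // Usage sketch (illustrative only): an SGD weight update expressed with
   // ScaleAdd, assuming weights and weightGradients have matching shapes and
   // learningRate is a Scalar_t:
   //
   //   // weights -= learningRate * weightGradients
   //   TCpu<Real_t>::ScaleAdd(weights, weightGradients, -learningRate);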

   //____________________________________________________________________________
   //
   // Activation Functions
   //____________________________________________________________________________

   /** @name Activation Functions
    * For each activation function, the low-level interface contains two routines.
    * One applies the activation function to a matrix and one evaluates the
    * derivatives of the activation function at the elements of a given matrix
    * and writes the results into the result matrix.
    */
   ///@{
   static void IdentityDerivative(TCpuMatrix<Scalar_t> & B,
                                  const TCpuMatrix<Scalar_t> & A);

   static void Relu(TCpuMatrix<Scalar_t> & B);
   static void ReluDerivative(TCpuMatrix<Scalar_t> & B,
                              const TCpuMatrix<Scalar_t> & A);

   static void Sigmoid(TCpuMatrix<Scalar_t> & B);
   static void SigmoidDerivative(TCpuMatrix<Scalar_t> & B,
                                 const TCpuMatrix<Scalar_t> & A);

   static void Tanh(TCpuMatrix<Scalar_t> & B);
   static void TanhDerivative(TCpuMatrix<Scalar_t> & B,
                              const TCpuMatrix<Scalar_t> & A);

   static void SymmetricRelu(TCpuMatrix<Scalar_t> & B);
   static void SymmetricReluDerivative(TCpuMatrix<Scalar_t> & B,
                                       const TCpuMatrix<Scalar_t> & A);

   static void SoftSign(TCpuMatrix<Scalar_t> & B);
   static void SoftSignDerivative(TCpuMatrix<Scalar_t> & B,
                                  const TCpuMatrix<Scalar_t> & A);

   static void Gauss(TCpuMatrix<Scalar_t> & B);
   static void GaussDerivative(TCpuMatrix<Scalar_t> & B,
                               const TCpuMatrix<Scalar_t> & A);
   ///@}
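
   // Usage sketch (illustrative only): the in-place application and the
   // derivative evaluation for ReLU, with A holding the layer input and B
   // receiving the element-wise derivatives:
   //
   //   TCpu<Real_t>::ReluDerivative(B, A); // B_ij <- ReLU'(A_ij)
   //   TCpu<Real_t>::Relu(A);              // A_ij <- max(0, A_ij), in place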

   //____________________________________________________________________________
   //
   // Loss Functions
   //____________________________________________________________________________

   /** @name Loss Functions
    * Loss functions compute a scalar value given the \p output of the network
    * for a given training input and the expected network prediction \p Y that
    * quantifies the quality of the prediction. For each function a routine
    * that computes the gradients (suffixed by Gradients) must also be provided
    * for the starting of the backpropagation algorithm.
    */
   ///@{

   static Scalar_t MeanSquaredError(const TCpuMatrix<Scalar_t> &Y, const TCpuMatrix<Scalar_t> &output,
                                    const TCpuMatrix<Scalar_t> &weights);
   static void MeanSquaredErrorGradients(TCpuMatrix<Scalar_t> &dY, const TCpuMatrix<Scalar_t> &Y,
                                         const TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &weights);

   /** Sigmoid transformation is implicitly applied, thus \p output should
    * hold the linear activations of the last layer in the net. */
   static Scalar_t CrossEntropy(const TCpuMatrix<Scalar_t> &Y, const TCpuMatrix<Scalar_t> &output,
                                const TCpuMatrix<Scalar_t> &weights);

   static void CrossEntropyGradients(TCpuMatrix<Scalar_t> &dY, const TCpuMatrix<Scalar_t> &Y,
                                     const TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &weights);

   /** Softmax transformation is implicitly applied, thus \p output should
    * hold the linear activations of the last layer in the net. */
   static Scalar_t SoftmaxCrossEntropy(const TCpuMatrix<Scalar_t> &Y, const TCpuMatrix<Scalar_t> &output,
                                       const TCpuMatrix<Scalar_t> &weights);
   static void SoftmaxCrossEntropyGradients(TCpuMatrix<Scalar_t> &dY, const TCpuMatrix<Scalar_t> &Y,
                                            const TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &weights);
   ///@}
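
   // Usage sketch (illustrative only): evaluating the loss and the matching
   // gradients that seed backpropagation; Y holds the targets, output the
   // network predictions, weights the per-event weights:
   //
   //   Real_t loss = TCpu<Real_t>::MeanSquaredError(Y, output, weights);
   //   TCpu<Real_t>::MeanSquaredErrorGradients(dY, Y, output, weights); // dY <- dL/doutput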

   //____________________________________________________________________________
   //
   // Output Functions
   //____________________________________________________________________________

   /** @name Output Functions
    * Output functions transform the activations \p output of the
    * output layer in the network to a valid prediction \p YHat for
    * the desired usage of the network, e.g. the identity function
    * for regression or the sigmoid transformation for two-class
    * classification.
    */
   ///@{
   static void Sigmoid(TCpuMatrix<Scalar_t> &YHat,
                       const TCpuMatrix<Scalar_t> & );
   static void Softmax(TCpuMatrix<Scalar_t> &YHat,
                       const TCpuMatrix<Scalar_t> & );
   ///@}
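
   // Usage sketch (illustrative only): turning the linear activations of the
   // last layer into class probabilities for multi-class classification:
   //
   //   TCpu<Real_t>::Softmax(YHat, output); // softmax per event, written into YHat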

   //____________________________________________________________________________
   //
   // Regularization
   //____________________________________________________________________________

   /** @name Regularization
    * For each regularization type two functions are required, one named
    * <tt><Type>Regularization</tt> that evaluates the corresponding
    * regularization functional for a given weight matrix and the
    * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
    * component in the gradients to the provided matrix.
    */
   ///@{

   static Scalar_t L1Regularization(const TCpuMatrix<Scalar_t> & W);
   static void AddL1RegularizationGradients(TCpuMatrix<Scalar_t> & A,
                                            const TCpuMatrix<Scalar_t> & W,
                                            Scalar_t weightDecay);

   static Scalar_t L2Regularization(const TCpuMatrix<Scalar_t> & W);
   static void AddL2RegularizationGradients(TCpuMatrix<Scalar_t> & A,
                                            const TCpuMatrix<Scalar_t> & W,
                                            Scalar_t weightDecay);
   ///@}
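
   // Usage sketch (illustrative only): adding an L2 penalty to the loss and
   // the corresponding term to the weight gradients:
   //
   //   loss += weightDecay * TCpu<Real_t>::L2Regularization(weights);
   //   TCpu<Real_t>::AddL2RegularizationGradients(weightGradients, weights, weightDecay);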

   //____________________________________________________________________________
   //
   // Initialization
   //____________________________________________________________________________

   /** @name Initialization
    * For each initialization method, one function in the low-level interface
    * is provided. The naming scheme is <tt>Initialize<Type></tt> for a given
    * initialization method Type.
    */
   ///@{

   static void InitializeGauss(TCpuMatrix<Scalar_t> & A);
   static void InitializeUniform(TCpuMatrix<Scalar_t> & A);
   static void InitializeIdentity(TCpuMatrix<Scalar_t> & A);
   static void InitializeZero(TCpuMatrix<Scalar_t> & A);
   static void InitializeGlorotNormal(TCpuMatrix<Scalar_t> & A);
   static void InitializeGlorotUniform(TCpuMatrix<Scalar_t> & A);

   // return static instance of random generator used for initialization.
   // If the generator does not exist, it is created the first time with a random seed (e.g. seed = 0)
   static TRandom & GetRandomGenerator();
   // set random seed for the static generator.
   // If the static generator does not exist, it is created.
   static void SetRandomSeed(size_t seed);
   ///@}
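
   // Usage sketch (illustrative only): reproducible Glorot (Xavier) weight
   // initialization:
   //
   //   TCpu<Real_t>::SetRandomSeed(42);                // fix the seed of the static generator
   //   TCpu<Real_t>::InitializeGlorotUniform(weights); // uniform in [-lim, +lim], lim = sqrt(6/(nIn+nOut))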

   //____________________________________________________________________________
   //
   // Dropout
   //____________________________________________________________________________

   /** @name Dropout
    */
   ///@{

   /** Apply dropout with activation probability \p p to the given
    * matrix \p A and scale the result by the reciprocal of \p p. */
   static void Dropout(TCpuMatrix<Scalar_t> & A, Scalar_t p);

   ///@}
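
   // Usage sketch (illustrative only): inverted dropout during training, with
   // each element kept with probability p = 0.5 and the survivors scaled by 1/p:
   //
   //   TCpu<Real_t>::Dropout(activations, 0.5);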

   //____________________________________________________________________________
   //
   // Convolutional Layer Propagation
   //____________________________________________________________________________

   /** @name Forward Propagation in Convolutional Layer
    */
   ///@{

   /** Transform the matrix \p B into local view format, suitable for
    * convolution, and store it in matrix \p A. */
   static void Im2col(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B, size_t imgHeight, size_t imgWidth, size_t fltHeight,
                      size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight,
                      size_t zeroPaddingWidth);
   static void Im2colIndices(std::vector<int> &V, const TCpuMatrix<AReal> &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight,
                             size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight,
                             size_t zeroPaddingWidth);
   static void Im2colFast(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B, const std::vector<int> & V);

   /** Rotates the matrix \p B, which represents the weights,
    * and stores the result in the matrix \p A. */
   static void RotateWeights(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B, size_t filterDepth, size_t filterHeight,
                             size_t filterWidth, size_t numFilters);

   /** Add the biases in the Convolutional Layer. */
   static void AddConvBiases(TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &biases);
   ///@}
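
   // Usage sketch (illustrative only; matrix orientations are assumptions):
   // the im2col trick reduces the convolution to one matrix multiplication,
   // and the index variant precomputes the gather pattern once per geometry
   // so Im2colFast can reuse it for every event in the batch:
   //
   //   std::vector<int> V;
   //   TCpu<Real_t>::Im2colIndices(V, image, nLocalViews, imgH, imgW, fltH, fltW,
   //                               strideRows, strideCols, padH, padW);
   //   TCpu<Real_t>::Im2colFast(localViews, image, V);  // gather the local views
   //   TCpu<Real_t>::MultiplyTranspose(convOutput, localViews, filterWeights); // conv as GEMM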

   /** Forward propagation in the Convolutional layer */
   static void ConvLayerForward(std::vector<TCpuMatrix<Scalar_t>> & output, std::vector<TCpuMatrix<Scalar_t>> & derivatives,
                                const std::vector<TCpuMatrix<Scalar_t>> &input,
                                const TCpuMatrix<Scalar_t> & weights, const TCpuMatrix<Scalar_t> & biases,
                                EActivationFunction func, const std::vector<int> & vIndices,
                                size_t nlocalViews, size_t nlocalViewPixels,
                                Scalar_t dropoutProbability, bool applyDropout);

   /** @name Backward Propagation in Convolutional Layer
    */
   ///@{

   /** Perform the complete backward propagation step in a Convolutional Layer.
    * If the provided \p activationGradientsBackward matrix is not empty, compute the
    * gradients of the objective function with respect to the activations
    * of the previous layer (backward direction).
    * Also compute the weight and the bias gradients. Modifies the values
    * in \p df and thus produces a valid result only the first time it is
    * applied after the corresponding forward propagation has been
    * performed. */
   static void ConvLayerBackward(std::vector<TCpuMatrix<Scalar_t>> &activationGradientsBackward,
                                 TCpuMatrix<Scalar_t> &weightGradients, TCpuMatrix<Scalar_t> &biasGradients,
                                 std::vector<TCpuMatrix<Scalar_t>> &df,
                                 const std::vector<TCpuMatrix<Scalar_t>> &activationGradients,
                                 const TCpuMatrix<Scalar_t> &weights,
                                 const std::vector<TCpuMatrix<Scalar_t>> &activationBackward, size_t batchSize,
                                 size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width,
                                 size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews);

   /** Utility function for calculating the activation gradients of the layer
    * before the convolutional layer. */
   static void CalculateConvActivationGradients(std::vector<TCpuMatrix<Scalar_t>> &activationGradientsBackward,
                                                const std::vector<TCpuMatrix<Scalar_t>> &df,
                                                const TCpuMatrix<Scalar_t> &weights, size_t batchSize,
                                                size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
                                                size_t width, size_t filterDepth, size_t filterHeight,
                                                size_t filterWidth);

   /** Utility function for calculating the weight gradients of the convolutional
    * layer. */
   static void CalculateConvWeightGradients(TCpuMatrix<Scalar_t> &weightGradients,
                                            const std::vector<TCpuMatrix<Scalar_t>> &df,
                                            const std::vector<TCpuMatrix<Scalar_t>> &activations_backward,
                                            size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
                                            size_t height, size_t width, size_t filterDepth, size_t filterHeight,
                                            size_t filterWidth, size_t nLocalViews);

   /** Utility function for calculating the bias gradients of the convolutional
    * layer. */
   static void CalculateConvBiasGradients(TCpuMatrix<Scalar_t> &biasGradients, const std::vector<TCpuMatrix<Scalar_t>> &df,
                                          size_t batchSize, size_t depth, size_t nLocalViews);
   ///@}
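
   // Note (a sketch of the decomposition, not a verbatim excerpt):
   // ConvLayerBackward combines the derivative matrices \p df with the incoming
   // activation gradients and then delegates to the three utilities above:
   //
   //   // df[i] <- df[i] * activationGradients[i]  (element-wise, per event)
   //   CalculateConvActivationGradients(activationGradientsBackward, df, weights, ...);
   //   CalculateConvWeightGradients(weightGradients, df, activationBackward, ...);
   //   CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);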

   //____________________________________________________________________________
   //
   // Max Pooling Layer Propagation
   //____________________________________________________________________________
   /** @name Forward Propagation in Max Pooling Layer
    */
   ///@{

   /** Downsample the matrix \p C to the matrix \p A, using the max
    * operation, such that the winning indices are stored in matrix
    * \p B. */
   static void Downsample(TCpuMatrix<AReal> &A, TCpuMatrix<AReal> &B, const TCpuMatrix<AReal> &C, size_t imgHeight,
                          size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols);

   ///@}
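
   // Usage sketch (illustrative only): 2x2 max pooling with stride 2, where A
   // receives the pooled values and B the winning indices that the backward
   // pass needs for routing the gradients:
   //
   //   TCpu<Real_t>::Downsample(A, B, C, imgHeight, imgWidth,
   //                            /*fltHeight=*/2, /*fltWidth=*/2,
   //                            /*strideRows=*/2, /*strideCols=*/2);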

   /** @name Backward Propagation in Max Pooling Layer
    */
   ///@{
   /** Perform the complete backward propagation step in a Pooling Layer. Based on the
    * winning indices stored in the index matrix, it simply forwards the activation
    * gradients to the previous layer. */
   static void MaxPoolLayerBackward(std::vector<TCpuMatrix<AReal>> &activationGradientsBackward,
                                    const std::vector<TCpuMatrix<AReal>> &activationGradients,
                                    const std::vector<TCpuMatrix<AReal>> &indexMatrix, size_t batchSize, size_t depth,
                                    size_t nLocalViews);

   ///@}

   //____________________________________________________________________________
   //
   // Reshape Layer Propagation
   //____________________________________________________________________________
   /** @name Forward and Backward Propagation in Reshape Layer
    */
   ///@{

   /** Transform the matrix \p B to a matrix with different dimensions \p A */
   static void Reshape(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B);

   /** Flattens the tensor \p B such that each matrix is stretched into
    * one row, resulting in the matrix \p A. */
   static void Flatten(TCpuMatrix<AReal> &A, const std::vector<TCpuMatrix<AReal>> &B, size_t size, size_t nRows,
                       size_t nCols);

   /** Transforms each row of \p B to a matrix and stores it in the
    * tensor \p A. */
   static void Deflatten(std::vector<TCpuMatrix<AReal>> &A, const TCpuMatrix<AReal> &B, size_t index, size_t nRows,
                         size_t nCols);
   /** Rearrange data according to time: fill the B x T x D output with the T x B x D input. */
   static void Rearrange(std::vector<TCpuMatrix<AReal>> &out, const std::vector<TCpuMatrix<AReal>> &in);


   ///@}
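
   // Usage sketch (illustrative only): Flatten and Deflatten undo each other,
   // so a tensor of `size` matrices of shape (nRows x nCols) can make a round
   // trip through the flat representation:
   //
   //   TCpu<Real_t>::Flatten(flat, tensor, size, nRows, nCols);
   //   TCpu<Real_t>::Deflatten(tensor, flat, size, nRows, nCols);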

   //____________________________________________________________________________
   //
   // Additional Arithmetic Functions
   //____________________________________________________________________________

   /** @name Additional Arithmetic Functions
    *
    * Additional arithmetic on CPU matrices used to implement the low-level
    * interface.
    */
   ///@{

   /** Standard multiplication of two matrices \p A and \p B with the result being
    * written into \p C.
    */
   static void Multiply(TCpuMatrix<Scalar_t> &C,
                        const TCpuMatrix<Scalar_t> &A,
                        const TCpuMatrix<Scalar_t> &B);
   /** Matrix multiplication of two matrices \p A and \p B^T (transposed) with the
    * result being written into \p C.
    */
   static void TransposeMultiply(TCpuMatrix<Scalar_t> &output,
                                 const TCpuMatrix<Scalar_t> &input,
                                 const TCpuMatrix<Scalar_t> &Weights,
                                 Scalar_t alpha = 1.0, Scalar_t beta = 0.);
   /** In-place Hadamard (element-wise) product of matrices \p A and \p B
    * with the result being written into \p A.
    */
   static void Hadamard(TCpuMatrix<Scalar_t> &A,
                        const TCpuMatrix<Scalar_t> &B);

   /** Sum columns of (m x n) matrix \p A and write the results into the first
    * m elements in \p B.
    */
   static void SumColumns(TCpuMatrix<Scalar_t> &B,
                          const TCpuMatrix<Scalar_t> &A,
                          Scalar_t alpha = 1.0, Scalar_t beta = 0.);

   /** Compute the sum of all elements in \p A */
   static Scalar_t Sum(const TCpuMatrix<Scalar_t> &A);
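
   // Usage sketch (illustrative only): the Hadamard product is what combines
   // activation derivatives with incoming gradients during backpropagation:
   //
   //   TCpu<Real_t>::Multiply(C, A, B);       // C <- A * B
   //   TCpu<Real_t>::Hadamard(A, B);          // A_ij <- A_ij * B_ij, in place
   //   Real_t total = TCpu<Real_t>::Sum(A);   // sum of all elements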

};

//____________________________________________________________________________
template <typename Real_t>
template <typename AMatrix_t>
void TCpu<Real_t>::CopyDiffArch(TCpuMatrix<Real_t> &B,
                                const AMatrix_t &A)
{
   // copy from another architecture using the reference one
   // this is not very efficient since it creates temporary objects
   TMatrixT<Real_t> tmp = A;
   Copy(B, TCpuMatrix<Real_t>(tmp));
}

//____________________________________________________________________________
template <typename Real_t>
template <typename AMatrix_t>
void TCpu<Real_t>::CopyDiffArch(std::vector<TCpuMatrix<Real_t>> &B,
                                const std::vector<AMatrix_t> &A)
{
   for (size_t i = 0; i < B.size(); ++i) {
      CopyDiffArch(B[i], A[i]);
   }
}
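
// Usage sketch (illustrative only): copying from any matrix type convertible
// to TMatrixT, e.g. a reference-architecture matrix, into a CPU matrix:
//
//   TMatrixT<Real_t> src(nRows, nCols);
//   TCpuMatrix<Real_t> dst(nRows, nCols);
//   TCpu<Real_t>::CopyDiffArch(dst, src); // goes through a TMatrixT temporary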


} // namespace DNN
} // namespace TMVA

#endif