Logo ROOT  
Reference Guide
Reference.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/06/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12///////////////////////////////////////////////////////////////////////
13// Declaration of the TReference architecture, which provides a //
14// reference implementation of the low-level interface for the DNN //
15// implementation based on ROOT's TMatrixT matrix type. //
16///////////////////////////////////////////////////////////////////////
17
18#ifndef TMVA_DNN_ARCHITECTURES_REFERENCE
19#define TMVA_DNN_ARCHITECTURES_REFERENCE
20
21#include "TMatrix.h"
22//#include "TMVA/RTensor.hxx"
23#include "TMVA/DNN/Functions.h"
27#include <vector>
28
29class TRandom;
30
31namespace TMVA
32{
33namespace DNN
34{
35// struct TDescriptors {
36// };
37// struct TWorkspace {
38// };
39
40/*! The reference architecture class.
41*
42* Class template that contains the reference implementation of the low-level
43* interface for the DNN implementation. The reference implementation uses the
44* TMatrixT class template to represent matrices.
45*
46* \tparam AReal The floating point type used to represent scalars.
47*/
48template<typename AReal>
50{
51private:
53public:
54
55 using Scalar_t = AReal;
58 //using Tensor_t = TMVA::Experimental::RTensor<AReal>;
59
60 //____________________________________________________________________________
61 //
62 // Propagation
63 //____________________________________________________________________________
64
65 /** @name Forward Propagation
66 * Low-level functions required for the forward propagation of activations
67 * through the network.
68 */
69 ///@{
70 /** Matrix-multiply \p input with the transpose of \p weights and
71 * write the results into \p output. */
73 const TMatrixT<Scalar_t> &input,
74 const TMatrixT<Scalar_t> &weights);
75 /** Add the vectors biases row-wise to the matrix output */
77 const TMatrixT<Scalar_t> &biases);
78 ///@}
79
80 /** @name Backward Propagation
81 * Low-level functions required for the backward propagation of gradients
82 * through the network.
83 */
84 ///@{
85 /** Perform the complete backward propagation step. If the provided
86 * \p activationGradientsBackward matrix is not empty, compute the
87 * gradients of the objective function with respect to the activations
88 * of the previous layer (backward direction).
89 * Also compute the weight and the bias gradients. Modifies the values
90 * in \p df and thus produces only a valid result, if it is applied the
91 * first time after the corresponding forward propagation has been per-
92 * formed. */
93 static void Backward(TMatrixT<Scalar_t> & activationGradientsBackward,
94 TMatrixT<Scalar_t> & weightGradients,
95 TMatrixT<Scalar_t> & biasGradients,
97 const TMatrixT<Scalar_t> & activationGradients,
98 const TMatrixT<Scalar_t> & weights,
99 const TMatrixT<Scalar_t> & activationBackward);
100 /** Backpropagation step for a Recurrent Neural Network */
101 static Matrix_t & RecurrentLayerBackward(TMatrixT<Scalar_t> & state_gradients_backward, // BxH
102 TMatrixT<Scalar_t> & input_weight_gradients,
103 TMatrixT<Scalar_t> & state_weight_gradients,
104 TMatrixT<Scalar_t> & bias_gradients,
105 TMatrixT<Scalar_t> & df, //DxH
106 const TMatrixT<Scalar_t> & state, // BxH
107 const TMatrixT<Scalar_t> & weights_input, // HxD
108 const TMatrixT<Scalar_t> & weights_state, // HxH
109 const TMatrixT<Scalar_t> & input, // BxD
110 TMatrixT<Scalar_t> & input_gradient);
111 /** Adds the elements in matrix \p B scaled by \p beta to the elements in
112 * the matrix \p A. This is required for the weight update in the gradient
113 * descent step.*/
114 static void ScaleAdd(TMatrixT<Scalar_t> & A,
115 const TMatrixT<Scalar_t> & B,
116 Scalar_t beta = 1.0);
117
118 static void Copy(TMatrixT<Scalar_t> & A,
119 const TMatrixT<Scalar_t> & B);
120
121 // copy from another type of matrix
122 template<typename AMatrix_t>
123 static void CopyDiffArch(TMatrixT<Scalar_t> & A, const AMatrix_t & B);
124
125
126 /** Above functions extended to vectors */
127 static void ScaleAdd(std::vector<TMatrixT<Scalar_t>> & A,
128 const std::vector<TMatrixT<Scalar_t>> & B,
129 Scalar_t beta = 1.0);
130
131 static void Copy(std::vector<TMatrixT<Scalar_t>> & A, const std::vector<TMatrixT<Scalar_t>> & B);
132
133 // copy from another architecture
134 template<typename AMatrix_t>
135 static void CopyDiffArch(std::vector<TMatrixT<Scalar_t> > & A, const std::vector<AMatrix_t> & B);
136
137
138 ///@}
139
140 //____________________________________________________________________________
141 //
142 // Activation Functions
143 //____________________________________________________________________________
144
145 /** @name Activation Functions
146 * For each activation function, the low-level interface contains two routines.
147 * One that applies the activation function to a matrix and one that evaluates
148 * the derivatives of the activation function at the elements of a given matrix
149 * and writes the results into the result matrix.
150 */
151 ///@{
152 static void Identity(TMatrixT<AReal> & B);
154 const TMatrixT<AReal> & A);
155
156 static void Relu(TMatrixT<AReal> & B);
157 static void ReluDerivative(TMatrixT<AReal> & B,
158 const TMatrixT<AReal> & A);
159
160 static void Sigmoid(TMatrixT<AReal> & B);
161 static void SigmoidDerivative(TMatrixT<AReal> & B,
162 const TMatrixT<AReal> & A);
163
164 static void Tanh(TMatrixT<AReal> & B);
165 static void TanhDerivative(TMatrixT<AReal> & B,
166 const TMatrixT<AReal> & A);
167
168 static void SymmetricRelu(TMatrixT<AReal> & B);
170 const TMatrixT<AReal> & A);
171
172 static void SoftSign(TMatrixT<AReal> & B);
174 const TMatrixT<AReal> & A);
175
176 static void Gauss(TMatrixT<AReal> & B);
177 static void GaussDerivative(TMatrixT<AReal> & B,
178 const TMatrixT<AReal> & A);
179
180 ///@}
181
182 //____________________________________________________________________________
183 //
184 // Loss Functions
185 //____________________________________________________________________________
186
187 /** @name Loss Functions
188 * Loss functions compute a scalar value given the \p output of the network
189 * for a given training input and the expected network prediction \p Y that
190 * quantifies the quality of the prediction. For each function also a routine
191 * that computes the gradients (suffixed by Gradients) must be provided for
192 * the starting of the backpropagation algorithm.
193 */
194 ///@{
195
196 static AReal MeanSquaredError(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output,
197 const TMatrixT<AReal> &weights);
199 const TMatrixT<AReal> &weights);
200
201 /** Sigmoid transformation is implicitly applied, thus \p output should
202 * hold the linear activations of the last layer in the net. */
203 static AReal CrossEntropy(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);
204
206 const TMatrixT<AReal> &weights);
207
208 /** Softmax transformation is implicitly applied, thus \p output should
209 * hold the linear activations of the last layer in the net. */
210 static AReal SoftmaxCrossEntropy(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output,
211 const TMatrixT<AReal> &weights);
213 const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);
214 ///@}
215
216 //____________________________________________________________________________
217 //
218 // Output Functions
219 //____________________________________________________________________________
220
221 /** @name Output Functions
222 * Output functions transform the activations \p output of the
223 * output layer in the network to a valid prediction \p YHat for
224 * the desired usage of the network, e.g. the identity function
225 * for regression or the sigmoid transformation for two-class
226 * classification.
227 */
228 ///@{
229 static void Sigmoid(TMatrixT<AReal> &YHat,
230 const TMatrixT<AReal> & );
231 static void Softmax(TMatrixT<AReal> &YHat,
232 const TMatrixT<AReal> & );
233 ///@}
234
235 //____________________________________________________________________________
236 //
237 // Regularization
238 //____________________________________________________________________________
239
240 /** @name Regularization
241 * For each regularization type two functions are required, one named
242 * <tt><Type>Regularization</tt> that evaluates the corresponding
243 * regularization functional for a given weight matrix and the
244 * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
245 * component in the gradients to the provided matrix.
246 */
247 ///@{
248
249 static AReal L1Regularization(const TMatrixT<AReal> & W);
251 const TMatrixT<AReal> & W,
252 AReal weightDecay);
253
254 static AReal L2Regularization(const TMatrixT<AReal> & W);
256 const TMatrixT<AReal> & W,
257 AReal weightDecay);
258 ///@}
259
260 //____________________________________________________________________________
261 //
262 // Initialization
263 //____________________________________________________________________________
264
265 /** @name Initialization
266 * For each initialization method, one function in the low-level interface
267 * is provided. The naming scheme is <p>Initialize<Type></p> for a given
268 * initialization method Type.
269 */
270 ///@{
271
272 static void InitializeGauss(TMatrixT<AReal> & A);
273
274 static void InitializeUniform(TMatrixT<AReal> & A);
275
276 static void InitializeIdentity(TMatrixT<AReal> & A);
277
278 static void InitializeZero(TMatrixT<AReal> & A);
279
281
283
284 // return static instance of random generator used for initialization
285 // if generator does not exist it is created the first time with a random seed (e.g. seed = 0)
286 static TRandom & GetRandomGenerator();
287 // set random seed for the static generator
288 // if the static generator does not exist it is created
289 static void SetRandomSeed(size_t seed);
290
291
292 ///@}
293
294 //____________________________________________________________________________
295 //
296 // Dropout
297 //____________________________________________________________________________
298
299 /** @name Dropout
300 */
301 ///@{
302
303 /** Apply dropout with activation probability \p p to the given
304 * matrix \p A and scale the result by reciprocal of \p p. */
305 //static void Dropout(TMatrixT<AReal> & A, AReal dropoutProbability);
306 static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p);
308 {
309 Tensor_t & tA = A; // Tensor and matrix are same types
310 DropoutForward(tA, static_cast<TDescriptors *>(nullptr), static_cast<TWorkspace *>(nullptr), p);
311 }
312
313 ///@}
314
315
316 //____________________________________________________________________________
317 //
318 // Convolutional Layer Propagation
319 //____________________________________________________________________________
320
321 /** @name Forward Propagation in Convolutional Layer
322 */
323 ///@{
324
325 /** Transform the matrix \p B in local view format, suitable for
326 * convolution, and store it in matrix \p A. */
327 static void Im2col(TMatrixT<AReal> &A,
328 const TMatrixT<AReal> &B,
329 size_t imgHeight,
330 size_t imgWidth,
331 size_t fltHeight,
332 size_t fltWidth,
333 size_t strideRows,
334 size_t strideCols,
335 size_t zeroPaddingHeight,
336 size_t zeroPaddingWidth);
337
/** Stub kept for interface compatibility: not implemented for the reference
 * architecture. All arguments are unnamed and unused; the body only calls
 * Fatal() with a "not implemented" message. */
338 static void Im2colIndices(std::vector<int> &, const TMatrixT<AReal> &, size_t, size_t, size_t, size_t ,
339 size_t , size_t , size_t , size_t ,size_t ) {
340 Fatal("Im2ColIndices","This function is not implemented for ref architectures");
341 }
/** Stub kept for interface compatibility: not implemented for the reference
 * architecture. All arguments are unnamed and unused; the body only calls
 * Fatal() with a "not implemented" message. */
342 static void Im2colFast(TMatrixT<AReal> &, const TMatrixT<AReal> &, const std::vector<int> & ) {
343 Fatal("Im2ColFast","This function is not implemented for ref architectures");
344 }
345
346 /** Rotates the matrix \p B, which is representing a weights,
347 * and stores them in the matrix \p A. */
348 static void RotateWeights(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, size_t filterDepth, size_t filterHeight,
349 size_t filterWidth, size_t numFilters);
350
351 /** Add the biases in the Convolutional Layer. */
352 static void AddConvBiases(TMatrixT<AReal> &output, const TMatrixT<AReal> &biases);
353 ///@}
354
355 /** Dummy placeholder - preparation is currently only required for the CUDA architecture. */
356 static void PrepareInternals(std::vector<TMatrixT<AReal>> &) {}
357
358 /** Forward propagation in the Convolutional layer.
 * Not implemented for the reference architecture: every parameter is
 * unnamed (names are kept only as inline comments) and the body only
 * calls Fatal() with a "not implemented" message. */
359 static void ConvLayerForward(std::vector<TMatrixT<AReal>> & /*output*/,
360 std::vector<TMatrixT<AReal>> & /*derivatives*/,
361 const std::vector<TMatrixT<AReal>> & /*input*/,
362 const TMatrixT<AReal> & /*weights*/, const TMatrixT<AReal> & /*biases*/,
363 const DNN::CNN::TConvParams & /*params*/, EActivationFunction /*activFunc*/,
364 std::vector<TMatrixT<AReal>> & /*inputPrime*/) {
365 Fatal("ConvLayerForward","This function is not implemented for ref architectures");
366 }
367
368
369 /** @name Backward Propagation in Convolutional Layer
370 */
371 ///@{
372
373 /** Perform the complete backward propagation step in a Convolutional Layer.
374 * If the provided \p activationGradientsBackward matrix is not empty, compute the
375 * gradients of the objective function with respect to the activations
376 * of the previous layer (backward direction).
377 * Also compute the weight and the bias gradients. Modifies the values
378 * in \p df and thus produces only a valid result, if it is applied the
379 * first time after the corresponding forward propagation has been per-
380 * formed. */
381 static void ConvLayerBackward(std::vector<TMatrixT<AReal>> &,
383 std::vector<TMatrixT<AReal>> &,
384 const std::vector<TMatrixT<AReal>> &,
385 const TMatrixT<AReal> &, const std::vector<TMatrixT<AReal>> &,
386 size_t , size_t , size_t , size_t , size_t,
387 size_t , size_t , size_t , size_t , size_t) {
388 Fatal("ConvLayerBackward","This function is not implemented for ref architectures");
389
390 }
391
392#ifdef HAVE_CNN_REFERENCE
393 /** Utility function for calculating the activation gradients of the layer
394 * before the convolutional layer. */
395 static void CalculateConvActivationGradients(std::vector<TMatrixT<AReal>> &activationGradientsBackward,
396 const std::vector<TMatrixT<AReal>> &df, const TMatrixT<AReal> &weights,
397 size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
398 size_t height, size_t width, size_t filterDepth, size_t filterHeight,
399 size_t filterWidth);
400
401 /** Utility function for calculating the weight gradients of the convolutional
402 * layer. */
403 static void CalculateConvWeightGradients(TMatrixT<AReal> &weightGradients, const std::vector<TMatrixT<AReal>> &df,
404 const std::vector<TMatrixT<AReal>> &activationBackward, size_t batchSize,
405 size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
406 size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth,
407 size_t nLocalViews);
408
409 /** Utility function for calculating the bias gradients of the convolutional
410 * layer. */
411 static void CalculateConvBiasGradients(TMatrixT<AReal> &biasGradients, const std::vector<TMatrixT<AReal>> &df,
412 size_t batchSize, size_t depth, size_t nLocalViews);
413 ///@}
414
415#endif
416
417 //____________________________________________________________________________
418 //
419 // Max Pooling Layer Propagation
420 //____________________________________________________________________________
421 /** @name Forward Propagation in Max Pooling Layer
422 */
423 ///@{
424
425 /** Downsample the matrix \p C to the matrix \p A, using max
426 * operation, such that the winning indices are stored in matrix
427 * \p B. */
428 static void Downsample(TMatrixT<AReal> &A, TMatrixT<AReal> &B, const TMatrixT<AReal> &C, size_t imgHeight,
429 size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols);
430
431 ///@}
432
433 /** @name Backward Propagation in Max Pooling Layer
434 */
435 ///@{
436
437 /** Perform the complete backward propagation step in a Max Pooling Layer. Based on the
438 * winning indices stored in the index matrix, it just forwards the activation
439 * gradients to the previous layer. */
440 static void MaxPoolLayerBackward(TMatrixT<AReal> &activationGradientsBackward,
441 const TMatrixT<AReal> &activationGradients,
442 const TMatrixT<AReal> &indexMatrix,
443 size_t imgHeight,
444 size_t imgWidth,
445 size_t fltHeight,
446 size_t fltWidth,
447 size_t strideRows,
448 size_t strideCol,
449 size_t nLocalViews);
450 ///@}
451 //____________________________________________________________________________
452 //
453 // Reshape Layer Propagation
454 //____________________________________________________________________________
455 /** @name Forward and Backward Propagation in Reshape Layer
456 */
457 ///@{
458
459 /** Transform the matrix \p B to a matrix with different dimensions \p A */
460 static void Reshape(TMatrixT<AReal> &A, const TMatrixT<AReal> &B);
461
462 /** Flattens the tensor \p B, such that each matrix, is stretched in one row, resulting with a matrix \p A. */
463 static void Flatten(TMatrixT<AReal> &A, const std::vector<TMatrixT<AReal>> &B, size_t size, size_t nRows,
464 size_t nCols);
465
466 /** Transforms each row of \p B to a matrix and stores it in the tensor \p A. */
467 static void Deflatten(std::vector<TMatrixT<AReal>> &A, const TMatrixT<Scalar_t> &B, size_t index, size_t nRows,
468 size_t nCols);
469 /** Rearrange data according to time: fill B x T x D \p out with T x B x D matrix \p in. */
470 static void Rearrange(std::vector<TMatrixT<AReal>> &out, const std::vector<TMatrixT<AReal>> &in);
471
472 ///@}
473
474 //____________________________________________________________________________
475 //
476 // Additional Arithmetic Functions
477 //____________________________________________________________________________
478
479 /** Sum columns of (m x n) matrix \p A and write the results into the first
480 * m elements in \p B.
481 */
482 static void SumColumns(TMatrixT<AReal> &B, const TMatrixT<AReal> &A);
483
484 /** In-place Hadamard (element-wise) product of matrices \p A and \p B
485 * with the result being written into \p A.
486 */
487 static void Hadamard(TMatrixT<AReal> &A, const TMatrixT<AReal> &B);
488
489 /** Add the constant \p beta to all the elements of matrix \p A and write the
490 * result into \p A.
491 */
492 static void ConstAdd(TMatrixT<AReal> &A, AReal beta);
493
494 /** Multiply the constant \p beta to all the elements of matrix \p A and write the
495 * result into \p A.
496 */
497 static void ConstMult(TMatrixT<AReal> &A, AReal beta);
498
499 /** Reciprocal each element of the matrix \p A and write the result into
500 * \p A
501 */
503
504 /** Square each element of the matrix \p A and write the result into
505 * \p A
506 */
507 static void SquareElementWise(TMatrixT<AReal> &A);
508
509 /** Square root each element of the matrix \p A and write the result into
510 * \p A
511 */
512 static void SqrtElementWise(TMatrixT<AReal> &A);
513
514 // optimizer update functions
515
516 /// Update functions for ADAM optimizer
517 static void AdamUpdate(TMatrixT<AReal> & A, const TMatrixT<AReal> & M, const TMatrixT<AReal> & V, AReal alpha, AReal eps);
518 static void AdamUpdateFirstMom(TMatrixT<AReal> & A, const TMatrixT<AReal> & B, AReal beta);
519 static void AdamUpdateSecondMom(TMatrixT<AReal> & A, const TMatrixT<AReal> & B, AReal beta);
520
521
522
523 //____________________________________________________________________________
524 //
525 // AutoEncoder Propagation
526 //____________________________________________________________________________
527
528 // Add Biases to the output
529 static void AddBiases(TMatrixT<AReal> &A,
530 const TMatrixT<AReal> &biases);
531
532 // Updating parameters after every backward pass. Weights and biases are
533 // updated.
534 static void
536 TMatrixT<AReal> &z, TMatrixT<AReal> &fVBiases,
537 TMatrixT<AReal> &fHBiases, TMatrixT<AReal> &fWeights,
538 TMatrixT<AReal> &VBiasError, TMatrixT<AReal> &HBiasError,
539 AReal learningRate, size_t fBatchSize);
540
541 // Softmax functions redefined
542 static void SoftmaxAE(TMatrixT<AReal> & A);
543
544
545 // Corrupt the input values randomly on corruption Level.
546 //Basically inputs are masked currently.
547 static void CorruptInput(TMatrixT<AReal> & input,
548 TMatrixT<AReal> & corruptedInput,
549 AReal corruptionLevel);
550
551 //Encodes the input Values in the compressed form.
552 static void EncodeInput(TMatrixT<AReal> &input,
553 TMatrixT<AReal> &compressedInput,
554 TMatrixT<AReal> &Weights);
555
556 // reconstructs the input. The reconstructed Input has same dimensions as that
557 // of the input.
558 static void ReconstructInput(TMatrixT<AReal> & compressedInput,
559 TMatrixT<AReal> & reconstructedInput,
560 TMatrixT<AReal> &fWeights);
561
562
563 static void ForwardLogReg(TMatrixT<AReal> &input,
565 TMatrixT<AReal> &fWeights);
566
567 static void UpdateParamsLogReg(TMatrixT<AReal> &input,
569 TMatrixT<AReal> &difference,
571 TMatrixT<AReal> &fWeights,
572 TMatrixT<AReal> &fBiases,
573 AReal learningRate,
574 size_t fBatchSize);
575
576};
577
578
579// implement the templated member functions
580template <typename AReal>
581template <typename AMatrix_t>
583{
584 TMatrixT<AReal> tmp = B;
585 A = tmp;
586}
587
// Vector overload of CopyDiffArch: walks the destination vector A and forwards
// each pair (A[i], B[i]) to the single-matrix CopyDiffArch overload.
// NOTE(review): the loop bound is A.size() and B[i] is indexed without checking
// B.size(); this assumes B holds at least as many matrices as A — confirm at
// the call sites.
588template <typename AReal>
589template <typename AMatrix_t>
590void TReference<AReal>::CopyDiffArch(std::vector<TMatrixT<AReal>> &A, const std::vector<AMatrix_t> &B)
591{
592 for (size_t i = 0; i < A.size(); ++i) {
593 CopyDiffArch(A[i], B[i]);
594 }
595}
596
597
598
599} // namespace DNN
600} // namespace TMVA
601
602#endif
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
void Fatal(const char *location, const char *msgfmt,...)
The reference architecture class.
Definition: Reference.h:50
static void AdamUpdate(TMatrixT< AReal > &A, const TMatrixT< AReal > &M, const TMatrixT< AReal > &V, AReal alpha, AReal eps)
Update functions for ADAM optimizer.
Definition: Arithmetic.hxx:103
static void AdamUpdateSecondMom(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, AReal beta)
Definition: Arithmetic.hxx:129
static void DropoutForward(Matrix_t &A, Scalar_t p)
Definition: Reference.h:307
static void SymmetricRelu(TMatrixT< AReal > &B)
static void InitializeIdentity(TMatrixT< AReal > &A)
static void MultiplyTranspose(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &input, const TMatrixT< Scalar_t > &weights)
Matrix-multiply input with the transpose of \pweights and write the results into output.
Definition: Propagation.hxx:23
static void InitializeGlorotNormal(TMatrixT< AReal > &A)
Truncated normal initialization (Glorot, called also Xavier normal) The values are sample with a norm...
static void AdamUpdateFirstMom(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, AReal beta)
Definition: Arithmetic.hxx:117
static void Flatten(TMatrixT< AReal > &A, const std::vector< TMatrixT< AReal > > &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A.
static void Relu(TMatrixT< AReal > &B)
static void MaxPoolLayerBackward(TMatrixT< AReal > &activationGradientsBackward, const TMatrixT< AReal > &activationGradients, const TMatrixT< AReal > &indexMatrix, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCol, size_t nLocalViews)
Perform the complete backward propagation step in a Max Pooling Layer.
static void GaussDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftmaxAE(TMatrixT< AReal > &A)
static void AddL1RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.hxx:30
static void CrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static void Im2colFast(TMatrixT< AReal > &, const TMatrixT< AReal > &, const std::vector< int > &)
Definition: Reference.h:342
static void Downsample(TMatrixT< AReal > &A, TMatrixT< AReal > &B, const TMatrixT< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void EncodeInput(TMatrixT< AReal > &input, TMatrixT< AReal > &compressedInput, TMatrixT< AReal > &Weights)
static void TanhDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void ReconstructInput(TMatrixT< AReal > &compressedInput, TMatrixT< AReal > &reconstructedInput, TMatrixT< AReal > &fWeights)
static AReal L2Regularization(const TMatrixT< AReal > &W)
static void Im2colIndices(std::vector< int > &, const TMatrixT< AReal > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Definition: Reference.h:338
static void IdentityDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static AReal SoftmaxCrossEntropy(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
static void ConstAdd(TMatrixT< AReal > &A, AReal beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.hxx:48
static void Gauss(TMatrixT< AReal > &B)
static void SetRandomSeed(size_t seed)
static void SigmoidDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftSignDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static AReal CrossEntropy(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
static void InitializeZero(TMatrixT< AReal > &A)
static void Tanh(TMatrixT< AReal > &B)
static void SoftSign(TMatrixT< AReal > &B)
static void Softmax(TMatrixT< AReal > &YHat, const TMatrixT< AReal > &)
static void ReciprocalElementWise(TMatrixT< AReal > &A)
Reciprocal each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:70
static void Backward(TMatrixT< Scalar_t > &activationGradientsBackward, TMatrixT< Scalar_t > &weightGradients, TMatrixT< Scalar_t > &biasGradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &activationGradients, const TMatrixT< Scalar_t > &weights, const TMatrixT< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
Definition: Propagation.hxx:40
static void SquareElementWise(TMatrixT< AReal > &A)
Square each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:81
static void MeanSquaredErrorGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static TRandom * fgRandomGen
Definition: Reference.h:52
static void RotateWeights(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
static void Rearrange(std::vector< TMatrixT< AReal > > &out, const std::vector< TMatrixT< AReal > > &in)
Rearrage data accoring to time fill B x T x D out with T x B x D matrix in.
static void Deflatten(std::vector< TMatrixT< AReal > > &A, const TMatrixT< Scalar_t > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor B.
static void Im2col(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void Hadamard(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
Definition: Arithmetic.hxx:37
static void UpdateParams(TMatrixT< AReal > &x, TMatrixT< AReal > &tildeX, TMatrixT< AReal > &y, TMatrixT< AReal > &z, TMatrixT< AReal > &fVBiases, TMatrixT< AReal > &fHBiases, TMatrixT< AReal > &fWeights, TMatrixT< AReal > &VBiasError, TMatrixT< AReal > &HBiasError, AReal learningRate, size_t fBatchSize)
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
Definition: Propagation.hxx:76
static void Sigmoid(TMatrixT< AReal > &B)
static void CopyDiffArch(TMatrixT< Scalar_t > &A, const AMatrix_t &B)
Definition: Reference.h:582
static void SymmetricReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void Identity(TMatrixT< AReal > &B)
static void UpdateParamsLogReg(TMatrixT< AReal > &input, TMatrixT< AReal > &output, TMatrixT< AReal > &difference, TMatrixT< AReal > &p, TMatrixT< AReal > &fWeights, TMatrixT< AReal > &fBiases, AReal learningRate, size_t fBatchSize)
static void ConvLayerBackward(std::vector< TMatrixT< AReal > > &, TMatrixT< AReal > &, TMatrixT< AReal > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const std::vector< TMatrixT< AReal > > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Perform the complete backward propagation step in a Convolutional Layer.
Definition: Reference.h:381
static void SoftmaxCrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static AReal L1Regularization(const TMatrixT< AReal > &W)
static void AddConvBiases(TMatrixT< AReal > &output, const TMatrixT< AReal > &biases)
Add the biases in the Convolutional Layer.
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given matrix A and scale the result by reciprocal ...
static void ConvLayerForward(std::vector< TMatrixT< AReal > > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const TMatrixT< AReal > &, const DNN::CNN::TConvParams &, EActivationFunction, std::vector< TMatrixT< AReal > > &)
Forward propagation in the Convolutional layer.
Definition: Reference.h:359
static void AddL2RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
static void Copy(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B)
Definition: Propagation.hxx:86
static void CorruptInput(TMatrixT< AReal > &input, TMatrixT< AReal > &corruptedInput, AReal corruptionLevel)
static void InitializeGauss(TMatrixT< AReal > &A)
static AReal MeanSquaredError(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static void SqrtElementWise(TMatrixT< AReal > &A)
Square root each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:92
static void SumColumns(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
Sum columns of (m x n) matrixx A and write the results into the first m elements in A.
Definition: Arithmetic.hxx:25
static void ReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void ForwardLogReg(TMatrixT< AReal > &input, TMatrixT< AReal > &p, TMatrixT< AReal > &fWeights)
static void ConstMult(TMatrixT< AReal > &A, AReal beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.hxx:59
static void InitializeGlorotUniform(TMatrixT< AReal > &A)
Sample from a uniform distribution in range [ -lim,+lim] where lim = sqrt(6/N_in+N_out).
static void Reshape(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
static void AddBiases(TMatrixT< AReal > &A, const TMatrixT< AReal > &biases)
static TRandom & GetRandomGenerator()
static void PrepareInternals(std::vector< TMatrixT< AReal > > &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
Definition: Reference.h:356
static Matrix_t & RecurrentLayerBackward(TMatrixT< Scalar_t > &state_gradients_backward, TMatrixT< Scalar_t > &input_weight_gradients, TMatrixT< Scalar_t > &state_weight_gradients, TMatrixT< Scalar_t > &bias_gradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &state, const TMatrixT< Scalar_t > &weights_input, const TMatrixT< Scalar_t > &weights_state, const TMatrixT< Scalar_t > &input, TMatrixT< Scalar_t > &input_gradient)
Backpropagation step for a Recurrent Neural Network.
static void InitializeUniform(TMatrixT< AReal > &A)
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:27
double beta(double x, double y)
Calculates the beta function.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
static double B[]
static double A[]
static double C[]
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
create variable transformations
static void output(int code)
Definition: gifencode.c:226