Logo ROOT   6.14/05
Reference Guide
DeepNet.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Vladimir Ilievski
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TDeepNet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Deep Neural Network *
12  * *
13  * Authors (alphabetical): *
14  * Akshay Vashistha <akshayvashistha1995@gmail.com> - CERN, Switzerland *
15  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
16  * Saurav Shekhar <sauravshekhar01@gmail.com> - CERN, Switzerland *
17  * *
18  * Copyright (c) 2005-2015: *
19  * CERN, Switzerland *
20  * U. of Victoria, Canada *
21  * MPI-K Heidelberg, Germany *
22  * U. of Bonn, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef TMVA_DNN_DEEPNET
30 #define TMVA_DNN_DEEPNET
31 
32 #include "TString.h"
33 
34 #include "TMVA/DNN/Functions.h"
36 
37 #include "TMVA/DNN/GeneralLayer.h"
38 #include "TMVA/DNN/DenseLayer.h"
39 #include "TMVA/DNN/ReshapeLayer.h"
40 
41 #include "TMVA/DNN/CNN/ConvLayer.h"
43 
44 #include "TMVA/DNN/RNN/RNNLayer.h"
45 
46 #ifdef HAVE_DAE
47 #include "TMVA/DNN/DAE/CompressionLayer.h"
48 #include "TMVA/DNN/DAE/CorruptionLayer.h"
49 #include "TMVA/DNN/DAE/ReconstructionLayer.h"
50 #include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
51 #endif
52 
53 #include <vector>
54 #include <cmath>
55 
56 
57 namespace TMVA {
58 namespace DNN {
59 
60  using namespace CNN;
61  using namespace RNN;
62  //using namespace DAE;
63 
64 /** \class TDeepNet
65 
66  Generic Deep Neural Network class.
67 
68  This class encapsulates the information for all types of Deep Neural Networks.
69 
70  \tparam Architecture_t The Architecture type that holds the
71  architecture-specific data types.
72  */
73 template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
74 class TDeepNet {
75 public:
76  using Matrix_t = typename Architecture_t::Matrix_t;
77  using Scalar_t = typename Architecture_t::Scalar_t;
78 
79 private:
80  bool inline isInteger(Scalar_t x) const { return x == floor(x); }
81  size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);
82 
83 private:
84  std::vector<Layer_t *> fLayers; ///< The layers consisting the DeepNet
85 
86  size_t fBatchSize; ///< Batch size used for training and evaluation.
87  size_t fInputDepth; ///< The depth of the input.
88  size_t fInputHeight; ///< The height of the input.
89  size_t fInputWidth; ///< The width of the input.
90 
91  size_t fBatchDepth; ///< The depth of the batch used for training/testing.
92  size_t fBatchHeight; ///< The height of the batch used for training/testing.
93  size_t fBatchWidth; ///< The width of the batch used for training/testing.
94 
95  bool fIsTraining; ///< Is the network training?
96 
97  ELossFunction fJ; ///< The loss function of the network.
98  EInitialization fI; ///< The initialization method of the network.
99  ERegularization fR; ///< The regularization used for the network.
100  Scalar_t fWeightDecay; ///< The weight decay factor.
101 
102 public:
103  /*! Default Constructor */
104  TDeepNet();
105 
106  /*! Constructor */
107  TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
108  size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
109  ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);
110 
111  /*! Copy-constructor */
112  TDeepNet(const TDeepNet &);
113 
114  /*! Destructor */
115  ~TDeepNet();
116 
117  /*! Function for adding Convolution layer in the Deep Neural Network,
118  * with a given depth, filter height and width, striding in rows and columns,
119  * the zero paddings, as well as the activation function and the dropout
120  * probability. Based on these parameters, it calculates the width and height
121  * of the convolutional layer. */
122  TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
123  size_t strideCols, size_t paddingHeight, size_t paddingWidth,
124  EActivationFunction f, Scalar_t dropoutProbability = 1.0);
125 
126  /*! Function for adding Convolution Layer in the Deep Neural Network,
127  * when the layer is already created. */
128  void AddConvLayer(TConvLayer<Architecture_t> *convLayer);
129 
130  /*! Function for adding Pooling layer in the Deep Neural Network,
131  * with a given filter height and width, striding in rows and columns as
132  * well as the dropout probability. The depth is same as the previous
133  * layer depth. Based on these parameters, it calculates the width and
134  * height of the pooling layer. */
135  TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
136  size_t strideCols, Scalar_t dropoutProbability = 1.0);
137  /*! Function for adding Max Pooling layer in the Deep Neural Network,
138  * when the layer is already created. */
139  void AddMaxPoolLayer(CNN::TMaxPoolLayer<Architecture_t> *maxPoolLayer);
140 
141 
142  /*! Function for adding Recurrent Layer in the Deep Neural Network,
143  * with given parameters */
144  TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
145  bool rememberState = false);
146 
147  /*! Function for adding Vanilla RNN when the layer is already created
148  */
149  void AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer);
150 
151  /*! Function for adding Dense Connected Layer in the Deep Neural Network,
152  * with a given width, activation function and dropout probability.
153  * Based on the previous layer dimensions, it calculates the input width
154  * of the fully connected layer. */
155  TDenseLayer<Architecture_t> *AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability = 1.0);
156 
157  /*! Function for adding Dense Layer in the Deep Neural Network, when
158  * the layer is already created. */
159  void AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer);
160 
161  /*! Function for adding Reshape Layer in the Deep Neural Network, with a given
162  * height and width. It will take every matrix from the previous layer and
163  * reshape it to a matrix with new dimensions. */
164  TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);
165 
166  /*! Function for adding Reshape Layer in the Deep Neural Network, when
167  * the layer is already created. */
168  void AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer);
169 
170 #ifdef HAVE_DAE /// DAE functions
171  /*! Function for adding Corruption layer in the Deep Neural Network,
172  * with given number of visibleUnits and hiddenUnits. It corrupts input
173  * according to given corruptionLevel and dropoutProbability. */
174  TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
175  Scalar_t dropoutProbability, Scalar_t corruptionLevel);
176 
177  /*! Function for adding Corruption Layer in the Deep Neural Network,
178  * when the layer is already created. */
179  void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);
180 
181  /*! Function for adding Compression layer in the Deep Neural Network,
182  * with given number of visibleUnits and hiddenUnits. It compresses the input units
183  * taking weights and biases from prev layers. */
184  TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
185  Scalar_t dropoutProbability, EActivationFunction f,
186  std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);
187 
188  /*! Function for adding Compression Layer in the Deep Neural Network, when
189  * the layer is already created. */
190  void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
191 
192  /*! Function for adding Reconstruction layer in the Deep Neural Network,
193  * with given number of visibleUnits and hiddenUnits. It reconstructs the input units
194  * taking weights and biases from prev layers. Same corruptionLevel and dropoutProbability
195  * must be passed as in corruptionLayer. */
196  TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
197  Scalar_t learningRate, EActivationFunction f,
198  std::vector<Matrix_t> weights,
199  std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
200  Scalar_t dropoutProbability);
201 
202  /*! Function for adding Reconstruction Layer in the Deep Neural Network, when
203  * the layer is already created. */
204  void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
205 
206  /*! Function for adding logisticRegressionLayer in the Deep Neural Network,
207  * with given number of inputUnits and outputUnits. It classifies the outputUnits. */
208  TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
209  size_t testDataBatchSize,
210  Scalar_t learningRate);
211 
212  /*! Function for adding logisticRegressionLayer in the Deep Neural Network, when
213  * the layer is already created. */
214  void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
215 
216  /* To train the Deep AutoEncoder network with required number of Corruption, Compression and Reconstruction
217  * layers. */
218  void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
219  Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
220  bool applyDropout = false);
221 
222  /* To classify outputLabel in Deep AutoEncoder. Should be used after PreTrain if required.
223  * Currently, it used Logistic Regression Layer. Otherwise we can use any other classification layer also.
224  */
225  void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
226  size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
227 #endif
228 
229  /*! Function for initialization of the Neural Net. */
230  void Initialize();
231 
232  /*! Function that executes the entire forward pass in the network. */
233  void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);
234 
235  /*! Function for parallel forward in the vector of deep nets, where the master
236  * net is the net calling this function. There is one batch for one deep net.*/
237  void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
238  std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);
239 
240  /*! Function that executes the entire backward pass in the network. */
241  void Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights);
242 
243 
244  /*! Function for parallel backward in the vector of deep nets, where the master
245  * net is the net calling this function and getting the updates from the other nets.
246  * There is one batch for one deep net.*/
247  void ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
248  std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);
249 
250  /*! Function for parallel backward in the vector of deep nets, where the master
251  * net is the net calling this function and getting the updates from the other nets,
252  * following the momentum strategy. There is one batch for one deep net.*/
253  void ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
254  std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
255  Scalar_t momentum);
256 
257  /*! Function for parallel backward in the vector of deep nets, where the master
258  * net is the net calling this function and getting the updates from the other nets,
259  * following the Nestorov momentum strategy. There is one batch for one deep net.*/
260  void ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
261  std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
262  Scalar_t momentum);
263 
264  /*! Function that will update the weights and biases in the layers that
265  * contain weights and biases. */
266  void Update(Scalar_t learningRate);
267 
268  /*! Function for evaluating the loss, based on the activations stored
269  * in the last layer. */
270  Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;
271 
272  /*! Function for evaluating the loss, based on the propagation of the given input. */
273  Scalar_t Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights,
274  bool applyDropout = false, bool includeRegularization = true);
275 
276  /*! Prediction based on activations stored in the last layer. */
277  void Prediction(Matrix_t &predictions, EOutputFunction f) const;
278 
279  /*! Prediction for the given inputs, based on what network learned. */
280  void Prediction(Matrix_t &predictions, std::vector<Matrix_t> input, EOutputFunction f);
281 
282  /*! Print the Deep Net Info */
283  void Print() const;
284 
285  /*! Get the layer in the vector of layers at poistion i */
286  inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
287  inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }
288 
289  /* Depth and the output width of the network. */
290  inline size_t GetDepth() const { return fLayers.size(); }
291  inline size_t GetOutputWidth() const { return fLayers.back()->GetWidth(); }
292 
293  /* Return a reference to the layers. */
294  inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
295  inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }
296 
297  /*! Remove all layers from the network. */
298  inline void Clear() { fLayers.clear(); }
299 
300  /*! Getters */
301  inline size_t GetBatchSize() const { return fBatchSize; }
302  inline size_t GetInputDepth() const { return fInputDepth; }
303  inline size_t GetInputHeight() const { return fInputHeight; }
304  inline size_t GetInputWidth() const { return fInputWidth; }
305 
306  inline size_t GetBatchDepth() const { return fBatchDepth; }
307  inline size_t GetBatchHeight() const { return fBatchHeight; }
308  inline size_t GetBatchWidth() const { return fBatchWidth; }
309 
310  inline bool IsTraining() const { return fIsTraining; }
311 
312  inline ELossFunction GetLossFunction() const { return fJ; }
313  inline EInitialization GetInitialization() const { return fI; }
314  inline ERegularization GetRegularization() const { return fR; }
315  inline Scalar_t GetWeightDecay() const { return fWeightDecay; }
316 
317  /*! Setters */
318  // FIXME many of these won't work as the data structure storing activations
319  // and gradients have not changed in all the layers, also params in layers
320  // have not changed either
321  inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
322  inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
323  inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
324  inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
325  inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
326  inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
327  inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
328  inline void SetLossFunction(ELossFunction J) { fJ = J; }
329  inline void SetInitialization(EInitialization I) { fI = I; }
330  inline void SetRegularization(ERegularization R) { fR = R; }
331  inline void SetWeightDecay(Scalar_t weightDecay) { fWeightDecay = weightDecay; }
332 };
333 
334 //
335 // Deep Net Class - Implementation
336 //
337 //______________________________________________________________________________
338 template <typename Architecture_t, typename Layer_t>
340  : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
341  fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
342  fIsTraining(true), fWeightDecay(0.0)
343 {
344  // Nothing to do here.
345 }
346 
347 //______________________________________________________________________________
348 template <typename Architecture_t, typename Layer_t>
349 TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
350  size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
352  : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
353  fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
354  fR(R), fWeightDecay(weightDecay)
355 {
356  // Nothing to do here.
357 }
358 
359 //______________________________________________________________________________
360 template <typename Architecture_t, typename Layer_t>
364  fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
365  fWeightDecay(deepNet.fWeightDecay)
366 {
367  // Nothing to do here.
368 }
369 
370 //______________________________________________________________________________
371 template <typename Architecture_t, typename Layer_t>
373 {
374  // Release the layers memory
375 }
376 
377 //______________________________________________________________________________
378 template <typename Architecture_t, typename Layer_t>
379 auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
380 {
381  Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
382  if (!isInteger(dimension) || dimension <= 0) {
383  this->Print();
384  int iLayer = fLayers.size();
385  Fatal("calculateDimension","Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
386  iLayer, imgDim, fltDim, padding, stride);
387  // std::cout << " calculateDimension - Not compatible hyper parameters (imgDim, fltDim, padding, stride)"
388  // << imgDim << " , " << fltDim << " , " << padding << " , " << stride<< " resulting dim is " << dimension << std::endl;
389  // std::exit(EXIT_FAILURE);
390  }
391 
392  return (size_t)dimension;
393 }
394 
395 //______________________________________________________________________________
396 template <typename Architecture_t, typename Layer_t>
398  size_t filterWidth, size_t strideRows,
399  size_t strideCols, size_t paddingHeight,
400  size_t paddingWidth, EActivationFunction f,
401  Scalar_t dropoutProbability)
402 {
403  // All variables defining a convolutional layer
404  size_t batchSize = this->GetBatchSize();
405  size_t inputDepth;
406  size_t inputHeight;
407  size_t inputWidth;
408  size_t height;
409  size_t width;
410  size_t filterDepth;
411  size_t weightsNRows = depth;
412  size_t weightsNCols;
413  size_t biasesNRows = depth;
414  size_t biasesNCols = 1;
415  size_t outputNSlices = this->GetBatchSize();
416  size_t outputNRows = depth;
417  size_t outputNCols;
419  ERegularization reg = this->GetRegularization();
420  Scalar_t decay = this->GetWeightDecay();
421 
422  if (fLayers.size() == 0) {
423  inputDepth = this->GetInputDepth();
424  inputHeight = this->GetInputHeight();
425  inputWidth = this->GetInputWidth();
426  } else {
427  Layer_t *lastLayer = fLayers.back();
428  inputDepth = lastLayer->GetDepth();
429  inputHeight = lastLayer->GetHeight();
430  inputWidth = lastLayer->GetWidth();
431  }
432 
433  height = calculateDimension(inputHeight, filterHeight, paddingHeight, strideRows);
434  width = calculateDimension(inputWidth, filterWidth, paddingWidth, strideCols);
435 
436  filterDepth = inputDepth;
437 
438  weightsNCols = filterDepth * filterHeight * filterWidth;
439  outputNCols = height * width;
440 
441  // Create the conv layer
443  batchSize, inputDepth, inputHeight, inputWidth, depth, height, width, weightsNRows, weightsNCols, biasesNRows,
444  biasesNCols, outputNSlices, outputNRows, outputNCols, init, filterDepth, filterHeight, filterWidth, strideRows,
445  strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);
446 
447  fLayers.push_back(convLayer);
448  return convLayer;
449 }
450 
451 //______________________________________________________________________________
452 template <typename Architecture_t, typename Layer_t>
454 {
455  fLayers.push_back(convLayer);
456 }
457 
458 //______________________________________________________________________________
459 template <typename Architecture_t, typename Layer_t>
461  size_t strideRows, size_t strideCols,
462  Scalar_t dropoutProbability)
463 {
464  size_t batchSize = this->GetBatchSize();
465  size_t inputDepth;
466  size_t inputHeight;
467  size_t inputWidth;
468  size_t height;
469  size_t width;
470  size_t outputNSlices = this->GetBatchSize();
471  size_t outputNRows;
472  size_t outputNCols;
473 
474  if (fLayers.size() == 0) {
475  inputDepth = this->GetInputDepth();
476  inputHeight = this->GetInputHeight();
477  inputWidth = this->GetInputWidth();
478  } else {
479  Layer_t *lastLayer = fLayers.back();
480  inputDepth = lastLayer->GetDepth();
481  inputHeight = lastLayer->GetHeight();
482  inputWidth = lastLayer->GetWidth();
483  }
484 
485  height = calculateDimension(inputHeight, frameHeight, 0, strideRows);
486  width = calculateDimension(inputWidth, frameWidth, 0, strideCols);
487 
488  outputNRows = inputDepth;
489  outputNCols = height * width;
490 
492  batchSize, inputDepth, inputHeight, inputWidth, height, width, outputNSlices, outputNRows, outputNCols,
493  frameHeight, frameWidth, strideRows, strideCols, dropoutProbability);
494 
495  // But this creates a copy or what?
496  fLayers.push_back(maxPoolLayer);
497 
498  return maxPoolLayer;
499 }
500 
501 //______________________________________________________________________________
502 template <typename Architecture_t, typename Layer_t>
504 {
505  fLayers.push_back(maxPoolLayer);
506 }
507 
508 //______________________________________________________________________________
509 template <typename Architecture_t, typename Layer_t>
511  size_t timeSteps,
512  bool rememberState)
513 {
514 
515  // should check if input and time size are consistent
516 
517  //std::cout << "Create RNN " << fLayers.size() << " " << this->GetInputHeight() << " " << this->GetInputWidth() << std::endl;
518  size_t inputHeight, inputWidth;
519  if (fLayers.size() == 0) {
520  inputHeight = this->GetInputHeight();
521  inputWidth = this->GetInputWidth();
522  } else {
523  Layer_t *lastLayer = fLayers.back();
524  inputHeight = lastLayer->GetHeight();
525  inputWidth = lastLayer->GetWidth();
526  }
527  if (inputSize != inputWidth) {
528  Error("AddBasicRNNLayer","Inconsistent input size with input layout - it should be %zu instead of %zu",inputSize, inputWidth);
529  }
530  if (timeSteps != inputHeight) {
531  Error("AddBasicRNNLayer","Inconsistent time steps with input layout - it should be %zu instead of %zu",timeSteps, inputHeight);
532  }
533 
534  TBasicRNNLayer<Architecture_t> *basicRNNLayer =
535  new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
537  fLayers.push_back(basicRNNLayer);
538  return basicRNNLayer;
539 }
540 
541 //______________________________________________________________________________
542 template <typename Architecture_t, typename Layer_t>
544 {
545  fLayers.push_back(basicRNNLayer);
546 }
547 
548 //DAE
549 #ifdef HAVE_DAE
550 
551 //______________________________________________________________________________
552 template <typename Architecture_t, typename Layer_t>
553 TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
554  size_t hiddenUnits,
555  Scalar_t dropoutProbability,
556  Scalar_t corruptionLevel)
557 {
558  size_t batchSize = this->GetBatchSize();
559 
560  TCorruptionLayer<Architecture_t> *corruptionLayer =
561  new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
562  fLayers.push_back(corruptionLayer);
563  return corruptionLayer;
564 }
565 //______________________________________________________________________________
566 
567 template <typename Architecture_t, typename Layer_t>
568 void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
569 {
570  fLayers.push_back(corruptionLayer);
571 }
572 
573 //______________________________________________________________________________
574 template <typename Architecture_t, typename Layer_t>
575 TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
576  size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
577  std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
578 {
579  size_t batchSize = this->GetBatchSize();
580 
581  TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
582  batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
583  fLayers.push_back(compressionLayer);
584  return compressionLayer;
585 }
586 //______________________________________________________________________________
587 
588 template <typename Architecture_t, typename Layer_t>
589 void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
590 {
591  fLayers.push_back(compressionLayer);
592 }
593 
594 //______________________________________________________________________________
595 template <typename Architecture_t, typename Layer_t>
596 TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
597  size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
598  std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
599 {
600  size_t batchSize = this->GetBatchSize();
601 
602  TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
603  batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
604  fLayers.push_back(reconstructionLayer);
605  return reconstructionLayer;
606 }
607 //______________________________________________________________________________
608 
609 template <typename Architecture_t, typename Layer_t>
611  TReconstructionLayer<Architecture_t> *reconstructionLayer)
612 {
613  fLayers.push_back(reconstructionLayer);
614 }
615 
616 //______________________________________________________________________________
617 template <typename Architecture_t, typename Layer_t>
618 TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
619  size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
620 {
621  size_t batchSize = this->GetBatchSize();
622 
623  TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
624  new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
625  fLayers.push_back(logisticRegressionLayer);
626  return logisticRegressionLayer;
627 }
628 //______________________________________________________________________________
629 template <typename Architecture_t, typename Layer_t>
631  TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
632 {
633  fLayers.push_back(logisticRegressionLayer);
634 }
635 #endif
636 
637 
638 //______________________________________________________________________________
639 template <typename Architecture_t, typename Layer_t>
641  Scalar_t dropoutProbability)
642 {
643  size_t batchSize = this->GetBatchSize();
644  size_t inputWidth;
646  ERegularization reg = this->GetRegularization();
647  Scalar_t decay = this->GetWeightDecay();
648 
649  if (fLayers.size() == 0) {
650  inputWidth = this->GetInputWidth();
651  } else {
652  Layer_t *lastLayer = fLayers.back();
653  inputWidth = lastLayer->GetWidth();
654  }
655 
656  TDenseLayer<Architecture_t> *denseLayer =
657  new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
658 
659  fLayers.push_back(denseLayer);
660 
661  return denseLayer;
662 }
663 
664 //______________________________________________________________________________
665 template <typename Architecture_t, typename Layer_t>
667 {
668  fLayers.push_back(denseLayer);
669 }
670 
671 //______________________________________________________________________________
672 template <typename Architecture_t, typename Layer_t>
674  size_t width, bool flattening)
675 {
676  size_t batchSize = this->GetBatchSize();
677  size_t inputDepth;
678  size_t inputHeight;
679  size_t inputWidth;
680  size_t outputNSlices;
681  size_t outputNRows;
682  size_t outputNCols;
683 
684  if (fLayers.size() == 0) {
685  inputDepth = this->GetInputDepth();
686  inputHeight = this->GetInputHeight();
687  inputWidth = this->GetInputWidth();
688  } else {
689  Layer_t *lastLayer = fLayers.back();
690  inputDepth = lastLayer->GetDepth();
691  inputHeight = lastLayer->GetHeight();
692  inputWidth = lastLayer->GetWidth();
693  }
694 
695  if (flattening) {
696  outputNSlices = 1;
697  outputNRows = this->GetBatchSize();
698  outputNCols = depth * height * width;
699  size_t inputNCols = inputDepth * inputHeight * inputWidth;
700  if (outputNCols != 0 && outputNCols != inputNCols ) {
701  Info("AddReshapeLayer","Dimensions not compatibles - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - Force flattening output to be %zu",
702  inputDepth, inputHeight, inputWidth, depth, height, width,inputNCols);
703  }
704  outputNCols = inputNCols;
705  depth = 1;
706  height = 1;
707  width = outputNCols;
708  } else {
709  outputNSlices = this->GetBatchSize();
710  outputNRows = depth;
711  outputNCols = height * width;
712  }
713 
714  TReshapeLayer<Architecture_t> *reshapeLayer =
715  new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
716  outputNSlices, outputNRows, outputNCols, flattening);
717 
718  fLayers.push_back(reshapeLayer);
719 
720  return reshapeLayer;
721 }
722 
723 //______________________________________________________________________________
724 template <typename Architecture_t, typename Layer_t>
726 {
727  fLayers.push_back(reshapeLayer);
728 }
729 
730 //______________________________________________________________________________
731 template <typename Architecture_t, typename Layer_t>
733 {
734  for (size_t i = 0; i < fLayers.size(); i++) {
735  fLayers[i]->Initialize();
736  }
737 }
738 
/*! Debug helper that dumps a tensor (a vector of matrices) to std::cout.
 *
 *  The output starts with \p name on its own line. Each matrix is then
 *  printed row by row, every element followed by a single space, and closed
 *  by a line of eight asterisks.
 *
 *  \tparam Architecture type providing the nested Matrix_t; the matrix must
 *                       offer GetNrows(), GetNcols() and operator()(i, j).
 *  \param A    the matrices to print
 *  \param name label printed before the data; taken by const reference so
 *              that no string copy is made per call
 */
template <typename Architecture>
auto debugTensor(const std::vector<typename Architecture::Matrix_t> &A, const std::string &name = "tensor") -> void
{
   std::cout << name << "\n";
   for (const auto &matrix : A) {
      for (size_t row = 0; row < matrix.GetNrows(); ++row) {
         for (size_t col = 0; col < matrix.GetNcols(); ++col) {
            std::cout << matrix(row, col) << " ";
         }
         std::cout << "\n";
      }
      std::cout << "********\n";
   }
}
753 
754 //______________________________________________________________________________
755 template <typename Architecture_t, typename Layer_t>
756 auto TDeepNet<Architecture_t, Layer_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
757 {
758  fLayers.front()->Forward(input, applyDropout);
759 
760  for (size_t i = 1; i < fLayers.size(); i++) {
761  fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
762  }
763 }
764 
765 //______________________________________________________________________________
766 template <typename Architecture_t, typename Layer_t>
768  std::vector<TTensorBatch<Architecture_t>> &batches,
769  bool applyDropout) -> void
770 {
771  size_t depth = this->GetDepth();
772 
773  // The first layer of each deep net
774  for (size_t i = 0; i < nets.size(); i++) {
775  nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
776  }
777 
778  // The i'th layer of each deep net
779  for (size_t i = 1; i < depth; i++) {
780  for (size_t j = 0; j < nets.size(); j++) {
781  nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
782  }
783  }
784 }
785 
786 #ifdef HAVE_DAE
787 //_____________________________________________________________________________
788 template <typename Architecture_t, typename Layer_t>
789 auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
790  std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
791  Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
792  EActivationFunction f, bool applyDropout) -> void
793 {
794  std::vector<Matrix_t> inp1;
795  std::vector<Matrix_t> inp2;
796  size_t numOfHiddenLayers = sizeof(numHiddenUnitsPerLayer) / sizeof(numHiddenUnitsPerLayer[0]);
797  // size_t batchSize = this->GetBatchSize();
798  size_t visibleUnits = (size_t)input[0].GetNrows();
799 
800  AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
801  fLayers.back()->Initialize();
802  fLayers.back()->Forward(input, applyDropout);
803  // fLayers.back()->Print();
804 
805  AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
806  fLayers.back()->GetBiases());
807  fLayers.back()->Initialize();
808  fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // as we have to pass corrupt input
809 
810  AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
811  fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
812  fLayers.back()->Initialize();
813  fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
814  applyDropout); // as we have to pass compressed Input
815  fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
816  input);
817  // three layers are added, now pointer is on third layer
818  size_t weightsSize = fLayers.back()->GetWeights().size();
819  size_t biasesSize = fLayers.back()->GetBiases().size();
820  for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
821  // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
822  for (size_t j = 0; j < weightsSize; j++) {
823  Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
824  }
825  for (size_t j = 0; j < biasesSize; j++) {
826  Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
827  }
828  fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
829  fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
830  fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
831  fLayers[fLayers.size() - 3]->GetOutput(), input);
832  }
833  fLayers.back()->Print();
834 
835  for (size_t i = 1; i < numOfHiddenLayers; i++) {
836 
837  AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
838  fLayers.back()->Initialize();
839  fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(),
840  applyDropout); // as we have to pass compressed Input
841 
842  AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
843  fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
844  fLayers.back()->Initialize();
845  fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
846 
847  AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
848  fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
849  dropoutProbability);
850  fLayers.back()->Initialize();
851  fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
852  applyDropout); // as we have to pass compressed Input
853  fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
854  fLayers[fLayers.size() - 5]->GetOutput());
855 
856  // three layers are added, now pointer is on third layer
857  size_t _weightsSize = fLayers.back()->GetWeights().size();
858  size_t _biasesSize = fLayers.back()->GetBiases().size();
859  for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
860  // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
861  for (size_t j = 0; j < _weightsSize; j++) {
862  Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
863  }
864  for (size_t j = 0; j < _biasesSize; j++) {
865  Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
866  }
867  fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
868  fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
869  fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
870  fLayers[fLayers.size() - 3]->GetOutput(),
871  fLayers[fLayers.size() - 5]->GetOutput());
872  }
873  fLayers.back()->Print();
874  }
875 }
876 
877 //______________________________________________________________________________
878 template <typename Architecture_t, typename Layer_t>
879 auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
880  std::vector<Matrix_t> &inputLabel, size_t outputUnits,
881  size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
882 {
883  std::vector<Matrix_t> inp1;
884  std::vector<Matrix_t> inp2;
885  if (fLayers.size() == 0) // only Logistic Regression Layer
886  {
887  size_t inputUnits = input[0].GetNrows();
888 
889  AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
890  fLayers.back()->Initialize();
891  for (size_t i = 0; i < epochs; i++) {
892  fLayers.back()->Backward(inputLabel, inp1, input, inp2);
893  }
894  fLayers.back()->Forward(input, false);
895  fLayers.back()->Print();
896  } else { // if used after any other layer
897  size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
898  AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
899  fLayers.back()->Initialize();
900  for (size_t i = 0; i < epochs; i++) {
901  fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
902  }
903  fLayers.back()->Forward(testInput, false);
904  fLayers.back()->Print();
905  }
906 }
907 #endif
908 
909 //______________________________________________________________________________
910 template <typename Architecture_t, typename Layer_t>
911 auto TDeepNet<Architecture_t, Layer_t>::Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
912  const Matrix_t &weights) -> void
913 {
914  std::vector<Matrix_t> inp1;
915  std::vector<Matrix_t> inp2;
916  // Last layer should be dense layer
917  evaluateGradients<Architecture_t>(fLayers.back()->GetActivationGradientsAt(0), this->GetLossFunction(), groundTruth,
918  fLayers.back()->GetOutputAt(0), weights);
919  for (size_t i = fLayers.size() - 1; i > 0; i--) {
920  std::vector<Matrix_t> &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
921  std::vector<Matrix_t> &activations_backward = fLayers[i - 1]->GetOutput();
922  fLayers[i]->Backward(activation_gradient_backward, activations_backward, inp1, inp2);
923  }
924 
925  // need to have a dummy tensor (size=0) to pass for activation gradient backward which
926  // are not computed for the first layer
927  std::vector<Matrix_t> dummy;
928  fLayers[0]->Backward(dummy, input, inp1, inp2);
929 }
930 
931 //______________________________________________________________________________
932 template <typename Architecture_t, typename Layer_t>
934  std::vector<TTensorBatch<Architecture_t>> &batches,
935  Scalar_t learningRate) -> void
936 {
937  std::vector<Matrix_t> inp1;
938  std::vector<Matrix_t> inp2;
939  size_t depth = this->GetDepth();
940 
941  // Evaluate the gradients of the last layers in each deep net
942  for (size_t i = 0; i < nets.size(); i++) {
943  evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
944  nets[i].GetLossFunction(), batches[i].GetOutput(),
945  nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
946  }
947 
948  // Backpropagate the error in i'th layer of each deep net
949  for (size_t i = depth - 1; i > 0; i--) {
950  for (size_t j = 0; j < nets.size(); j++) {
951  nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
952  nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
953  }
954  }
955 
956  std::vector<Matrix_t> dummy;
957 
958  // First layer of each deep net
959  for (size_t i = 0; i < nets.size(); i++) {
960  nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
961  }
962 
963  // Update and copy
964  for (size_t i = 0; i < nets.size(); i++) {
965  for (size_t j = 0; j < depth; j++) {
966  Layer_t *masterLayer = this->GetLayerAt(j);
967  Layer_t *layer = nets[i].GetLayerAt(j);
968 
969  masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
970  layer->CopyWeights(masterLayer->GetWeights());
971 
972  masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
973  layer->CopyBiases(masterLayer->GetBiases());
974  }
975  }
976 }
977 
978 //______________________________________________________________________________
979 template <typename Architecture_t, typename Layer_t>
981  std::vector<TTensorBatch<Architecture_t>> &batches,
982  Scalar_t learningRate, Scalar_t momentum) -> void
983 {
984  std::vector<Matrix_t> inp1;
985  std::vector<Matrix_t> inp2;
986  size_t depth = this->GetDepth();
987 
988  // Evaluate the gradients of the last layers in each deep net
989  for (size_t i = 0; i < nets.size(); i++) {
990  evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
991  nets[i].GetLossFunction(), batches[i].GetOutput(),
992  nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
993  }
994 
995  // Backpropagate the error in i'th layer of each deep net
996  for (size_t i = depth - 1; i > 0; i--) {
997  Layer_t *masterLayer = this->GetLayerAt(i);
998 
999  for (size_t j = 0; j < nets.size(); j++) {
1000  Layer_t *layer = nets[j].GetLayerAt(i);
1001 
1002  layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1003  inp1, inp2);
1004  masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1005  masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1006  }
1007 
1008  masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1009  masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1010  }
1011 
1012  std::vector<Matrix_t> dummy;
1013 
1014  // First layer of each deep net
1015  Layer_t *masterFirstLayer = this->GetLayerAt(0);
1016  for (size_t i = 0; i < nets.size(); i++) {
1017  Layer_t *layer = nets[i].GetLayerAt(0);
1018 
1019  layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1020 
1021  masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1022  masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1023  }
1024 
1025  masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
1026  masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);
1027 
1028  for (size_t i = 0; i < depth; i++) {
1029  Layer_t *masterLayer = this->GetLayerAt(i);
1030  masterLayer->Update(1.0);
1031 
1032  for (size_t j = 0; j < nets.size(); j++) {
1033  Layer_t *layer = nets[j].GetLayerAt(i);
1034 
1035  layer->CopyWeights(masterLayer->GetWeights());
1036  layer->CopyBiases(masterLayer->GetBiases());
1037  }
1038  }
1039 }
1040 
1041 //______________________________________________________________________________
1042 template <typename Architecture_t, typename Layer_t>
1044  std::vector<TTensorBatch<Architecture_t>> &batches,
1045  Scalar_t learningRate, Scalar_t momentum) -> void
1046 {
1047  std::cout << "Parallel Backward Nestorov" << std::endl;
1048  std::vector<Matrix_t> inp1;
1049  std::vector<Matrix_t> inp2;
1050  size_t depth = this->GetDepth();
1051 
1052  // Evaluate the gradients of the last layers in each deep net
1053  for (size_t i = 0; i < nets.size(); i++) {
1054  evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1055  nets[i].GetLossFunction(), batches[i].GetOutput(),
1056  nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1057  }
1058 
1059  // Backpropagate the error in i'th layer of each deep net
1060  for (size_t i = depth - 1; i > 0; i--) {
1061  for (size_t j = 0; j < nets.size(); j++) {
1062  Layer_t *layer = nets[j].GetLayerAt(i);
1063 
1064  layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1065  inp1, inp2);
1066  }
1067  }
1068 
1069  std::vector<Matrix_t> dummy;
1070 
1071  // First layer of each deep net
1072  for (size_t i = 0; i < nets.size(); i++) {
1073  Layer_t *layer = nets[i].GetLayerAt(0);
1074  layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1075  }
1076 
1077  for (size_t i = 0; i < depth; i++) {
1078  Layer_t *masterLayer = this->GetLayerAt(i);
1079  for (size_t j = 0; j < nets.size(); j++) {
1080  Layer_t *layer = nets[j].GetLayerAt(i);
1081 
1082  layer->CopyWeights(masterLayer->GetWeights());
1083  layer->CopyBiases(masterLayer->GetBiases());
1084 
1085  layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
1086  layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
1087  }
1088 
1089  for (size_t j = 0; j < nets.size(); j++) {
1090  Layer_t *layer = nets[j].GetLayerAt(i);
1091 
1092  masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1093  masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1094  }
1095 
1096  masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1097  masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1098 
1099  masterLayer->Update(1.0);
1100  }
1101 }
1102 
1103 //______________________________________________________________________________
1104 template <typename Architecture_t, typename Layer_t>
1106 {
1107  for (size_t i = 0; i < fLayers.size(); i++) {
1108  fLayers[i]->Update(learningRate);
1109  }
1110 }
1111 
1112 //______________________________________________________________________________
1113 template <typename Architecture_t, typename Layer_t>
1114 auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
1115  bool includeRegularization) const -> Scalar_t
1116 {
1117  // Last layer should not be deep
1118  auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);
1119  includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
1120 
1121  if (includeRegularization) {
1122  for (size_t i = 0; i < fLayers.size(); i++) {
1123  for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
1124  loss += this->GetWeightDecay() *
1125  regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
1126  }
1127  }
1128  }
1129 
1130  return loss;
1131 }
1132 
1133 //______________________________________________________________________________
1134 template <typename Architecture_t, typename Layer_t>
1135 auto TDeepNet<Architecture_t, Layer_t>::Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
1136  const Matrix_t &weights, bool applyDropout, bool includeRegularization)
1137  -> Scalar_t
1138 {
1139  Forward(input, applyDropout);
1140  return Loss(groundTruth, weights, includeRegularization);
1141 }
1142 
1143 //______________________________________________________________________________
1144 template <typename Architecture_t, typename Layer_t>
1146 {
1147  // Last layer should not be deep
1148  evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1149 }
1150 
1151 //______________________________________________________________________________
1152 template <typename Architecture_t, typename Layer_t>
1153 auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, std::vector<Matrix_t> input,
1154  EOutputFunction f) -> void
1155 {
1156  Forward(input, false);
1157  // Last layer should not be deep
1158  evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1159 }
1160 
1161 //______________________________________________________________________________
1162 template <typename Architecture_t, typename Layer_t>
1164 {
1165  std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
1166  std::cout << " Input = ( " << this->GetInputDepth();
1167  std::cout << ", " << this->GetInputHeight();
1168  std::cout << ", " << this->GetInputWidth() << " )";
1169  std::cout << " Batch size = " << this->GetBatchSize();
1170  std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;
1171 
1172  //std::cout << "\t Layers: " << std::endl;
1173 
1174  for (size_t i = 0; i < fLayers.size(); i++) {
1175  std::cout << "\tLayer " << i << "\t";
1176  fLayers[i]->Print();
1177  }
1178 }
1179 } // namespace DNN
1180 } // namespace TMVA
1181 
1182 #endif
Scalar_t GetWeightDecay() const
Definition: DeepNet.h:315
void SetBatchWidth(size_t batchWidth)
Definition: DeepNet.h:327
~TDeepNet()
Destructor.
Definition: DeepNet.h:372
void Fatal(const char *location, const char *msgfmt,...)
size_t calculateDimension(int imgDim, int fltDim, int padding, int stride)
Definition: DeepNet.h:379
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width...
Definition: DeepNet.h:673
image html pict1_TGaxis_012 png width
Define new text attributes for the label number "labNum".
Definition: TGaxis.cxx:2551
std::vector< Layer_t * > fLayers
The layers that make up the DeepNet.
Definition: DeepNet.h:84
void SetRegularization(ERegularization R)
Definition: DeepNet.h:330
ERegularization fR
The regularization used for the network.
Definition: DeepNet.h:99
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth, filter height and width, striding in rows and columns, the zero paddings, as well as the activation function and the dropout probability.
Definition: DeepNet.h:397
EInitialization GetInitialization() const
Definition: DeepNet.h:313
typename Architecture_t::Scalar_t Scalar_t
Definition: DeepNet.h:77
Generic Max Pooling Layer class.
Definition: MaxPoolLayer.h:54
#define f(i)
Definition: RSha256.hxx:104
bool fIsTraining
Is the network training?
Definition: DeepNet.h:95
void SetBatchSize(size_t batchSize)
Setters.
Definition: DeepNet.h:321
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width...
Definition: DeepNet.h:640
void ParallelBackward(std::vector< TDeepNet< Architecture_t, Layer_t >> &nets, std::vector< TTensorBatch< Architecture_t >> &batches, Scalar_t learningRate)
Function for parallel backward in the vector of deep nets, where the master net is the net calling th...
Definition: DeepNet.h:933
std::vector< Layer_t * > & GetLayers()
Definition: DeepNet.h:294
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:510
static double A[]
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
EInitialization fI
The initialization method of the network.
Definition: DeepNet.h:98
const Layer_t * GetLayerAt(size_t i) const
Definition: DeepNet.h:287
EInitialization
Definition: Functions.h:70
Double_t x[n]
Definition: legend1.C:17
size_t fInputHeight
The height of the input.
Definition: DeepNet.h:88
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496
size_t fBatchSize
Batch size used for training and evaluation.
Definition: DeepNet.h:86
size_t GetInputHeight() const
Definition: DeepNet.h:303
void SetBatchDepth(size_t batchDepth)
Definition: DeepNet.h:325
bool isInteger(Scalar_t x) const
Definition: DeepNet.h:80
void Info(const char *location, const char *msgfmt,...)
size_t GetBatchDepth() const
Definition: DeepNet.h:306
size_t GetBatchWidth() const
Definition: DeepNet.h:308
size_t fBatchWidth
The width of the batch used for training/testing.
Definition: DeepNet.h:93
void SetWeightDecay(Scalar_t weightDecay)
Definition: DeepNet.h:331
void Forward(std::vector< Matrix_t > &input, bool applyDropout=false)
Function that executes the entire forward pass in the network.
Definition: DeepNet.h:756
size_t fInputDepth
The depth of the input.
Definition: DeepNet.h:87
size_t GetInputDepth() const
Definition: DeepNet.h:302
Layer_t * GetLayerAt(size_t i)
Get the layer in the vector of layers at position i.
Definition: DeepNet.h:286
Generic layer class.
Definition: DenseLayer.h:55
void Error(const char *location, const char *msgfmt,...)
void ParallelBackwardMomentum(std::vector< TDeepNet< Architecture_t, Layer_t >> &nets, std::vector< TTensorBatch< Architecture_t >> &batches, Scalar_t learningRate, Scalar_t momentum)
Function for parallel backward in the vector of deep nets, where the master net is the net calling th...
Definition: DeepNet.h:980
ELossFunction GetLossFunction() const
Definition: DeepNet.h:312
void Prediction(Matrix_t &predictions, EOutputFunction f) const
Prediction based on activations stored in the last layer.
Definition: DeepNet.h:1145
void Backward(std::vector< Matrix_t > &input, const Matrix_t &groundTruth, const Matrix_t &weights)
Function that executes the entire backward pass in the network.
Definition: DeepNet.h:911
void ParallelBackwardNestorov(std::vector< TDeepNet< Architecture_t, Layer_t >> &nets, std::vector< TTensorBatch< Architecture_t >> &batches, Scalar_t learningRate, Scalar_t momentum)
Function for parallel backward in the vector of deep nets, where the master net is the net calling th...
Definition: DeepNet.h:1043
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
void SetLossFunction(ELossFunction J)
Definition: DeepNet.h:328
TDeepNet()
Default Constructor.
Definition: DeepNet.h:339
void ParallelForward(std::vector< TDeepNet< Architecture_t, Layer_t >> &nets, std::vector< TTensorBatch< Architecture_t >> &batches, bool applyDropout=false)
Function for parallel forward in the vector of deep nets, where the master net is the net calling thi...
Definition: DeepNet.h:767
double floor(double)
void Print() const
Print the Deep Net Info.
Definition: DeepNet.h:1163
const std::vector< Layer_t * > & GetLayers() const
Definition: DeepNet.h:295
size_t GetBatchSize() const
Getters.
Definition: DeepNet.h:301
ERegularization GetRegularization() const
Definition: DeepNet.h:314
void Initialize()
DAE functions.
Definition: DeepNet.h:732
size_t GetInputWidth() const
Definition: DeepNet.h:304
void SetInputHeight(size_t inputHeight)
Definition: DeepNet.h:323
void SetInputWidth(size_t inputWidth)
Definition: DeepNet.h:324
auto debugTensor(const std::vector< typename Architecture::Matrix_t > &A, const std::string name="tensor") -> void
Definition: DeepNet.h:740
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width...
Definition: DeepNet.h:460
size_t fBatchDepth
The depth of the batch used for training/testing.
Definition: DeepNet.h:91
void Copy(void *source, void *dest)
void Print(std::ostream &os, const OptionType &opt)
static Int_t init()
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:43
static RooMathCoreReg dummy
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:54
size_t fBatchHeight
The height of the batch used for training/testing.
Definition: DeepNet.h:92
void SetInputDepth(size_t inputDepth)
Definition: DeepNet.h:322
typename Architecture_t::Matrix_t Matrix_t
Definition: DeepNet.h:76
void Update(Scalar_t learningRate)
Function that will update the weights and biases in the layers that contain weights and biases...
Definition: DeepNet.h:1105
size_t fInputWidth
The width of the input.
Definition: DeepNet.h:89
void Clear()
Remove all layers from the network.
Definition: DeepNet.h:298
bool IsTraining() const
Definition: DeepNet.h:310
Abstract ClassifierFactory template that handles arbitrary types.
void SetInitialization(EInitialization I)
Definition: DeepNet.h:329
size_t GetBatchHeight() const
Definition: DeepNet.h:307
void SetBatchHeight(size_t batchHeight)
Definition: DeepNet.h:326
auto * l
Definition: textangle.C:4
Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization=true) const
Function for evaluating the loss, based on the activations stored in the last layer.
Definition: DeepNet.h:1114
Scalar_t fWeightDecay
The weight decay factor.
Definition: DeepNet.h:100
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:62
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:31
ELossFunction fJ
The loss function of the network.
Definition: DeepNet.h:97
size_t GetDepth() const
Definition: DeepNet.h:290
#define I(x, y, z)
size_t GetOutputWidth() const
Definition: DeepNet.h:291
char name[80]
Definition: TGX11.cxx:109
Generic Deep Neural Network class.
Definition: DeepNet.h:74