Logo ROOT  
Reference Guide
DeepNet.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TDeepNet *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Deep Neural Network *
12 * *
13 * Authors (alphabetical): *
14 * Akshay Vashistha <akshayvashistha1995@gmail.com> - CERN, Switzerland *
15 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
16 * Saurav Shekhar <sauravshekhar01@gmail.com> - CERN, Switzerland *
17 * *
18 * Copyright (c) 2005-2015: *
19 * CERN, Switzerland *
20 * U. of Victoria, Canada *
21 * MPI-K Heidelberg, Germany *
22 * U. of Bonn, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef TMVA_DNN_DEEPNET
30#define TMVA_DNN_DEEPNET
31
32#include "TString.h"
33
34#include "TMVA/DNN/Functions.h"
36
38#include "TMVA/DNN/DenseLayer.h"
41
44
48
49#ifdef HAVE_DAE
50#include "TMVA/DNN/DAE/CompressionLayer.h"
51#include "TMVA/DNN/DAE/CorruptionLayer.h"
52#include "TMVA/DNN/DAE/ReconstructionLayer.h"
53#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
54#endif
55
56#include <vector>
57#include <cmath>
58
59
60namespace TMVA {
61namespace DNN {
62
63 using namespace CNN;
64 using namespace RNN;
65
66 //using namespace DAE;
67
68/** \class TDeepNet
69 Generic Deep Neural Network class.
 70 This class encapsulates the information for all types of Deep Neural Networks.
71 \tparam Architecture The Architecture type that holds the
72 architecture-specific data types.
73 */
74template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
75class TDeepNet {
76public:
77
78 using Tensor_t = typename Architecture_t::Tensor_t;
79 using Matrix_t = typename Architecture_t::Matrix_t;
80 using Scalar_t = typename Architecture_t::Scalar_t;
81
82
83private:
84 bool inline isInteger(Scalar_t x) const { return x == floor(x); }
85 size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);
86
87private:
88 std::vector<Layer_t *> fLayers; ///< The layers consisting the DeepNet
89
90 size_t fBatchSize; ///< Batch size used for training and evaluation.
91 size_t fInputDepth; ///< The depth of the input.
92 size_t fInputHeight; ///< The height of the input.
93 size_t fInputWidth; ///< The width of the input.
94
95 size_t fBatchDepth; ///< The depth of the batch used for training/testing.
96 size_t fBatchHeight; ///< The height of the batch used for training/testing.
97 size_t fBatchWidth; ///< The width of the batch used for training/testing.
98
99 bool fIsTraining; ///< Is the network training?
100
101 ELossFunction fJ; ///< The loss function of the network.
102 EInitialization fI; ///< The initialization method of the network.
103 ERegularization fR; ///< The regularization used for the network.
104 Scalar_t fWeightDecay; ///< The weight decay factor.
105
106public:
107 /*! Default Constructor */
109
110 /*! Constructor */
111 TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
112 size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
113 ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);
114
115 /*! Copy-constructor */
117
118 /*! Destructor */
120
121 /*! Function for adding Convolution layer in the Deep Neural Network,
122 * with a given depth, filter height and width, striding in rows and columns,
123 * the zero paddings, as well as the activation function and the dropout
124 * probability. Based on these parameters, it calculates the width and height
125 * of the convolutional layer. */
126 TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
127 size_t strideCols, size_t paddingHeight, size_t paddingWidth,
128 EActivationFunction f, Scalar_t dropoutProbability = 1.0);
129
130 /*! Function for adding Convolution Layer in the Deep Neural Network,
131 * when the layer is already created. */
133
134 /*! Function for adding Pooling layer in the Deep Neural Network,
135 * with a given filter height and width, striding in rows and columns as
136 * well as the dropout probability. The depth is same as the previous
137 * layer depth. Based on these parameters, it calculates the width and
138 * height of the pooling layer. */
139 TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
140 size_t strideCols, Scalar_t dropoutProbability = 1.0);
141 /*! Function for adding Max Pooling layer in the Deep Neural Network,
142 * when the layer is already created. */
144
145
146 /*! Function for adding Recurrent Layer in the Deep Neural Network,
147 * with given parameters */
148 TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
149 bool rememberState = false,bool returnSequence = false,
151
152 /*! Function for adding Vanilla RNN when the layer is already created
153 */
155
156 /*! Function for adding LSTM Layer in the Deep Neural Network,
157 * with given parameters */
158 TBasicLSTMLayer<Architecture_t> *AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
159 bool rememberState = false, bool returnSequence = false);
160
161 /*! Function for adding LSTM Layer in the Deep Neural Network,
162 * when the layer is already created. */
164
165 /*! Function for adding GRU Layer in the Deep Neural Network,
166 * with given parameters */
167 TBasicGRULayer<Architecture_t> *AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps,
168 bool rememberState = false, bool returnSequence = false,
169 bool resetGateAfter = false);
170
171 /*! Function for adding GRU Layer in the Deep Neural Network,
172 * when the layer is already created. */
174
175 /*! Function for adding Dense Connected Layer in the Deep Neural Network,
176 * with a given width, activation function and dropout probability.
177 * Based on the previous layer dimensions, it calculates the input width
178 * of the fully connected layer. */
180
181 /*! Function for adding Dense Layer in the Deep Neural Network, when
182 * the layer is already created. */
184
185 /*! Function for adding Reshape Layer in the Deep Neural Network, with a given
186 * height and width. It will take every matrix from the previous layer and
187 * reshape it to a matrix with new dimensions. */
188 TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);
189
190 /*! Function for adding a Batch Normalization layer with given parameters */
192
193 /*! Function for adding Reshape Layer in the Deep Neural Network, when
194 * the layer is already created. */
196
197#ifdef HAVE_DAE /// DAE functions
198 /*! Function for adding Corruption layer in the Deep Neural Network,
199 * with given number of visibleUnits and hiddenUnits. It corrupts input
200 * according to given corruptionLevel and dropoutProbability. */
201 TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
202 Scalar_t dropoutProbability, Scalar_t corruptionLevel);
203
204 /*! Function for adding Corruption Layer in the Deep Neural Network,
205 * when the layer is already created. */
206 void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);
207
208 /*! Function for adding Compression layer in the Deep Neural Network,
209 * with given number of visibleUnits and hiddenUnits. It compresses the input units
210 * taking weights and biases from prev layers. */
211 TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
212 Scalar_t dropoutProbability, EActivationFunction f,
213 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);
214
215 /*! Function for adding Compression Layer in the Deep Neural Network, when
216 * the layer is already created. */
217 void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
218
219 /*! Function for adding Reconstruction layer in the Deep Neural Network,
220 * with given number of visibleUnits and hiddenUnits. It reconstructs the input units
221 * taking weights and biases from prev layers. Same corruptionLevel and dropoutProbability
222 * must be passed as in corruptionLayer. */
223 TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
224 Scalar_t learningRate, EActivationFunction f,
225 std::vector<Matrix_t> weights,
226 std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
227 Scalar_t dropoutProbability);
228
229 /*! Function for adding Reconstruction Layer in the Deep Neural Network, when
230 * the layer is already created. */
231 void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
232
233 /*! Function for adding logisticRegressionLayer in the Deep Neural Network,
234 * with given number of inputUnits and outputUnits. It classifies the outputUnits. */
235 TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
236 size_t testDataBatchSize,
237 Scalar_t learningRate);
238
239 /*! Function for adding logisticRegressionLayer in the Deep Neural Network, when
240 * the layer is already created. */
241 void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
242
243 /* To train the Deep AutoEncoder network with required number of Corruption, Compression and Reconstruction
244 * layers. */
245 void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
246 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
247 bool applyDropout = false);
248
249 /* To classify outputLabel in Deep AutoEncoder. Should be used after PreTrain if required.
250 * Currently, it used Logistic Regression Layer. Otherwise we can use any other classification layer also.
251 */
252 void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
253 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
254#endif
255
256 /*! Function for initialization of the Neural Net. */
258
259 /*! Function that executes the entire forward pass in the network. */
260 void Forward(Tensor_t &input, bool applyDropout = false);
261
262 /*! Function that reset some training flags after looping all the events but not the weights*/
264
265
266
267 /*! Function that executes the entire backward pass in the network. */
268 void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights);
269
270
271#ifdef USE_PARALLEL_DEEPNET
272 /*! Function for parallel forward in the vector of deep nets, where the master
273 * net is the net calling this function. There is one batch for one deep net.*/
274 void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
275 std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);
276
277 /*! Function for parallel backward in the vector of deep nets, where the master
278 * net is the net calling this function and getting the updates from the other nets.
279 * There is one batch for one deep net.*/
280 void ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
281 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);
282
283 /*! Function for parallel backward in the vector of deep nets, where the master
284 * net is the net calling this function and getting the updates from the other nets,
285 * following the momentum strategy. There is one batch for one deep net.*/
286 void ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
287 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
288 Scalar_t momentum);
289
290 /*! Function for parallel backward in the vector of deep nets, where the master
291 * net is the net calling this function and getting the updates from the other nets,
292 * following the Nestorov momentum strategy. There is one batch for one deep net.*/
293 void ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
294 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
295 Scalar_t momentum);
296
297#endif // endif use parallel deepnet
298
299 /*! Function that will update the weights and biases in the layers that
300 * contain weights and biases. */
301 void Update(Scalar_t learningRate);
302
303 /*! Function for evaluating the loss, based on the activations stored
304 * in the last layer. */
305 Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;
306
307 /*! Function for evaluating the loss, based on the propagation of the given input. */
308 Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights,
309 bool inTraining = false, bool includeRegularization = true);
310
311 /*! Function for computing the regularizaton term to be added to the loss function */
313
314 /*! Prediction based on activations stored in the last layer. */
315 void Prediction(Matrix_t &predictions, EOutputFunction f) const;
316
317 /*! Prediction for the given inputs, based on what network learned. */
318 void Prediction(Matrix_t &predictions, Tensor_t & input, EOutputFunction f);
319
320 /*! Print the Deep Net Info */
321 void Print() const;
322
 323 /*! Get the layer in the vector of layers at position i */
324 inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
325 inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }
326
327 /* Depth and the output width of the network. */
328 inline size_t GetDepth() const { return fLayers.size(); }
329 inline size_t GetOutputWidth() const { return fLayers.back()->GetWidth(); }
330
331 /* Return a reference to the layers. */
332 inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
333 inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }
334
335 /*! Remove all layers from the network. */
336 inline void Clear() { fLayers.clear(); }
337
338 /*! Getters */
339 inline size_t GetBatchSize() const { return fBatchSize; }
340 inline size_t GetInputDepth() const { return fInputDepth; }
341 inline size_t GetInputHeight() const { return fInputHeight; }
342 inline size_t GetInputWidth() const { return fInputWidth; }
343
344 inline size_t GetBatchDepth() const { return fBatchDepth; }
345 inline size_t GetBatchHeight() const { return fBatchHeight; }
346 inline size_t GetBatchWidth() const { return fBatchWidth; }
347
348 inline bool IsTraining() const { return fIsTraining; }
349
350 inline ELossFunction GetLossFunction() const { return fJ; }
351 inline EInitialization GetInitialization() const { return fI; }
352 inline ERegularization GetRegularization() const { return fR; }
353 inline Scalar_t GetWeightDecay() const { return fWeightDecay; }
354
355 /*! Setters */
356 // FIXME many of these won't work as the data structure storing activations
357 // and gradients have not changed in all the layers, also params in layers
358 // have not changed either
359 inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
360 inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
361 inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
362 inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
363 inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
364 inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
365 inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
366 inline void SetLossFunction(ELossFunction J) { fJ = J; }
370
371 void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);
372
373};
374
375//
376// Deep Net Class - Implementation
377//
378//______________________________________________________________________________
379template <typename Architecture_t, typename Layer_t>
381 : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
382 fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
383 fIsTraining(true), fWeightDecay(0.0)
384{
385 // Nothing to do here.
386}
387
388//______________________________________________________________________________
389template <typename Architecture_t, typename Layer_t>
390TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
391 size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
393 : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
394 fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
395 fR(R), fWeightDecay(weightDecay)
396{
397 // Nothing to do here.
398}
399
400//______________________________________________________________________________
401template <typename Architecture_t, typename Layer_t>
403 : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
404 fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
405 fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
406 fWeightDecay(deepNet.fWeightDecay)
407{
408 // Nothing to do here.
409}
410
411//______________________________________________________________________________
412template <typename Architecture_t, typename Layer_t>
414{
 415 // Release the layers' memory
416 for (auto layer : fLayers)
417 delete layer;
418 fLayers.clear();
419}
420
421//______________________________________________________________________________
422template <typename Architecture_t, typename Layer_t>
423auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
424{
425 Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
426 if (!isInteger(dimension) || dimension <= 0) {
427 this->Print();
428 int iLayer = fLayers.size();
429 Fatal("calculateDimension","Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
430 iLayer, imgDim, fltDim, padding, stride);
431 // std::cout << " calculateDimension - Not compatible hyper parameters (imgDim, fltDim, padding, stride)"
432 // << imgDim << " , " << fltDim << " , " << padding << " , " << stride<< " resulting dim is " << dimension << std::endl;
433 // std::exit(EXIT_FAILURE);
434 }
435
436 return (size_t)dimension;
437}
438
439//______________________________________________________________________________
440template <typename Architecture_t, typename Layer_t>
442 size_t filterWidth, size_t strideRows,
443 size_t strideCols, size_t paddingHeight,
444 size_t paddingWidth, EActivationFunction f,
445 Scalar_t dropoutProbability)
446{
447 // All variables defining a convolutional layer
448 size_t batchSize = this->GetBatchSize();
449 size_t inputDepth;
450 size_t inputHeight;
451 size_t inputWidth;
452 EInitialization init = this->GetInitialization();
453 ERegularization reg = this->GetRegularization();
454 Scalar_t decay = this->GetWeightDecay();
455
456 if (fLayers.size() == 0) {
457 inputDepth = this->GetInputDepth();
458 inputHeight = this->GetInputHeight();
459 inputWidth = this->GetInputWidth();
460 } else {
461 Layer_t *lastLayer = fLayers.back();
462 inputDepth = lastLayer->GetDepth();
463 inputHeight = lastLayer->GetHeight();
464 inputWidth = lastLayer->GetWidth();
465 }
466
467
468
469 // Create the conv layer
471 batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
472 strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);
473
474 fLayers.push_back(convLayer);
475 return convLayer;
476}
477
478//______________________________________________________________________________
479template <typename Architecture_t, typename Layer_t>
481{
482 fLayers.push_back(convLayer);
483}
484
485//______________________________________________________________________________
486template <typename Architecture_t, typename Layer_t>
488 size_t strideRows, size_t strideCols,
489 Scalar_t dropoutProbability)
490{
491 size_t batchSize = this->GetBatchSize();
492 size_t inputDepth;
493 size_t inputHeight;
494 size_t inputWidth;
495
496 if (fLayers.size() == 0) {
497 inputDepth = this->GetInputDepth();
498 inputHeight = this->GetInputHeight();
499 inputWidth = this->GetInputWidth();
500 } else {
501 Layer_t *lastLayer = fLayers.back();
502 inputDepth = lastLayer->GetDepth();
503 inputHeight = lastLayer->GetHeight();
504 inputWidth = lastLayer->GetWidth();
505 }
506
508 batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
509 strideRows, strideCols, dropoutProbability);
510
511 // But this creates a copy or what?
512 fLayers.push_back(maxPoolLayer);
513
514 return maxPoolLayer;
515}
516
517//______________________________________________________________________________
518template <typename Architecture_t, typename Layer_t>
520{
521 fLayers.push_back(maxPoolLayer);
522}
523
524//______________________________________________________________________________
525template <typename Architecture_t, typename Layer_t>
527 size_t timeSteps,
528 bool rememberState, bool returnSequence,
530{
531
532 // should check if input and time size are consistent
533
534 //std::cout << "Create RNN " << fLayers.size() << " " << this->GetInputHeight() << " " << this->GetInputWidth() << std::endl;
535 size_t inputHeight, inputWidth, inputDepth;
536 if (fLayers.size() == 0) {
537 inputHeight = this->GetInputHeight();
538 inputWidth = this->GetInputWidth();
539 inputDepth = this->GetInputDepth();
540 } else {
541 Layer_t *lastLayer = fLayers.back();
542 inputHeight = lastLayer->GetHeight();
543 inputWidth = lastLayer->GetWidth();
544 inputDepth = lastLayer->GetDepth();
545 }
546 if (inputSize != inputWidth) {
547 Error("AddBasicRNNLayer","Inconsistent input size with input layout - it should be %zu instead of %zu",inputSize, inputWidth);
548 }
549 if (timeSteps != inputHeight && timeSteps != inputDepth) {
550 Error("AddBasicRNNLayer","Inconsistent time steps with input layout - it should be %zu instead of %zu or %zu",timeSteps, inputHeight,inputDepth);
551 }
552
553 TBasicRNNLayer<Architecture_t> *basicRNNLayer =
554 new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
555 f, fIsTraining, this->GetInitialization());
556 fLayers.push_back(basicRNNLayer);
557 return basicRNNLayer;
558}
559
560//______________________________________________________________________________
561template <typename Architecture_t, typename Layer_t>
563{
564 fLayers.push_back(basicRNNLayer);
565}
566
567//______________________________________________________________________________
568template <typename Architecture_t, typename Layer_t>
570 size_t timeSteps, bool rememberState, bool returnSequence)
571{
572 // should check if input and time size are consistent
573 size_t inputHeight, inputWidth, inputDepth;
574 if (fLayers.size() == 0) {
575 inputHeight = this->GetInputHeight();
576 inputWidth = this->GetInputWidth();
577 inputDepth = this->GetInputDepth();
578 } else {
579 Layer_t *lastLayer = fLayers.back();
580 inputHeight = lastLayer->GetHeight();
581 inputWidth = lastLayer->GetWidth();
582 inputDepth = lastLayer->GetDepth();
583 }
584 if (inputSize != inputWidth) {
585 Error("AddBasicLSTMLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
586 }
587 if (timeSteps != inputHeight && timeSteps != inputDepth) {
588 Error("AddBasicLSTMLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
589 }
590
591 TBasicLSTMLayer<Architecture_t> *basicLSTMLayer =
592 new TBasicLSTMLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
595 fIsTraining, this->GetInitialization());
596 fLayers.push_back(basicLSTMLayer);
597 return basicLSTMLayer;
598}
599
600//______________________________________________________________________________
601template <typename Architecture_t, typename Layer_t>
603{
604 fLayers.push_back(basicLSTMLayer);
605}
606
607
608//______________________________________________________________________________
609template <typename Architecture_t, typename Layer_t>
611 size_t timeSteps, bool rememberState, bool returnSequence, bool resetGateAfter)
612{
613 // should check if input and time size are consistent
614 size_t inputHeight, inputWidth, inputDepth;
615 if (fLayers.size() == 0) {
616 inputHeight = this->GetInputHeight();
617 inputWidth = this->GetInputWidth();
618 inputDepth = this->GetInputDepth();
619 } else {
620 Layer_t *lastLayer = fLayers.back();
621 inputHeight = lastLayer->GetHeight();
622 inputWidth = lastLayer->GetWidth();
623 inputDepth = lastLayer->GetDepth();
624 }
625 if (inputSize != inputWidth) {
626 Error("AddBasicGRULayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
627 }
628 if (timeSteps != inputHeight && timeSteps != inputDepth) {
629 Error("AddBasicGRULayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
630 }
631
632 TBasicGRULayer<Architecture_t> *basicGRULayer =
633 new TBasicGRULayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter,
636 fIsTraining, this->GetInitialization());
637 fLayers.push_back(basicGRULayer);
638 return basicGRULayer;
639}
640
641//______________________________________________________________________________
642template <typename Architecture_t, typename Layer_t>
644{
645 fLayers.push_back(basicGRULayer);
646}
647
648
649
650//DAE
651#ifdef HAVE_DAE
652
653//______________________________________________________________________________
654template <typename Architecture_t, typename Layer_t>
655TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
656 size_t hiddenUnits,
657 Scalar_t dropoutProbability,
658 Scalar_t corruptionLevel)
659{
660 size_t batchSize = this->GetBatchSize();
661
662 TCorruptionLayer<Architecture_t> *corruptionLayer =
663 new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
664 fLayers.push_back(corruptionLayer);
665 return corruptionLayer;
666}
667//______________________________________________________________________________
668
669template <typename Architecture_t, typename Layer_t>
670void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
671{
672 fLayers.push_back(corruptionLayer);
673}
674
675//______________________________________________________________________________
676template <typename Architecture_t, typename Layer_t>
677TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
678 size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
679 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
680{
681 size_t batchSize = this->GetBatchSize();
682
683 TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
684 batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
685 fLayers.push_back(compressionLayer);
686 return compressionLayer;
687}
688//______________________________________________________________________________
689
690template <typename Architecture_t, typename Layer_t>
691void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
692{
693 fLayers.push_back(compressionLayer);
694}
695
696//______________________________________________________________________________
697template <typename Architecture_t, typename Layer_t>
698TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
699 size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
700 std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
701{
702 size_t batchSize = this->GetBatchSize();
703
704 TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
705 batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
706 fLayers.push_back(reconstructionLayer);
707 return reconstructionLayer;
708}
709//______________________________________________________________________________
710
711template <typename Architecture_t, typename Layer_t>
712void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
713 TReconstructionLayer<Architecture_t> *reconstructionLayer)
714{
715 fLayers.push_back(reconstructionLayer);
716}
717
718//______________________________________________________________________________
719template <typename Architecture_t, typename Layer_t>
720TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
721 size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
722{
723 size_t batchSize = this->GetBatchSize();
724
725 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
726 new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
727 fLayers.push_back(logisticRegressionLayer);
728 return logisticRegressionLayer;
729}
730//______________________________________________________________________________
731template <typename Architecture_t, typename Layer_t>
732void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
733 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
734{
735 fLayers.push_back(logisticRegressionLayer);
736}
737#endif
738
739
740//______________________________________________________________________________
741template <typename Architecture_t, typename Layer_t>
743 Scalar_t dropoutProbability)
744{
745 size_t batchSize = this->GetBatchSize();
746 size_t inputWidth;
747 EInitialization init = this->GetInitialization();
748 ERegularization reg = this->GetRegularization();
749 Scalar_t decay = this->GetWeightDecay();
750
751 if (fLayers.size() == 0) {
752 inputWidth = this->GetInputWidth();
753 } else {
754 Layer_t *lastLayer = fLayers.back();
755 inputWidth = lastLayer->GetWidth();
756 }
757
758 TDenseLayer<Architecture_t> *denseLayer =
759 new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
760
761 fLayers.push_back(denseLayer);
762
763 return denseLayer;
764}
765
766//______________________________________________________________________________
767template <typename Architecture_t, typename Layer_t>
769{
770 fLayers.push_back(denseLayer);
771}
772
773//______________________________________________________________________________
774template <typename Architecture_t, typename Layer_t>
776 size_t width, bool flattening)
777{
778 size_t batchSize = this->GetBatchSize();
779 size_t inputDepth;
780 size_t inputHeight;
781 size_t inputWidth;
782 size_t outputNSlices;
783 size_t outputNRows;
784 size_t outputNCols;
785
786 if (fLayers.size() == 0) {
787 inputDepth = this->GetInputDepth();
788 inputHeight = this->GetInputHeight();
789 inputWidth = this->GetInputWidth();
790 } else {
791 Layer_t *lastLayer = fLayers.back();
792 inputDepth = lastLayer->GetDepth();
793 inputHeight = lastLayer->GetHeight();
794 inputWidth = lastLayer->GetWidth();
795 }
796
797 if (flattening) {
798 outputNSlices = 1;
799 outputNRows = this->GetBatchSize();
800 outputNCols = depth * height * width;
801 size_t inputNCols = inputDepth * inputHeight * inputWidth;
802 if (outputNCols != 0 && outputNCols != inputNCols ) {
803 Info("AddReshapeLayer","Dimensions not compatibles - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - Force flattening output to be %zu",
804 inputDepth, inputHeight, inputWidth, depth, height, width,inputNCols);
805 }
806 outputNCols = inputNCols;
807 depth = 1;
808 height = 1;
809 width = outputNCols;
810 } else {
811 outputNSlices = this->GetBatchSize();
812 outputNRows = depth;
813 outputNCols = height * width;
814 }
815
816 TReshapeLayer<Architecture_t> *reshapeLayer =
817 new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
818 outputNSlices, outputNRows, outputNCols, flattening);
819
820 fLayers.push_back(reshapeLayer);
821
822 return reshapeLayer;
823}
824
825//______________________________________________________________________________
826template <typename Architecture_t, typename Layer_t>
828{
829 int axis = -1;
830 size_t batchSize = this->GetBatchSize();
831 size_t inputDepth = 0;
832 size_t inputHeight = 0;
833 size_t inputWidth = 0;
834 // this is the shape of the output tensor (it is columnmajor by default)
835 // and it is normally (depth, hw, bsize) and for dense layers (bsize, w, 1)
836 std::vector<size_t> shape = {1, 1, 1};
837 if (fLayers.size() == 0) {
838 inputDepth = this->GetInputDepth();
839 inputHeight = this->GetInputHeight();
840 inputWidth = this->GetInputWidth();
841 // assume that is like for a dense layer
842 shape[0] = batchSize;
843 shape[1] = inputWidth;
844 shape[2] = 1;
845 } else {
846 Layer_t *lastLayer = fLayers.back();
847 inputDepth = lastLayer->GetDepth();
848 inputHeight = lastLayer->GetHeight();
849 inputWidth = lastLayer->GetWidth();
850 shape = lastLayer->GetOutput().GetShape();
851 if (dynamic_cast<TConvLayer<Architecture_t> *>(lastLayer) != nullptr ||
852 dynamic_cast<TMaxPoolLayer<Architecture_t> *>(lastLayer) != nullptr)
853 axis = 1; // use axis = channel axis for convolutional layer
854 if (shape.size() > 3) {
855 for (size_t i = 3; i < shape.size(); ++i)
856 shape[2] *= shape[i];
857 }
858 }
859 // std::cout << "addBNormLayer " << inputDepth << " , " << inputHeight << " , " << inputWidth << " , " << shape[0]
860 // << " " << shape[1] << " " << shape[2] << std::endl;
861
862 auto bnormLayer =
863 new TBatchNormLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, shape, axis, momentum, epsilon);
864
865 fLayers.push_back(bnormLayer);
866
867 return bnormLayer;
868}
869
870//______________________________________________________________________________
871template <typename Architecture_t, typename Layer_t>
873{
874 fLayers.push_back(reshapeLayer);
875}
876
877//______________________________________________________________________________
878template <typename Architecture_t, typename Layer_t>
880{
881 for (size_t i = 0; i < fLayers.size(); i++) {
882 fLayers[i]->Initialize();
883 }
884}
885
886//______________________________________________________________________________
887template <typename Architecture_t, typename Layer_t>
889{
890 for (size_t i = 0; i < fLayers.size(); i++) {
891 fLayers[i]->ResetTraining();
892 }
893}
894
895
896//______________________________________________________________________________
897template <typename Architecture_t, typename Layer_t>
898auto TDeepNet<Architecture_t, Layer_t>::Forward( Tensor_t &input, bool applyDropout) -> void
899{
900 fLayers.front()->Forward(input, applyDropout);
901
902 for (size_t i = 1; i < fLayers.size(); i++) {
903 fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
904 //std::cout << "forward for layer " << i << std::endl;
905 // fLayers[i]->GetOutput()[0].Print();
906 }
907}
908
909
910#ifdef HAVE_DAE
911//_____________________________________________________________________________
912template <typename Architecture_t, typename Layer_t>
913auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
914 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
915 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
916 EActivationFunction f, bool applyDropout) -> void
917{
918 std::vector<Matrix_t> inp1;
919 std::vector<Matrix_t> inp2;
920 size_t numOfHiddenLayers = sizeof(numHiddenUnitsPerLayer) / sizeof(numHiddenUnitsPerLayer[0]);
921 // size_t batchSize = this->GetBatchSize();
922 size_t visibleUnits = (size_t)input[0].GetNrows();
923
924 AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
925 fLayers.back()->Initialize();
926 fLayers.back()->Forward(input, applyDropout);
927 // fLayers.back()->Print();
928
929 AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
930 fLayers.back()->GetBiases());
931 fLayers.back()->Initialize();
932 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // as we have to pass corrupt input
933
934 AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
935 fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
936 fLayers.back()->Initialize();
937 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
938 applyDropout); // as we have to pass compressed Input
939 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
940 input);
941 // three layers are added, now pointer is on third layer
942 size_t weightsSize = fLayers.back()->GetWeights().size();
943 size_t biasesSize = fLayers.back()->GetBiases().size();
944 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
945 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
946 for (size_t j = 0; j < weightsSize; j++) {
947 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
948 }
949 for (size_t j = 0; j < biasesSize; j++) {
950 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
951 }
952 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
953 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
954 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
955 fLayers[fLayers.size() - 3]->GetOutput(), input);
956 }
957 fLayers.back()->Print();
958
959 for (size_t i = 1; i < numOfHiddenLayers; i++) {
960
961 AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
962 fLayers.back()->Initialize();
963 fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(),
964 applyDropout); // as we have to pass compressed Input
965
966 AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
967 fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
968 fLayers.back()->Initialize();
969 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
970
971 AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
972 fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
973 dropoutProbability);
974 fLayers.back()->Initialize();
975 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
976 applyDropout); // as we have to pass compressed Input
977 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
978 fLayers[fLayers.size() - 5]->GetOutput());
979
980 // three layers are added, now pointer is on third layer
981 size_t _weightsSize = fLayers.back()->GetWeights().size();
982 size_t _biasesSize = fLayers.back()->GetBiases().size();
983 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
984 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
985 for (size_t j = 0; j < _weightsSize; j++) {
986 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
987 }
988 for (size_t j = 0; j < _biasesSize; j++) {
989 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
990 }
991 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
992 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
993 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
994 fLayers[fLayers.size() - 3]->GetOutput(),
995 fLayers[fLayers.size() - 5]->GetOutput());
996 }
997 fLayers.back()->Print();
998 }
999}
1000
1001//______________________________________________________________________________
1002template <typename Architecture_t, typename Layer_t>
1003auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
1004 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
1005 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
1006{
1007 std::vector<Matrix_t> inp1;
1008 std::vector<Matrix_t> inp2;
1009 if (fLayers.size() == 0) // only Logistic Regression Layer
1010 {
1011 size_t inputUnits = input[0].GetNrows();
1012
1013 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
1014 fLayers.back()->Initialize();
1015 for (size_t i = 0; i < epochs; i++) {
1016 fLayers.back()->Backward(inputLabel, inp1, input, inp2);
1017 }
1018 fLayers.back()->Forward(input, false);
1019 fLayers.back()->Print();
1020 } else { // if used after any other layer
1021 size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
1022 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
1023 fLayers.back()->Initialize();
1024 for (size_t i = 0; i < epochs; i++) {
1025 fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
1026 }
1027 fLayers.back()->Forward(testInput, false);
1028 fLayers.back()->Print();
1029 }
1030}
1031#endif
1032
1033//______________________________________________________________________________
1034template <typename Architecture_t, typename Layer_t>
1036 const Matrix_t &weights) -> void
1037{
1038 //Tensor_t inp1;
1039 //Tensor_t inp2;
1040 // Last layer should be dense layer
1041 Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
1042 Matrix_t last_output = fLayers.back()->GetOutputAt(0);
1043 evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth,
1044 last_output, weights);
1045
1046 for (size_t i = fLayers.size() - 1; i > 0; i--) {
1047 auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
1048 auto &activations_backward = fLayers[i - 1]->GetOutput();
1049 fLayers[i]->Backward(activation_gradient_backward, activations_backward);
1050 }
1051
1052 // need to have a dummy tensor (size=0) to pass for activation gradient backward which
1053 // are not computed for the first layer
1055 fLayers[0]->Backward(dummy, input);
1056}
1057
1058#ifdef USE_PARALLEL_DEEPNET
1059
1060//______________________________________________________________________________
1061template <typename Architecture_t, typename Layer_t>
1063 std::vector<TTensorBatch<Architecture_t>> &batches,
1064 bool applyDropout) -> void
1065{
1066 size_t depth = this->GetDepth();
1067
1068 // The first layer of each deep net
1069 for (size_t i = 0; i < nets.size(); i++) {
1070 nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
1071 }
1072
1073 // The i'th layer of each deep net
1074 for (size_t i = 1; i < depth; i++) {
1075 for (size_t j = 0; j < nets.size(); j++) {
1076 nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
1077 }
1078 }
1079}
1080
1081//______________________________________________________________________________
1082template <typename Architecture_t, typename Layer_t>
1083auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1084 std::vector<TTensorBatch<Architecture_t>> &batches,
1085 Scalar_t learningRate) -> void
1086{
1087 std::vector<Matrix_t> inp1;
1088 std::vector<Matrix_t> inp2;
1089 size_t depth = this->GetDepth();
1090
1091 // Evaluate the gradients of the last layers in each deep net
1092 for (size_t i = 0; i < nets.size(); i++) {
1093 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1094 nets[i].GetLossFunction(), batches[i].GetOutput(),
1095 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1096 }
1097
1098 // Backpropagate the error in i'th layer of each deep net
1099 for (size_t i = depth - 1; i > 0; i--) {
1100 for (size_t j = 0; j < nets.size(); j++) {
1101 nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
1102 nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
1103 }
1104 }
1105
1106 std::vector<Matrix_t> dummy;
1107
1108 // First layer of each deep net
1109 for (size_t i = 0; i < nets.size(); i++) {
1110 nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1111 }
1112
1113 // Update and copy
1114 for (size_t i = 0; i < nets.size(); i++) {
1115 for (size_t j = 0; j < depth; j++) {
1116 Layer_t *masterLayer = this->GetLayerAt(j);
1117 Layer_t *layer = nets[i].GetLayerAt(j);
1118
1119 masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
1120 layer->CopyWeights(masterLayer->GetWeights());
1121
1122 masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
1123 layer->CopyBiases(masterLayer->GetBiases());
1124 }
1125 }
1126}
1127
1128//______________________________________________________________________________
1129template <typename Architecture_t, typename Layer_t>
1130auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1131 std::vector<TTensorBatch<Architecture_t>> &batches,
1132 Scalar_t learningRate, Scalar_t momentum) -> void
1133{
1134 std::vector<Matrix_t> inp1;
1135 std::vector<Matrix_t> inp2;
1136 size_t depth = this->GetDepth();
1137
1138 // Evaluate the gradients of the last layers in each deep net
1139 for (size_t i = 0; i < nets.size(); i++) {
1140 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1141 nets[i].GetLossFunction(), batches[i].GetOutput(),
1142 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1143 }
1144
1145 // Backpropagate the error in i'th layer of each deep net
1146 for (size_t i = depth - 1; i > 0; i--) {
1147 Layer_t *masterLayer = this->GetLayerAt(i);
1148
1149 for (size_t j = 0; j < nets.size(); j++) {
1150 Layer_t *layer = nets[j].GetLayerAt(i);
1151
1152 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1153 inp1, inp2);
1154 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1155 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1156 }
1157
1158 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1159 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1160 }
1161
1162 std::vector<Matrix_t> dummy;
1163
1164 // First layer of each deep net
1165 Layer_t *masterFirstLayer = this->GetLayerAt(0);
1166 for (size_t i = 0; i < nets.size(); i++) {
1167 Layer_t *layer = nets[i].GetLayerAt(0);
1168
1169 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1170
1171 masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1172 masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1173 }
1174
1175 masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
1176 masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);
1177
1178 for (size_t i = 0; i < depth; i++) {
1179 Layer_t *masterLayer = this->GetLayerAt(i);
1180 masterLayer->Update(1.0);
1181
1182 for (size_t j = 0; j < nets.size(); j++) {
1183 Layer_t *layer = nets[j].GetLayerAt(i);
1184
1185 layer->CopyWeights(masterLayer->GetWeights());
1186 layer->CopyBiases(masterLayer->GetBiases());
1187 }
1188 }
1189}
1190
1191//______________________________________________________________________________
1192template <typename Architecture_t, typename Layer_t>
1193auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1194 std::vector<TTensorBatch<Architecture_t>> &batches,
1195 Scalar_t learningRate, Scalar_t momentum) -> void
1196{
1197 std::cout << "Parallel Backward Nestorov" << std::endl;
1198 std::vector<Matrix_t> inp1;
1199 std::vector<Matrix_t> inp2;
1200 size_t depth = this->GetDepth();
1201
1202 // Evaluate the gradients of the last layers in each deep net
1203 for (size_t i = 0; i < nets.size(); i++) {
1204 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1205 nets[i].GetLossFunction(), batches[i].GetOutput(),
1206 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1207 }
1208
1209 // Backpropagate the error in i'th layer of each deep net
1210 for (size_t i = depth - 1; i > 0; i--) {
1211 for (size_t j = 0; j < nets.size(); j++) {
1212 Layer_t *layer = nets[j].GetLayerAt(i);
1213
1214 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1215 inp1, inp2);
1216 }
1217 }
1218
1219 std::vector<Matrix_t> dummy;
1220
1221 // First layer of each deep net
1222 for (size_t i = 0; i < nets.size(); i++) {
1223 Layer_t *layer = nets[i].GetLayerAt(0);
1224 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1225 }
1226
1227 for (size_t i = 0; i < depth; i++) {
1228 Layer_t *masterLayer = this->GetLayerAt(i);
1229 for (size_t j = 0; j < nets.size(); j++) {
1230 Layer_t *layer = nets[j].GetLayerAt(i);
1231
1232 layer->CopyWeights(masterLayer->GetWeights());
1233 layer->CopyBiases(masterLayer->GetBiases());
1234
1235 layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
1236 layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
1237 }
1238
1239 for (size_t j = 0; j < nets.size(); j++) {
1240 Layer_t *layer = nets[j].GetLayerAt(i);
1241
1242 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1243 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1244 }
1245
1246 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1247 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1248
1249 masterLayer->Update(1.0);
1250 }
1251}
1252#endif // use parallel deep net
1253
1254//______________________________________________________________________________
1255template <typename Architecture_t, typename Layer_t>
1257{
1258 for (size_t i = 0; i < fLayers.size(); i++) {
1259 fLayers[i]->Update(learningRate);
1260 }
1261}
1262
1263//______________________________________________________________________________
1264template <typename Architecture_t, typename Layer_t>
1265auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
1266 bool includeRegularization) const -> Scalar_t
1267{
1268 // Last layer should not be deep
1269 auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);
1270
1271 includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
1272 if (includeRegularization) {
1273 loss += RegularizationTerm();
1274 }
1275
1276 return loss;
1277}
1278
1279//______________________________________________________________________________
1280template <typename Architecture_t, typename Layer_t>
1282 const Matrix_t &weights, bool inTraining, bool includeRegularization)
1283 -> Scalar_t
1284{
1285 Forward(input, inTraining);
1286 return Loss(groundTruth, weights, includeRegularization);
1287}
1288
1289//______________________________________________________________________________
1290template <typename Architecture_t, typename Layer_t>
1292{
1293 Scalar_t reg = 0.0;
1294 for (size_t i = 0; i < fLayers.size(); i++) {
1295 for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
1296 reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
1297 }
1298 }
1299 return this->GetWeightDecay() * reg;
1300}
1301
1302
1303//______________________________________________________________________________
1304template <typename Architecture_t, typename Layer_t>
1306{
1307 // Last layer should not be deep (assume output is a matrix)
1308 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1309}
1310
1311//______________________________________________________________________________
1312template <typename Architecture_t, typename Layer_t>
1314 EOutputFunction f) -> void
1315{
1316 Forward(input, false);
1317 // Last layer should not be deep
1318 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1319}
1320
1321//______________________________________________________________________________
1322template <typename Architecture_t, typename Layer_t>
1324{
1325 std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
1326 std::cout << " Input = ( " << this->GetInputDepth();
1327 std::cout << ", " << this->GetInputHeight();
1328 std::cout << ", " << this->GetInputWidth() << " )";
1329 std::cout << " Batch size = " << this->GetBatchSize();
1330 std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;
1331
1332 //std::cout << "\t Layers: " << std::endl;
1333
1334 for (size_t i = 0; i < fLayers.size(); i++) {
1335 std::cout << "\tLayer " << i << "\t";
1336 fLayers[i]->Print();
1337 }
1338}
1339
1340//______________________________________________________________________________
1341template <typename Architecture_t, typename Layer_t>
1343 const std::vector<Double_t> & probabilities)
1344{
1345 for (size_t i = 0; i < fLayers.size(); i++) {
1346 if (i < probabilities.size()) {
1347 fLayers[i]->SetDropoutProbability(probabilities[i]);
1348 } else {
1349 fLayers[i]->SetDropoutProbability(1.0);
1350 }
1351 }
1352}
1353
1354
1355} // namespace DNN
1356} // namespace TMVA
1357
1358#endif
#define f(i)
Definition: RSha256.hxx:104
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
static RooMathCoreReg dummy
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
void Info(const char *location, const char *msgfmt,...)
void Error(const char *location, const char *msgfmt,...)
void Fatal(const char *location, const char *msgfmt,...)
double floor(double)
Generic Max Pooling Layer class.
Definition: MaxPoolLayer.h:59
Layer implementing Batch Normalization.
Generic Deep Neural Network class.
Definition: DeepNet.h:75
const std::vector< Layer_t * > & GetLayers() const
Definition: DeepNet.h:333
void AddDenseLayer(TDenseLayer< Architecture_t > *denseLayer)
Function for adding Dense Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:768
size_t GetBatchHeight() const
Definition: DeepNet.h:345
void SetBatchDepth(size_t batchDepth)
Definition: DeepNet.h:363
void Forward(Tensor_t &input, bool applyDropout=false)
Function that executes the entire forward pass in the network.
Definition: DeepNet.h:898
void SetLossFunction(ELossFunction J)
Definition: DeepNet.h:366
size_t fBatchHeight
The height of the batch used for training/testing.
Definition: DeepNet.h:96
ERegularization GetRegularization() const
Definition: DeepNet.h:352
void AddBasicGRULayer(TBasicGRULayer< Architecture_t > *basicGRULayer)
Function for adding GRU Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:643
std::vector< Layer_t * > & GetLayers()
Definition: DeepNet.h:332
typename Architecture_t::Scalar_t Scalar_t
Definition: DeepNet.h:80
void Initialize()
DAE functions.
Definition: DeepNet.h:879
size_t GetBatchSize() const
Getters.
Definition: DeepNet.h:339
size_t GetDepth() const
Definition: DeepNet.h:328
Scalar_t GetWeightDecay() const
Definition: DeepNet.h:353
size_t GetInputDepth() const
Definition: DeepNet.h:340
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition: DeepNet.h:827
void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights)
Function that executes the entire backward pass in the network.
Definition: DeepNet.h:1035
std::vector< Layer_t * > fLayers
The layers consisting the DeepNet.
Definition: DeepNet.h:88
size_t fBatchDepth
The depth of the batch used for training/testing.
Definition: DeepNet.h:95
size_t fInputDepth
The depth of the input.
Definition: DeepNet.h:91
Layer_t * GetLayerAt(size_t i)
Get the layer in the vector of layers at poistion i.
Definition: DeepNet.h:324
void Print() const
Print the Deep Net Info.
Definition: DeepNet.h:1323
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:610
void SetWeightDecay(Scalar_t weightDecay)
Definition: DeepNet.h:369
void AddReshapeLayer(TReshapeLayer< Architecture_t > *reshapeLayer)
Function for adding Reshape Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:872
void Clear()
Remove all layers from the network.
Definition: DeepNet.h:336
Scalar_t RegularizationTerm() const
Function for computing the regularizaton term to be added to the loss function
Definition: DeepNet.h:1291
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:742
TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth, size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI=EInitialization::kZero, ERegularization fR=ERegularization::kNone, Scalar_t fWeightDecay=0.0, bool isTraining=false)
Constructor.
Definition: DeepNet.h:390
void Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f)
Prediction for the given inputs, based on what network learned.
Definition: DeepNet.h:1313
void SetInputDepth(size_t inputDepth)
Definition: DeepNet.h:360
bool IsTraining() const
Definition: DeepNet.h:348
size_t GetInputHeight() const
Definition: DeepNet.h:341
size_t fBatchSize
Batch size used for training and evaluation.
Definition: DeepNet.h:90
void Prediction(Matrix_t &predictions, EOutputFunction f) const
Prediction based on activations stored in the last layer.
Definition: DeepNet.h:1305
size_t fInputWidth
The width of the input.
Definition: DeepNet.h:93
void SetInputHeight(size_t inputHeight)
Definition: DeepNet.h:361
size_t GetBatchWidth() const
Definition: DeepNet.h:346
void AddBasicRNNLayer(TBasicRNNLayer< Architecture_t > *basicRNNLayer)
Function for adding Vanilla RNN when the layer is already created.
Definition: DeepNet.h:562
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:569
void AddMaxPoolLayer(CNN::TMaxPoolLayer< Architecture_t > *maxPoolLayer)
Function for adding Max Pooling layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:519
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:487
Scalar_t fWeightDecay
The weight decay factor.
Definition: DeepNet.h:104
Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization=true) const
Function for evaluating the loss, based on the activations stored in the last layer.
Definition: DeepNet.h:1265
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:441
ERegularization fR
The regularization used for the network.
Definition: DeepNet.h:103
void ResetTraining()
Function that reset some training flags after looping all the events but not the weights.
Definition: DeepNet.h:888
size_t GetInputWidth() const
Definition: DeepNet.h:342
bool isInteger(Scalar_t x) const
Definition: DeepNet.h:84
size_t GetOutputWidth() const
Definition: DeepNet.h:329
bool fIsTraining
Is the network training?
Definition: DeepNet.h:99
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:775
void SetBatchSize(size_t batchSize)
Setters.
Definition: DeepNet.h:359
void AddConvLayer(TConvLayer< Architecture_t > *convLayer)
Function for adding Convolution Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:480
size_t fInputHeight
The height of the input.
Definition: DeepNet.h:92
void SetRegularization(ERegularization R)
Definition: DeepNet.h:368
TDeepNet(const TDeepNet &)
Copy-constructor.
Definition: DeepNet.h:402
size_t fBatchWidth
The width of the batch used for training/testing.
Definition: DeepNet.h:97
typename Architecture_t::Tensor_t Tensor_t
Definition: DeepNet.h:78
ELossFunction fJ
The loss function of the network.
Definition: DeepNet.h:101
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:526
~TDeepNet()
Destructor.
Definition: DeepNet.h:413
void SetBatchWidth(size_t batchWidth)
Definition: DeepNet.h:365
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
Definition: DeepNet.h:1342
TDeepNet()
Default Constructor.
Definition: DeepNet.h:380
void SetBatchHeight(size_t batchHeight)
Definition: DeepNet.h:364
void Update(Scalar_t learningRate)
Function that will update the weights and biases in the layers that contain weights and biases.
Definition: DeepNet.h:1256
ELossFunction GetLossFunction() const
Definition: DeepNet.h:350
size_t calculateDimension(int imgDim, int fltDim, int padding, int stride)
Definition: DeepNet.h:423
const Layer_t * GetLayerAt(size_t i) const
Definition: DeepNet.h:325
void SetInitialization(EInitialization I)
Definition: DeepNet.h:367
EInitialization GetInitialization() const
Definition: DeepNet.h:351
void SetInputWidth(size_t inputWidth)
Definition: DeepNet.h:362
typename Architecture_t::Matrix_t Matrix_t
Definition: DeepNet.h:79
void AddBasicLSTMLayer(TBasicLSTMLayer< Architecture_t > *basicLSTMLayer)
Function for adding LSTM Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:602
Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining=false, bool includeRegularization=true)
Function for evaluating the loss, based on the propagation of the given input.
Definition: DeepNet.h:1281
EInitialization fI
The initialization method of the network.
Definition: DeepNet.h:102
size_t GetBatchDepth() const
Definition: DeepNet.h:344
Generic layer class.
Definition: DenseLayer.h:57
Double_t x[n]
Definition: legend1.C:17
#define I(x, y, z)
EvaluateInfo init(std::vector< RooRealProxy > parameters, std::vector< ArrayWrapper * > wrappers, std::vector< double * > arrays, size_t begin, size_t batchSize)
void Copy(void *source, void *dest)
void Print(std::ostream &os, const OptionType &opt)
EInitialization
Definition: Functions.h:72
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:57
create variable transformations
REAL epsilon
Definition: triangle.c:617