ROOT 6.16/01 Reference Guide
DeepNet.h
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TDeepNet *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Deep Neural Network *
12 * *
13 * Authors (alphabetical): *
14 * Akshay Vashistha <akshayvashistha1995@gmail.com> - CERN, Switzerland *
15 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
16 * Saurav Shekhar <sauravshekhar01@gmail.com> - CERN, Switzerland *
17 * *
18 * Copyright (c) 2005-2015: *
19 * CERN, Switzerland *
20 * U. of Victoria, Canada *
21 * MPI-K Heidelberg, Germany *
22 * U. of Bonn, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef TMVA_DNN_DEEPNET
30#define TMVA_DNN_DEEPNET
31
32#include "TString.h"
33
34#include "TMVA/DNN/Functions.h"
35#include "TMVA/DNN/TensorDataLoader.h"
36
37#include "TMVA/DNN/GeneralLayer.h"
38#include "TMVA/DNN/DenseLayer.h"
39#include "TMVA/DNN/ReshapeLayer.h"
40
41#include "TMVA/DNN/CNN/ConvLayer.h"
42#include "TMVA/DNN/CNN/MaxPoolLayer.h"
43
44#include "TMVA/DNN/RNN/RNNLayer.h"
45
46#ifdef HAVE_DAE
47#include "TMVA/DNN/DAE/CompressionLayer.h"
48#include "TMVA/DNN/DAE/CorruptionLayer.h"
49#include "TMVA/DNN/DAE/ReconstructionLayer.h"
50#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
51#endif
52
53#include <vector>
54#include <cmath>
55
56
57namespace TMVA {
58namespace DNN {
59
60 using namespace CNN;
61 using namespace RNN;
62 //using namespace DAE;
63
64/** \class TDeepNet
65
66 Generic Deep Neural Network class.
67
68 This class encapsulates the information for all types of Deep Neural Networks; a brief usage sketch follows the class declaration below.
69
70 \tparam Architecture The Architecture type that holds the
71 architecture-specific data types.
72 */
73template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
74class TDeepNet {
75public:
76 using Matrix_t = typename Architecture_t::Matrix_t;
77 using Scalar_t = typename Architecture_t::Scalar_t;
78
79private:
80 bool inline isInteger(Scalar_t x) const { return x == floor(x); }
81 size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);
82
83private:
84 std::vector<Layer_t *> fLayers; ///< The layers constituting the DeepNet
85
86 size_t fBatchSize; ///< Batch size used for training and evaluation.
87 size_t fInputDepth; ///< The depth of the input.
88 size_t fInputHeight; ///< The height of the input.
89 size_t fInputWidth; ///< The width of the input.
90
91 size_t fBatchDepth; ///< The depth of the batch used for training/testing.
92 size_t fBatchHeight; ///< The height of the batch used for training/testing.
93 size_t fBatchWidth; ///< The width of the batch used for training/testing.
94
95 bool fIsTraining; ///< Is the network training?
96
97 ELossFunction fJ; ///< The loss function of the network.
98 EInitialization fI; ///< The initialization method of the network.
99 ERegularization fR; ///< The regularization used for the network.
100 Scalar_t fWeightDecay; ///< The weight decay factor.
101
102public:
103 /*! Default Constructor */
104 TDeepNet();
105
106 /*! Constructor */
107 TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
108 size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
109 ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);
110
111 /*! Copy-constructor */
112 TDeepNet(const TDeepNet &);
113
114 /*! Destructor */
115 ~TDeepNet();
116
117 /*! Function for adding Convolution layer in the Deep Neural Network,
118 * with a given depth, filter height and width, striding in rows and columns,
119 * the zero paddings, as well as the activation function and the dropout
120 * probability. Based on these parameters, it calculates the width and height
121 * of the convolutional layer. */
122 TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
123 size_t strideCols, size_t paddingHeight, size_t paddingWidth,
124 EActivationFunction f, Scalar_t dropoutProbability = 1.0);
125
126 /*! Function for adding Convolution Layer in the Deep Neural Network,
127 * when the layer is already created. */
128 void AddConvLayer(TConvLayer<Architecture_t> *convLayer);
129
130 /*! Function for adding Pooling layer in the Deep Neural Network,
131 * with a given filter height and width, striding in rows and columns as
132 * well as the dropout probability. The depth is the same as that of the
133 * previous layer. Based on these parameters, it calculates the width and
134 * height of the pooling layer. */
135 TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
136 size_t strideCols, Scalar_t dropoutProbability = 1.0);
137 /*! Function for adding Max Pooling layer in the Deep Neural Network,
138 * when the layer is already created. */
139 void AddMaxPoolLayer(CNN::TMaxPoolLayer<Architecture_t> *maxPoolLayer);
140
141
142 /*! Function for adding Recurrent Layer in the Deep Neural Network,
143 * with given parameters */
144 TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
145 bool rememberState = false);
146
147 /*! Function for adding Vanilla RNN when the layer is already created
148 */
149 void AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer);
150
151 /*! Function for adding Dense Connected Layer in the Deep Neural Network,
152 * with a given width, activation function and dropout probability.
153 * Based on the previous layer dimensions, it calculates the input width
154 * of the fully connected layer. */
155 TDenseLayer<Architecture_t> *AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability = 1.0);
156
157 /*! Function for adding Dense Layer in the Deep Neural Network, when
158 * the layer is already created. */
159 void AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer);
160
161 /*! Function for adding Reshape Layer in the Deep Neural Network, with a given
162 * height and width. It will take every matrix from the previous layer and
163 * reshape it to a matrix with new dimensions. */
164 TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);
165
166 /*! Function for adding Reshape Layer in the Deep Neural Network, when
167 * the layer is already created. */
168 void AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer);
169
170#ifdef HAVE_DAE /// DAE functions
171 /*! Function for adding Corruption layer in the Deep Neural Network,
172 * with given number of visibleUnits and hiddenUnits. It corrupts input
173 * according to given corruptionLevel and dropoutProbability. */
174 TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
175 Scalar_t dropoutProbability, Scalar_t corruptionLevel);
176
177 /*! Function for adding Corruption Layer in the Deep Neural Network,
178 * when the layer is already created. */
179 void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);
180
181 /*! Function for adding Compression layer in the Deep Neural Network,
182 * with given number of visibleUnits and hiddenUnits. It compresses the input units
183 * taking weights and biases from the previous layers. */
184 TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
185 Scalar_t dropoutProbability, EActivationFunction f,
186 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);
187
188 /*! Function for adding Compression Layer in the Deep Neural Network, when
189 * the layer is already created. */
190 void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
191
192 /*! Function for adding Reconstruction layer in the Deep Neural Network,
193 * with given number of visibleUnits and hiddenUnits. It reconstructs the input units
194 * taking weights and biases from the previous layers. The same corruptionLevel and dropoutProbability
195 * must be passed as in corruptionLayer. */
196 TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
197 Scalar_t learningRate, EActivationFunction f,
198 std::vector<Matrix_t> weights,
199 std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
200 Scalar_t dropoutProbability);
201
202 /*! Function for adding Reconstruction Layer in the Deep Neural Network, when
203 * the layer is already created. */
204 void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
205
206 /*! Function for adding logisticRegressionLayer in the Deep Neural Network,
207 * with given number of inputUnits and outputUnits. It classifies the outputUnits. */
208 TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
209 size_t testDataBatchSize,
210 Scalar_t learningRate);
211
212 /*! Function for adding logisticRegressionLayer in the Deep Neural Network, when
213 * the layer is already created. */
214 void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
215
216 /* To train the Deep AutoEncoder network with required number of Corruption, Compression and Reconstruction
217 * layers. */
218 void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
219 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
220 bool applyDropout = false);
221
222 /* To classify outputLabel in the Deep AutoEncoder. Should be used after PreTrain if required.
223 * Currently it uses a Logistic Regression Layer; any other classification layer could be used instead.
224 */
225 void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
226 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
227#endif
228
229 /*! Function for initialization of the Neural Net. */
230 void Initialize();
231
232 /*! Function that executes the entire forward pass in the network. */
233 void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);
234
235 /*! Function for parallel forward in the vector of deep nets, where the master
236 * net is the net calling this function. There is one batch for one deep net.*/
237 void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
238 std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);
239
240 /*! Function that executes the entire backward pass in the network. */
241 void Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights);
242
243
244 /*! Function for parallel backward in the vector of deep nets, where the master
245 * net is the net calling this function and getting the updates from the other nets.
246 * There is one batch for one deep net.*/
247 void ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
248 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);
249
250 /*! Function for parallel backward in the vector of deep nets, where the master
251 * net is the net calling this function and getting the updates from the other nets,
252 * following the momentum strategy. There is one batch for one deep net.*/
253 void ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
254 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
255 Scalar_t momentum);
256
257 /*! Function for parallel backward in the vector of deep nets, where the master
258 * net is the net calling this function and getting the updates from the other nets,
259 * following the Nesterov momentum strategy. There is one batch for one deep net.*/
260 void ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
261 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
262 Scalar_t momentum);
263
264 /*! Function that will update the weights and biases in the layers that
265 * contain weights and biases. */
266 void Update(Scalar_t learningRate);
267
268 /*! Function for evaluating the loss, based on the activations stored
269 * in the last layer. */
270 Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;
271
272 /*! Function for evaluating the loss, based on the propagation of the given input. */
273 Scalar_t Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights,
274 bool applyDropout = false, bool includeRegularization = true);
275
276 /*! Function for computing the regularization term to be added to the loss function */
277 Scalar_t RegularizationTerm() const;
278
279 /*! Prediction based on activations stored in the last layer. */
280 void Prediction(Matrix_t &predictions, EOutputFunction f) const;
281
282 /*! Prediction for the given inputs, based on what network learned. */
283 void Prediction(Matrix_t &predictions, std::vector<Matrix_t> input, EOutputFunction f);
284
285 /*! Print the Deep Net Info */
286 void Print() const;
287
288 /*! Get the layer in the vector of layers at position i */
289 inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
290 inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }
291
292 /* Depth and the output width of the network. */
293 inline size_t GetDepth() const { return fLayers.size(); }
294 inline size_t GetOutputWidth() const { return fLayers.back()->GetWidth(); }
295
296 /* Return a reference to the layers. */
297 inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
298 inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }
299
300 /*! Remove all layers from the network. */
301 inline void Clear() { fLayers.clear(); }
302
303 /*! Getters */
304 inline size_t GetBatchSize() const { return fBatchSize; }
305 inline size_t GetInputDepth() const { return fInputDepth; }
306 inline size_t GetInputHeight() const { return fInputHeight; }
307 inline size_t GetInputWidth() const { return fInputWidth; }
308
309 inline size_t GetBatchDepth() const { return fBatchDepth; }
310 inline size_t GetBatchHeight() const { return fBatchHeight; }
311 inline size_t GetBatchWidth() const { return fBatchWidth; }
312
313 inline bool IsTraining() const { return fIsTraining; }
314
315 inline ELossFunction GetLossFunction() const { return fJ; }
316 inline EInitialization GetInitialization() const { return fI; }
317 inline ERegularization GetRegularization() const { return fR; }
318 inline Scalar_t GetWeightDecay() const { return fWeightDecay; }
319
320 /*! Setters */
321 // FIXME many of these won't work as the data structure storing activations
322 // and gradients have not changed in all the layers, also params in layers
323 // have not changed either
324 inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
325 inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
326 inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
327 inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
328 inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
329 inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
330 inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
331 inline void SetLossFunction(ELossFunction J) { fJ = J; }
332 inline void SetInitialization(EInitialization I) { fI = I; }
333 inline void SetRegularization(ERegularization R) { fR = R; }
334 inline void SetWeightDecay(Scalar_t weightDecay) { fWeightDecay = weightDecay; }
335};
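// Usage sketch (added for illustration, not part of the original header): how a
// TDeepNet might be assembled for a 1 x 28 x 28 input. The architecture type,
// batch layout and layer parameters below are assumptions; any concrete
// Architecture_t (here TCpu<Double_t>) is used the same way.
//
//   using namespace TMVA::DNN;
//   using DeepNet_t = TDeepNet<TCpu<Double_t>>;
//
//   // batchSize = 32, input = (1, 28, 28), batch tensor = 32 x 1 x (28*28)
//   DeepNet_t net(32, 1, 28, 28, 32, 1, 28 * 28,
//                 ELossFunction::kCrossEntropy, EInitialization::kGauss);
//   net.AddConvLayer(8, 3, 3, 1, 1, 1, 1, EActivationFunction::kRelu); // -> 8 x 28 x 28
//   net.AddMaxPoolLayer(2, 2, 2, 2);                                   // -> 8 x 14 x 14
//   net.AddReshapeLayer(1, 1, 8 * 14 * 14, true);                      // flatten
//   net.AddDenseLayer(1, EActivationFunction::kIdentity);              // output layer
//   net.Initialize();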
336
337//
338// Deep Net Class - Implementation
339//
340//______________________________________________________________________________
341template <typename Architecture_t, typename Layer_t>
342TDeepNet<Architecture_t, Layer_t>::TDeepNet()
343 : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
344 fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
345 fIsTraining(true), fWeightDecay(0.0)
346{
347 // Nothing to do here.
348}
349
350//______________________________________________________________________________
351template <typename Architecture_t, typename Layer_t>
352TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
353 size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
354 EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
355 : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
356 fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
357 fR(R), fWeightDecay(weightDecay)
358{
359 // Nothing to do here.
360}
361
362//______________________________________________________________________________
363template <typename Architecture_t, typename Layer_t>
364TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
365 : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
366 fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
367 fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
368 fWeightDecay(deepNet.fWeightDecay)
369{
370 // Nothing to do here.
371}
372
373//______________________________________________________________________________
374template <typename Architecture_t, typename Layer_t>
375TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
376{
377 // Release the layers' memory
378}
379
380//______________________________________________________________________________
381template <typename Architecture_t, typename Layer_t>
382auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
383{
384 Scalar_t dimension = ((Scalar_t)(imgDim - fltDim + 2 * padding)) / stride + 1; // floating-point division so that incompatible (non-integer) dimensions are caught below
385 if (!isInteger(dimension) || dimension <= 0) {
386 this->Print();
387 int iLayer = fLayers.size();
388 Fatal("calculateDimension","Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
389 iLayer, imgDim, fltDim, padding, stride);
390 // std::cout << " calculateDimension - Not compatible hyper parameters (imgDim, fltDim, padding, stride)"
391 // << imgDim << " , " << fltDim << " , " << padding << " , " << stride<< " resulting dim is " << dimension << std::endl;
392 // std::exit(EXIT_FAILURE);
393 }
394
395 return (size_t)dimension;
396}
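// Worked example (added for illustration): a 32x32 image with a 5x5 filter,
// padding 2 and stride 1 gives ((32 - 5 + 2*2) / 1) + 1 = 32, i.e. the spatial
// size is preserved; with stride 3 the same setup gives (31 / 3) + 1 = 11.33...,
// which is not an integer, so the hyperparameters are rejected above.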
397
398//______________________________________________________________________________
399template <typename Architecture_t, typename Layer_t>
400TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
401 size_t filterWidth, size_t strideRows,
402 size_t strideCols, size_t paddingHeight,
403 size_t paddingWidth, EActivationFunction f,
404 Scalar_t dropoutProbability)
405{
406 // All variables defining a convolutional layer
407 size_t batchSize = this->GetBatchSize();
408 size_t inputDepth;
409 size_t inputHeight;
410 size_t inputWidth;
411 EInitialization init = this->GetInitialization();
412 ERegularization reg = this->GetRegularization();
413 Scalar_t decay = this->GetWeightDecay();
414
415 if (fLayers.size() == 0) {
416 inputDepth = this->GetInputDepth();
417 inputHeight = this->GetInputHeight();
418 inputWidth = this->GetInputWidth();
419 } else {
420 Layer_t *lastLayer = fLayers.back();
421 inputDepth = lastLayer->GetDepth();
422 inputHeight = lastLayer->GetHeight();
423 inputWidth = lastLayer->GetWidth();
424 }
425
426
427
428 // Create the conv layer
429 TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
430 batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
431 strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);
432
433 fLayers.push_back(convLayer);
434 return convLayer;
435}
436
437//______________________________________________________________________________
438template <typename Architecture_t, typename Layer_t>
439void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
440{
441 fLayers.push_back(convLayer);
442}
443
444//______________________________________________________________________________
445template <typename Architecture_t, typename Layer_t>
446TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
447 size_t strideRows, size_t strideCols,
448 Scalar_t dropoutProbability)
449{
450 size_t batchSize = this->GetBatchSize();
451 size_t inputDepth;
452 size_t inputHeight;
453 size_t inputWidth;
454
455 if (fLayers.size() == 0) {
456 inputDepth = this->GetInputDepth();
457 inputHeight = this->GetInputHeight();
458 inputWidth = this->GetInputWidth();
459 } else {
460 Layer_t *lastLayer = fLayers.back();
461 inputDepth = lastLayer->GetDepth();
462 inputHeight = lastLayer->GetHeight();
463 inputWidth = lastLayer->GetWidth();
464 }
465
466 TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
467 batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
468 strideRows, strideCols, dropoutProbability);
469
470 // But this creates a copy or what?
471 fLayers.push_back(maxPoolLayer);
472
473 return maxPoolLayer;
474}
475
476//______________________________________________________________________________
477template <typename Architecture_t, typename Layer_t>
478void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(CNN::TMaxPoolLayer<Architecture_t> *maxPoolLayer)
479{
480 fLayers.push_back(maxPoolLayer);
481}
482
483//______________________________________________________________________________
484template <typename Architecture_t, typename Layer_t>
485TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
486 size_t timeSteps,
487 bool rememberState)
488{
489
490 // should check if input and time size are consistent
491
492 //std::cout << "Create RNN " << fLayers.size() << " " << this->GetInputHeight() << " " << this->GetInputWidth() << std::endl;
493 size_t inputHeight, inputWidth;
494 if (fLayers.size() == 0) {
495 inputHeight = this->GetInputHeight();
496 inputWidth = this->GetInputWidth();
497 } else {
498 Layer_t *lastLayer = fLayers.back();
499 inputHeight = lastLayer->GetHeight();
500 inputWidth = lastLayer->GetWidth();
501 }
502 if (inputSize != inputWidth) {
503 Error("AddBasicRNNLayer","Inconsistent input size with input layout - it should be %zu instead of %zu",inputSize, inputWidth);
504 }
505 if (timeSteps != inputHeight) {
506 Error("AddBasicRNNLayer","Inconsistent time steps with input layout - it should be %zu instead of %zu",timeSteps, inputHeight);
507 }
508
509 TBasicRNNLayer<Architecture_t> *basicRNNLayer =
510 new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
511 DNN::EActivationFunction::kTanh, fIsTraining, this->GetInitialization());
512 fLayers.push_back(basicRNNLayer);
513 return basicRNNLayer;
514}
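// Example (added for illustration): for sequences of 10 time steps with 5
// features each, the net is built with inputHeight = 10 and inputWidth = 5, and
//
//   net.AddBasicRNNLayer(/*stateSize=*/16, /*inputSize=*/5, /*timeSteps=*/10);
//
// then passes the two consistency checks above; the state size 16 is an
// arbitrary choice.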
515
516//______________________________________________________________________________
517template <typename Architecture_t, typename Layer_t>
518void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
519{
520 fLayers.push_back(basicRNNLayer);
521}
522
523//DAE
524#ifdef HAVE_DAE
525
526//______________________________________________________________________________
527template <typename Architecture_t, typename Layer_t>
528TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
529 size_t hiddenUnits,
530 Scalar_t dropoutProbability,
531 Scalar_t corruptionLevel)
532{
533 size_t batchSize = this->GetBatchSize();
534
535 TCorruptionLayer<Architecture_t> *corruptionLayer =
536 new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
537 fLayers.push_back(corruptionLayer);
538 return corruptionLayer;
539}
540//______________________________________________________________________________
541
542template <typename Architecture_t, typename Layer_t>
543void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
544{
545 fLayers.push_back(corruptionLayer);
546}
547
548//______________________________________________________________________________
549template <typename Architecture_t, typename Layer_t>
550TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
551 size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
552 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
553{
554 size_t batchSize = this->GetBatchSize();
555
556 TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
557 batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
558 fLayers.push_back(compressionLayer);
559 return compressionLayer;
560}
561//______________________________________________________________________________
562
563template <typename Architecture_t, typename Layer_t>
564void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
565{
566 fLayers.push_back(compressionLayer);
567}
568
569//______________________________________________________________________________
570template <typename Architecture_t, typename Layer_t>
571TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
572 size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
573 std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
574{
575 size_t batchSize = this->GetBatchSize();
576
577 TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
578 batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
579 fLayers.push_back(reconstructionLayer);
580 return reconstructionLayer;
581}
582//______________________________________________________________________________
583
584template <typename Architecture_t, typename Layer_t>
585void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
586 TReconstructionLayer<Architecture_t> *reconstructionLayer)
587{
588 fLayers.push_back(reconstructionLayer);
589}
590
591//______________________________________________________________________________
592template <typename Architecture_t, typename Layer_t>
593TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
594 size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
595{
596 size_t batchSize = this->GetBatchSize();
597
598 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
599 new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
600 fLayers.push_back(logisticRegressionLayer);
601 return logisticRegressionLayer;
602}
603//______________________________________________________________________________
604template <typename Architecture_t, typename Layer_t>
605void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
606 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
607{
608 fLayers.push_back(logisticRegressionLayer);
609}
610#endif
611
612
613//______________________________________________________________________________
614template <typename Architecture_t, typename Layer_t>
615TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
616 Scalar_t dropoutProbability)
617{
618 size_t batchSize = this->GetBatchSize();
619 size_t inputWidth;
620 EInitialization init = this->GetInitialization();
621 ERegularization reg = this->GetRegularization();
622 Scalar_t decay = this->GetWeightDecay();
623
624 if (fLayers.size() == 0) {
625 inputWidth = this->GetInputWidth();
626 } else {
627 Layer_t *lastLayer = fLayers.back();
628 inputWidth = lastLayer->GetWidth();
629 }
630
631 TDenseLayer<Architecture_t> *denseLayer =
632 new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
633
634 fLayers.push_back(denseLayer);
635
636 return denseLayer;
637}
638
639//______________________________________________________________________________
640template <typename Architecture_t, typename Layer_t>
641void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
642{
643 fLayers.push_back(denseLayer);
644}
645
646//______________________________________________________________________________
647template <typename Architecture_t, typename Layer_t>
648TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
649 size_t width, bool flattening)
650{
651 size_t batchSize = this->GetBatchSize();
652 size_t inputDepth;
653 size_t inputHeight;
654 size_t inputWidth;
655 size_t outputNSlices;
656 size_t outputNRows;
657 size_t outputNCols;
658
659 if (fLayers.size() == 0) {
660 inputDepth = this->GetInputDepth();
661 inputHeight = this->GetInputHeight();
662 inputWidth = this->GetInputWidth();
663 } else {
664 Layer_t *lastLayer = fLayers.back();
665 inputDepth = lastLayer->GetDepth();
666 inputHeight = lastLayer->GetHeight();
667 inputWidth = lastLayer->GetWidth();
668 }
669
670 if (flattening) {
671 outputNSlices = 1;
672 outputNRows = this->GetBatchSize();
673 outputNCols = depth * height * width;
674 size_t inputNCols = inputDepth * inputHeight * inputWidth;
675 if (outputNCols != 0 && outputNCols != inputNCols ) {
676 Info("AddReshapeLayer","Dimensions not compatibles - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - Force flattening output to be %zu",
677 inputDepth, inputHeight, inputWidth, depth, height, width,inputNCols);
678 }
679 outputNCols = inputNCols;
680 depth = 1;
681 height = 1;
682 width = outputNCols;
683 } else {
684 outputNSlices = this->GetBatchSize();
685 outputNRows = depth;
686 outputNCols = height * width;
687 }
688
689 TReshapeLayer<Architecture_t> *reshapeLayer =
690 new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
691 outputNSlices, outputNRows, outputNCols, flattening);
692
693 fLayers.push_back(reshapeLayer);
694
695 return reshapeLayer;
696}
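// Example (added for illustration): with flattening = true the requested
// depth/height/width are overridden, so flattening the 8 x 14 x 14 output of a
// pooling layer yields a single batchSize x 1568 matrix (outputNSlices = 1,
// outputNRows = batchSize, outputNCols = 8 * 14 * 14). With flattening = false
// the requested shape is kept: batchSize slices of depth rows and
// height * width columns each.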
697
698//______________________________________________________________________________
699template <typename Architecture_t, typename Layer_t>
700void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
701{
702 fLayers.push_back(reshapeLayer);
703}
704
705//______________________________________________________________________________
706template <typename Architecture_t, typename Layer_t>
707auto TDeepNet<Architecture_t, Layer_t>::Initialize() -> void
708{
709 for (size_t i = 0; i < fLayers.size(); i++) {
710 fLayers[i]->Initialize();
711 }
712}
713
714template <typename Architecture>
715auto debugTensor(const std::vector<typename Architecture::Matrix_t> &A, const std::string name = "tensor") -> void
716{
717 std::cout << name << "\n";
718 for (size_t l = 0; l < A.size(); ++l) {
719 for (size_t i = 0; i < A[l].GetNrows(); ++i) {
720 for (size_t j = 0; j < A[l].GetNcols(); ++j) {
721 std::cout << A[l](i, j) << " ";
722 }
723 std::cout << "\n";
724 }
725 std::cout << "********\n";
726 }
727}
728
729//______________________________________________________________________________
730template <typename Architecture_t, typename Layer_t>
731auto TDeepNet<Architecture_t, Layer_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
732{
733 fLayers.front()->Forward(input, applyDropout);
734
735 for (size_t i = 1; i < fLayers.size(); i++) {
736 fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
737 }
738}
739
740//______________________________________________________________________________
741template <typename Architecture_t, typename Layer_t>
742auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
743 std::vector<TTensorBatch<Architecture_t>> &batches,
744 bool applyDropout) -> void
745{
746 size_t depth = this->GetDepth();
747
748 // The first layer of each deep net
749 for (size_t i = 0; i < nets.size(); i++) {
750 nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
751 }
752
753 // The i'th layer of each deep net
754 for (size_t i = 1; i < depth; i++) {
755 for (size_t j = 0; j < nets.size(); j++) {
756 nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
757 }
758 }
759}
760
761#ifdef HAVE_DAE
762//_____________________________________________________________________________
763template <typename Architecture_t, typename Layer_t>
764auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
765 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
766 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
767 EActivationFunction f, bool applyDropout) -> void
768{
769 std::vector<Matrix_t> inp1;
770 std::vector<Matrix_t> inp2;
771 size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
772 // size_t batchSize = this->GetBatchSize();
773 size_t visibleUnits = (size_t)input[0].GetNrows();
774
775 AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
776 fLayers.back()->Initialize();
777 fLayers.back()->Forward(input, applyDropout);
778 // fLayers.back()->Print();
779
780 AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
781 fLayers.back()->GetBiases());
782 fLayers.back()->Initialize();
783 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // as we have to pass corrupt input
784
785 AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
786 fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
787 fLayers.back()->Initialize();
788 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
789 applyDropout); // as we have to pass compressed Input
790 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
791 input);
792 // three layers are added, now pointer is on third layer
793 size_t weightsSize = fLayers.back()->GetWeights().size();
794 size_t biasesSize = fLayers.back()->GetBiases().size();
795 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
796 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
797 for (size_t j = 0; j < weightsSize; j++) {
798 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
799 }
800 for (size_t j = 0; j < biasesSize; j++) {
801 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
802 }
803 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
804 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
805 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
806 fLayers[fLayers.size() - 3]->GetOutput(), input);
807 }
808 fLayers.back()->Print();
809
810 for (size_t i = 1; i < numOfHiddenLayers; i++) {
811
812 AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
813 fLayers.back()->Initialize();
814 fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(),
815 applyDropout); // as we have to pass compressed Input
816
817 AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
818 fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
819 fLayers.back()->Initialize();
820 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
821
822 AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
823 fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
824 dropoutProbability);
825 fLayers.back()->Initialize();
826 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
827 applyDropout); // as we have to pass compressed Input
828 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
829 fLayers[fLayers.size() - 5]->GetOutput());
830
831 // three layers are added, now pointer is on third layer
832 size_t _weightsSize = fLayers.back()->GetWeights().size();
833 size_t _biasesSize = fLayers.back()->GetBiases().size();
834 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
835 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
836 for (size_t j = 0; j < _weightsSize; j++) {
837 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
838 }
839 for (size_t j = 0; j < _biasesSize; j++) {
840 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
841 }
842 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
843 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
844 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
845 fLayers[fLayers.size() - 3]->GetOutput(),
846 fLayers[fLayers.size() - 5]->GetOutput());
847 }
848 fLayers.back()->Print();
849 }
850}
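// Usage sketch (added for illustration, only compiled when HAVE_DAE is defined):
// greedy layer-wise pre-training of a two-hidden-layer autoencoder stack; the
// unit counts and hyperparameters below are arbitrary assumptions.
//
//   std::vector<size_t> hiddenUnits = {128, 64};
//   net.PreTrain(input, hiddenUnits, /*learningRate=*/0.1, /*corruptionLevel=*/0.3,
//                /*dropoutProbability=*/1.0, /*epochs=*/10,
//                EActivationFunction::kSigmoid, /*applyDropout=*/false);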
851
852//______________________________________________________________________________
853template <typename Architecture_t, typename Layer_t>
854auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
855 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
856 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
857{
858 std::vector<Matrix_t> inp1;
859 std::vector<Matrix_t> inp2;
860 if (fLayers.size() == 0) // only Logistic Regression Layer
861 {
862 size_t inputUnits = input[0].GetNrows();
863
864 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
865 fLayers.back()->Initialize();
866 for (size_t i = 0; i < epochs; i++) {
867 fLayers.back()->Backward(inputLabel, inp1, input, inp2);
868 }
869 fLayers.back()->Forward(input, false);
870 fLayers.back()->Print();
871 } else { // if used after any other layer
872 size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
873 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
874 fLayers.back()->Initialize();
875 for (size_t i = 0; i < epochs; i++) {
876 fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
877 }
878 fLayers.back()->Forward(testInput, false);
879 fLayers.back()->Print();
880 }
881}
882#endif
883
884//______________________________________________________________________________
885template <typename Architecture_t, typename Layer_t>
886auto TDeepNet<Architecture_t, Layer_t>::Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
887 const Matrix_t &weights) -> void
888{
889 std::vector<Matrix_t> inp1;
890 std::vector<Matrix_t> inp2;
891 // Last layer should be dense layer
892 evaluateGradients<Architecture_t>(fLayers.back()->GetActivationGradientsAt(0), this->GetLossFunction(), groundTruth,
893 fLayers.back()->GetOutputAt(0), weights);
894 for (size_t i = fLayers.size() - 1; i > 0; i--) {
895 std::vector<Matrix_t> &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
896 std::vector<Matrix_t> &activations_backward = fLayers[i - 1]->GetOutput();
897 fLayers[i]->Backward(activation_gradient_backward, activations_backward, inp1, inp2);
898 }
899
900 // need to have a dummy tensor (size=0) to pass for activation gradient backward which
901 // are not computed for the first layer
902 std::vector<Matrix_t> dummy;
903 fLayers[0]->Backward(dummy, input, inp1, inp2);
904}
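// Typical single-net training step (added for illustration, combining the methods
// above; the batch tensor, labels and event weights are assumed to be prepared
// by the caller):
//
//   net.Forward(batchInput, /*applyDropout=*/true);
//   Scalar_t loss = net.Loss(labels, eventWeights);   // uses the stored activations
//   net.Backward(batchInput, labels, eventWeights);
//   net.Update(learningRate);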
905
906//______________________________________________________________________________
907template <typename Architecture_t, typename Layer_t>
908auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
909 std::vector<TTensorBatch<Architecture_t>> &batches,
910 Scalar_t learningRate) -> void
911{
912 std::vector<Matrix_t> inp1;
913 std::vector<Matrix_t> inp2;
914 size_t depth = this->GetDepth();
915
916 // Evaluate the gradients of the last layers in each deep net
917 for (size_t i = 0; i < nets.size(); i++) {
918 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
919 nets[i].GetLossFunction(), batches[i].GetOutput(),
920 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
921 }
922
923 // Backpropagate the error in i'th layer of each deep net
924 for (size_t i = depth - 1; i > 0; i--) {
925 for (size_t j = 0; j < nets.size(); j++) {
926 nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
927 nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
928 }
929 }
930
931 std::vector<Matrix_t> dummy;
932
933 // First layer of each deep net
934 for (size_t i = 0; i < nets.size(); i++) {
935 nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
936 }
937
938 // Update and copy
939 for (size_t i = 0; i < nets.size(); i++) {
940 for (size_t j = 0; j < depth; j++) {
941 Layer_t *masterLayer = this->GetLayerAt(j);
942 Layer_t *layer = nets[i].GetLayerAt(j);
943
944 masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
945 layer->CopyWeights(masterLayer->GetWeights());
946
947 masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
948 layer->CopyBiases(masterLayer->GetBiases());
949 }
950 }
951}
952
953//______________________________________________________________________________
954template <typename Architecture_t, typename Layer_t>
955auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
956 std::vector<TTensorBatch<Architecture_t>> &batches,
957 Scalar_t learningRate, Scalar_t momentum) -> void
958{
959 std::vector<Matrix_t> inp1;
960 std::vector<Matrix_t> inp2;
961 size_t depth = this->GetDepth();
962
963 // Evaluate the gradients of the last layers in each deep net
964 for (size_t i = 0; i < nets.size(); i++) {
965 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
966 nets[i].GetLossFunction(), batches[i].GetOutput(),
967 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
968 }
969
970 // Backpropagate the error in i'th layer of each deep net
971 for (size_t i = depth - 1; i > 0; i--) {
972 Layer_t *masterLayer = this->GetLayerAt(i);
973
974 for (size_t j = 0; j < nets.size(); j++) {
975 Layer_t *layer = nets[j].GetLayerAt(i);
976
977 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
978 inp1, inp2);
979 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
980 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
981 }
982
983 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
984 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
985 }
986
987 std::vector<Matrix_t> dummy;
988
989 // First layer of each deep net
990 Layer_t *masterFirstLayer = this->GetLayerAt(0);
991 for (size_t i = 0; i < nets.size(); i++) {
992 Layer_t *layer = nets[i].GetLayerAt(0);
993
994 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
995
996 masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
997 masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
998 }
999
1000 masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
1001 masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);
1002
1003 for (size_t i = 0; i < depth; i++) {
1004 Layer_t *masterLayer = this->GetLayerAt(i);
1005 masterLayer->Update(1.0);
1006
1007 for (size_t j = 0; j < nets.size(); j++) {
1008 Layer_t *layer = nets[j].GetLayerAt(i);
1009
1010 layer->CopyWeights(masterLayer->GetWeights());
1011 layer->CopyBiases(masterLayer->GetBiases());
1012 }
1013 }
1014}
1015
1016//______________________________________________________________________________
1017template <typename Architecture_t, typename Layer_t>
1018auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1019 std::vector<TTensorBatch<Architecture_t>> &batches,
1020 Scalar_t learningRate, Scalar_t momentum) -> void
1021{
1022 std::cout << "Parallel Backward Nestorov" << std::endl;
1023 std::vector<Matrix_t> inp1;
1024 std::vector<Matrix_t> inp2;
1025 size_t depth = this->GetDepth();
1026
1027 // Evaluate the gradients of the last layers in each deep net
1028 for (size_t i = 0; i < nets.size(); i++) {
1029 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1030 nets[i].GetLossFunction(), batches[i].GetOutput(),
1031 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1032 }
1033
1034 // Backpropagate the error in i'th layer of each deep net
1035 for (size_t i = depth - 1; i > 0; i--) {
1036 for (size_t j = 0; j < nets.size(); j++) {
1037 Layer_t *layer = nets[j].GetLayerAt(i);
1038
1039 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1040 inp1, inp2);
1041 }
1042 }
1043
1044 std::vector<Matrix_t> dummy;
1045
1046 // First layer of each deep net
1047 for (size_t i = 0; i < nets.size(); i++) {
1048 Layer_t *layer = nets[i].GetLayerAt(0);
1049 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1050 }
1051
1052 for (size_t i = 0; i < depth; i++) {
1053 Layer_t *masterLayer = this->GetLayerAt(i);
1054 for (size_t j = 0; j < nets.size(); j++) {
1055 Layer_t *layer = nets[j].GetLayerAt(i);
1056
1057 layer->CopyWeights(masterLayer->GetWeights());
1058 layer->CopyBiases(masterLayer->GetBiases());
1059
1060 layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
1061 layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
1062 }
1063
1064 for (size_t j = 0; j < nets.size(); j++) {
1065 Layer_t *layer = nets[j].GetLayerAt(i);
1066
1067 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1068 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1069 }
1070
1071 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1072 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1073
1074 masterLayer->Update(1.0);
1075 }
1076}
1077
1078//______________________________________________________________________________
1079template <typename Architecture_t, typename Layer_t>
1080auto TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate) -> void
1081{
1082 for (size_t i = 0; i < fLayers.size(); i++) {
1083 fLayers[i]->Update(learningRate);
1084 }
1085}
1086
1087//______________________________________________________________________________
1088template <typename Architecture_t, typename Layer_t>
1089auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
1090 bool includeRegularization) const -> Scalar_t
1091{
1092 // Last layer should not be deep
1093 auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);
1094
1095 includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
1096 if (includeRegularization) {
1097 loss += RegularizationTerm();
1098 }
1099
1100 return loss;
1101}
1102
1103//______________________________________________________________________________
1104template <typename Architecture_t, typename Layer_t>
1105auto TDeepNet<Architecture_t, Layer_t>::Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
1106 const Matrix_t &weights, bool applyDropout, bool includeRegularization)
1107 -> Scalar_t
1108{
1109 Forward(input, applyDropout);
1110 return Loss(groundTruth, weights, includeRegularization);
1111}
1112
1113//______________________________________________________________________________
1114template <typename Architecture_t, typename Layer_t>
1115auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
1116{
1117 Scalar_t reg = 0.0;
1118 for (size_t i = 0; i < fLayers.size(); i++) {
1119 for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
1120 reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
1121 }
1122 }
1123 return this->GetWeightDecay() * reg;
1124}
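// In formula form (added for reference):
//   RegularizationTerm() = fWeightDecay * Σ_l Σ_j R(W_j^(l)),
// where the sum runs over all layers l and their weight matrices W_j^(l), and R
// is the L1 or L2 penalty selected by fR.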
1125
1126
1127//______________________________________________________________________________
1128template <typename Architecture_t, typename Layer_t>
1129auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
1130{
1131 // Last layer should not be deep
1132 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1133}
1134
1135//______________________________________________________________________________
1136template <typename Architecture_t, typename Layer_t>
1137auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, std::vector<Matrix_t> input,
1138 EOutputFunction f) -> void
1139{
1140 Forward(input, false);
1141 // Last layer should not be deep
1142 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1143}
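// Example (added for illustration): obtaining per-event outputs for a batch once
// the network is trained, assuming a single-unit output layer and a sigmoid
// output function:
//
//   Matrix_t predictions(net.GetBatchSize(), net.GetOutputWidth());
//   net.Prediction(predictions, batchInput, EOutputFunction::kSigmoid);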
1144
1145//______________________________________________________________________________
1146template <typename Architecture_t, typename Layer_t>
1147auto TDeepNet<Architecture_t, Layer_t>::Print() const -> void
1148{
1149 std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
1150 std::cout << " Input = ( " << this->GetInputDepth();
1151 std::cout << ", " << this->GetInputHeight();
1152 std::cout << ", " << this->GetInputWidth() << " )";
1153 std::cout << " Batch size = " << this->GetBatchSize();
1154 std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;
1155
1156 //std::cout << "\t Layers: " << std::endl;
1157
1158 for (size_t i = 0; i < fLayers.size(); i++) {
1159 std::cout << "\tLayer " << i << "\t";
1160 fLayers[i]->Print();
1161 }
1162}
1163} // namespace DNN
1164} // namespace TMVA
1165
1166#endif