29#ifndef TMVA_DNN_DEEPNET 
   30#define TMVA_DNN_DEEPNET 
   48#include "TMVA/DNN/DAE/CompressionLayer.h" 
   49#include "TMVA/DNN/DAE/CorruptionLayer.h" 
   50#include "TMVA/DNN/DAE/ReconstructionLayer.h" 
   51#include "TMVA/DNN/DAE/LogisticRegressionLayer.h" 
   72template <
typename Architecture_t, 
typename Layer_t = VGeneralLayer<Architecture_t>>
 
   76   using Tensor_t = 
typename Architecture_t::Tensor_t;
 
   77   using Matrix_t = 
typename Architecture_t::Matrix_t;
 
   78   using Scalar_t = 
typename Architecture_t::Scalar_t;
 
  109   TDeepNet(
size_t BatchSize, 
size_t InputDepth, 
size_t InputHeight, 
size_t InputWidth, 
size_t BatchDepth,
 
  125                                            size_t strideCols, 
size_t paddingHeight, 
size_t paddingWidth,
 
  138                                                  size_t strideCols, 
Scalar_t dropoutProbability = 1.0);
 
  147                                                    bool rememberState = 
false,
bool returnSequence = 
false,
 
  157                                                    bool rememberState = 
false, 
bool returnSequence = 
false);
 
  166                                                    bool rememberState = 
false, 
bool returnSequence = 
false,
 
  167                                                    bool resetGateAfter = 
false);
 
  199   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(
size_t visibleUnits, 
size_t hiddenUnits,
 
  204   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);
 
  209   TCompressionLayer<Architecture_t> *AddCompressionLayer(
size_t visibleUnits, 
size_t hiddenUnits,
 
  211                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);
 
  215   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
 
  221   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(
size_t visibleUnits, 
size_t hiddenUnits,
 
  223                                                                std::vector<Matrix_t> weights,
 
  224                                                                std::vector<Matrix_t> biases, 
Scalar_t corruptionLevel,
 
  229   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
 
  233   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(
size_t inputUnits, 
size_t outputUnits,
 
  234                                                                        size_t testDataBatchSize,
 
  239   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
 
  243   void PreTrain(std::vector<Matrix_t> &
input, std::vector<size_t> numHiddenUnitsPerLayer, 
Scalar_t learningRate,
 
  245                 bool applyDropout = 
false);
 
  250   void FineTune(std::vector<Matrix_t> &
input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
 
  251                 size_t outputUnits, 
size_t testDataBatchSize, 
Scalar_t learningRate, 
size_t epochs);
 
  269#ifdef USE_PARALLEL_DEEPNET 
  307                 bool inTraining = 
false, 
bool includeRegularization = 
true);
 
  377template <
typename Architecture_t, 
typename Layer_t>
 
  379   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
 
  381     fIsTraining(true), fWeightDecay(0.0)
 
  387template <
typename Architecture_t, 
typename Layer_t>
 
  389                                            size_t batchDepth, 
size_t batchHeight, 
size_t batchWidth, 
ELossFunction J,
 
  391   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
 
  392     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(
I),
 
  399template <
typename Architecture_t, 
typename Layer_t>
 
  401   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
 
  402     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
 
  403     fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
 
  404     fWeightDecay(deepNet.fWeightDecay)
 
  410template <
typename Architecture_t, 
typename Layer_t>
 
  414   for (
auto  layer : fLayers)
 
  420template <
typename Architecture_t, 
typename Layer_t>
 
  423   Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
 
  424   if (!isInteger(dimension) || dimension <= 0) {
 
  426      int iLayer = fLayers.size();
 
  427      Fatal(
"calculateDimension",
"Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
 
  428            iLayer, imgDim, fltDim, padding, stride);
 
  434   return (
size_t)dimension;
 
  438template <
typename Architecture_t, 
typename Layer_t>
 
  440                                                                            size_t filterWidth, 
size_t strideRows,
 
  441                                                                            size_t strideCols, 
size_t paddingHeight,
 
  446   size_t batchSize = this->GetBatchSize();
 
  452   Scalar_t decay = this->GetWeightDecay();
 
  454   if (fLayers.size() == 0) {
 
  455      inputDepth = this->GetInputDepth();
 
  456      inputHeight = this->GetInputHeight();
 
  457      inputWidth = this->GetInputWidth();
 
  459      Layer_t *lastLayer = fLayers.back();
 
  460      inputDepth = lastLayer->GetDepth();
 
  461      inputHeight = lastLayer->GetHeight();
 
  462      inputWidth = lastLayer->GetWidth();
 
  469           batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
 
  470           strideCols, paddingHeight, paddingWidth, dropoutProbability, 
f, 
reg, decay);
 
  472   fLayers.push_back(convLayer);
 
  477template <
typename Architecture_t, 
typename Layer_t>
 
  480   fLayers.push_back(convLayer);
 
  484template <
typename Architecture_t, 
typename Layer_t>
 
  486                                                                                  size_t strideRows, 
size_t strideCols,
 
  489   size_t batchSize = this->GetBatchSize();
 
  494   if (fLayers.size() == 0) {
 
  495      inputDepth = this->GetInputDepth();
 
  496      inputHeight = this->GetInputHeight();
 
  497      inputWidth = this->GetInputWidth();
 
  499      Layer_t *lastLayer = fLayers.back();
 
  500      inputDepth = lastLayer->GetDepth();
 
  501      inputHeight = lastLayer->GetHeight();
 
  502      inputWidth = lastLayer->GetWidth();
 
  506      batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
 
  507      strideRows, strideCols, dropoutProbability);
 
  510   fLayers.push_back(maxPoolLayer);
 
  516template <
typename Architecture_t, 
typename Layer_t>
 
  519   fLayers.push_back(maxPoolLayer);
 
  523template <
typename Architecture_t, 
typename Layer_t>
 
  526                                                                                    bool rememberState, 
bool returnSequence,
 
  533   size_t inputHeight, inputWidth, inputDepth;
 
  534   if (fLayers.size() == 0) {
 
  535      inputHeight = this->GetInputHeight();
 
  536      inputWidth = this->GetInputWidth();
 
  537      inputDepth = this->GetInputDepth();
 
  539      Layer_t *lastLayer = fLayers.back();
 
  540      inputHeight = lastLayer->GetHeight();
 
  541      inputWidth = lastLayer->GetWidth();
 
  542      inputDepth = lastLayer->GetDepth();
 
  544   if (inputSize != inputWidth) {
 
  545      Error(
"AddBasicRNNLayer",
"Inconsistent input size with input layout  - it should be %zu instead of %zu",inputSize, inputWidth);
 
  547   if (timeSteps != inputHeight && timeSteps != inputDepth) {
 
  548      Error(
"AddBasicRNNLayer",
"Inconsistent time steps with input layout - it should be %zu instead of %zu or %zu",timeSteps, inputHeight,inputDepth);
 
  553                                         f, fIsTraining, this->GetInitialization());
 
  554   fLayers.push_back(basicRNNLayer);
 
  555   return basicRNNLayer;
 
  559template <
typename Architecture_t, 
typename Layer_t>
 
  562   fLayers.push_back(basicRNNLayer);
 
  566template <
typename Architecture_t, 
typename Layer_t>
 
  568                                                                                      size_t timeSteps, 
bool rememberState, 
bool returnSequence)
 
  571   size_t inputHeight, inputWidth, inputDepth;
 
  572   if (fLayers.size() == 0) {
 
  573      inputHeight = this->GetInputHeight();
 
  574      inputWidth = this->GetInputWidth();
 
  575      inputDepth = this->GetInputDepth();
 
  577      Layer_t *lastLayer = fLayers.back();
 
  578      inputHeight = lastLayer->GetHeight();
 
  579      inputWidth = lastLayer->GetWidth();
 
  580      inputDepth = lastLayer->GetDepth();
 
  582   if (inputSize != inputWidth) {
 
  583      Error(
"AddBasicLSTMLayer", 
"Inconsistent input size with input layout  - it should be %zu instead of %zu", inputSize, inputWidth);
 
  585   if (timeSteps != inputHeight && timeSteps != inputDepth) {
 
  586      Error(
"AddBasicLSTMLayer", 
"Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
 
  593                                         fIsTraining, this->GetInitialization());
 
  594   fLayers.push_back(basicLSTMLayer);
 
  595   return basicLSTMLayer;
 
  599template <
typename Architecture_t, 
typename Layer_t>
 
  602   fLayers.push_back(basicLSTMLayer);
 
  607template <
typename Architecture_t, 
typename Layer_t>
 
  609                                                                                      size_t timeSteps, 
bool rememberState, 
bool returnSequence, 
bool resetGateAfter)
 
  612   size_t inputHeight, inputWidth, inputDepth;
 
  613   if (fLayers.size() == 0) {
 
  614      inputHeight = this->GetInputHeight();
 
  615      inputWidth = this->GetInputWidth();
 
  616      inputDepth = this->GetInputDepth();
 
  618      Layer_t *lastLayer = fLayers.back();
 
  619      inputHeight = lastLayer->GetHeight();
 
  620      inputWidth = lastLayer->GetWidth();
 
  621      inputDepth = lastLayer->GetDepth();
 
  623   if (inputSize != inputWidth) {
 
  624      Error(
"AddBasicGRULayer", 
"Inconsistent input size with input layout  - it should be %zu instead of %zu", inputSize, inputWidth);
 
  626   if (timeSteps != inputHeight && timeSteps != inputDepth) {
 
  627      Error(
"AddBasicGRULayer", 
"Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
 
  634                                         fIsTraining, this->GetInitialization());
 
  635   fLayers.push_back(basicGRULayer);
 
  636   return basicGRULayer;
 
  640template <
typename Architecture_t, 
typename Layer_t>
 
  643   fLayers.push_back(basicGRULayer);
 
  652template <
typename Architecture_t, 
typename Layer_t>
 
  655                                                                                        Scalar_t dropoutProbability,
 
  656                                                                                        Scalar_t corruptionLevel)
 
  658   size_t batchSize = this->GetBatchSize();
 
  660   TCorruptionLayer<Architecture_t> *corruptionLayer =
 
  661      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
 
  662   fLayers.push_back(corruptionLayer);
 
  663   return corruptionLayer;
 
  667template <
typename Architecture_t, 
typename Layer_t>
 
  670   fLayers.push_back(corruptionLayer);
 
  674template <
typename Architecture_t, 
typename Layer_t>
 
  675TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
 
  677   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
 
  679   size_t batchSize = this->GetBatchSize();
 
  681   TCompressionLayer<Architecture_t> *compressionLayer = 
new TCompressionLayer<Architecture_t>(
 
  682      batchSize, visibleUnits, hiddenUnits, dropoutProbability, 
f, weights, biases);
 
  683   fLayers.push_back(compressionLayer);
 
  684   return compressionLayer;
 
  688template <
typename Architecture_t, 
typename Layer_t>
 
  689void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
 
  691   fLayers.push_back(compressionLayer);
 
  695template <
typename Architecture_t, 
typename Layer_t>
 
  696TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
 
  697   size_t visibleUnits, 
size_t hiddenUnits, Scalar_t learningRate, 
EActivationFunction f, std::vector<Matrix_t> weights,
 
  698   std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
 
  700   size_t batchSize = this->GetBatchSize();
 
  702   TReconstructionLayer<Architecture_t> *reconstructionLayer = 
new TReconstructionLayer<Architecture_t>(
 
  703      batchSize, visibleUnits, hiddenUnits, learningRate, 
f, weights, biases, corruptionLevel, dropoutProbability);
 
  704   fLayers.push_back(reconstructionLayer);
 
  705   return reconstructionLayer;
 
  709template <
typename Architecture_t, 
typename Layer_t>
 
  710void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
 
  711   TReconstructionLayer<Architecture_t> *reconstructionLayer)
 
  713   fLayers.push_back(reconstructionLayer);
 
  717template <
typename Architecture_t, 
typename Layer_t>
 
  718TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
 
  719   size_t inputUnits, 
size_t outputUnits, 
size_t testDataBatchSize, Scalar_t learningRate)
 
  721   size_t batchSize = this->GetBatchSize();
 
  723   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
 
  724      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
 
  725   fLayers.push_back(logisticRegressionLayer);
 
  726   return logisticRegressionLayer;
 
  729template <
typename Architecture_t, 
typename Layer_t>
 
  730void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
 
  731   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
 
  733   fLayers.push_back(logisticRegressionLayer);
 
  739template <
typename Architecture_t, 
typename Layer_t>
 
  743   size_t batchSize = this->GetBatchSize();
 
  747   Scalar_t decay = this->GetWeightDecay();
 
  749   if (fLayers.size() == 0) {
 
  750      inputWidth = this->GetInputWidth();
 
  752      Layer_t *lastLayer = fLayers.back();
 
  753      inputWidth = lastLayer->GetWidth();
 
  759   fLayers.push_back(denseLayer);
 
  765template <
typename Architecture_t, 
typename Layer_t>
 
  768   fLayers.push_back(denseLayer);
 
  772template <
typename Architecture_t, 
typename Layer_t>
 
  774                                                                                  size_t width, 
bool flattening)
 
  776   size_t batchSize = this->GetBatchSize();
 
  780   size_t outputNSlices;
 
  784   if (fLayers.size() == 0) {
 
  785      inputDepth = this->GetInputDepth();
 
  786      inputHeight = this->GetInputHeight();
 
  787      inputWidth = this->GetInputWidth();
 
  789      Layer_t *lastLayer = fLayers.back();
 
  790      inputDepth = lastLayer->GetDepth();
 
  791      inputHeight = lastLayer->GetHeight();
 
  792      inputWidth = lastLayer->GetWidth();
 
  797      outputNRows = this->GetBatchSize();
 
  799      size_t inputNCols =  inputDepth * inputHeight *  inputWidth;
 
  800      if (outputNCols != 0 && outputNCols != inputNCols ) {
 
  801         Info(
"AddReshapeLayer",
"Dimensions not compatibles - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - Force flattening output to be %zu",
 
  802              inputDepth, inputHeight, inputWidth, depth, 
height, 
width,inputNCols);
 
  804      outputNCols = inputNCols;
 
  809      outputNSlices = this->GetBatchSize();
 
  816                                        outputNSlices, outputNRows, outputNCols, flattening);
 
  818   fLayers.push_back(reshapeLayer);
 
  824template <
typename Architecture_t, 
typename Layer_t>
 
  828   size_t batchSize = this->GetBatchSize();
 
  829   size_t inputDepth = 0;
 
  830   size_t inputHeight = 0;
 
  831   size_t inputWidth = 0;
 
  834   std::vector<size_t>  shape = {1, 1, 1};
 
  835   if (fLayers.size() == 0) {
 
  836      inputDepth = this->GetInputDepth();
 
  837      inputHeight = this->GetInputHeight();
 
  838      inputWidth = this->GetInputWidth();
 
  840      shape[0] = batchSize;
 
  841      shape[1] = inputWidth;
 
  844      Layer_t *lastLayer = fLayers.back();
 
  845      inputDepth = lastLayer->GetDepth();
 
  846      inputHeight = lastLayer->GetHeight();
 
  847      inputWidth = lastLayer->GetWidth();
 
  848      shape = lastLayer->GetOutput().GetShape();
 
  852      if (shape.size() > 3) {
 
  853         for (
size_t i = 3; i < shape.size(); ++i)
 
  854            shape[2] *= shape[i];
 
  863   fLayers.push_back(bnormLayer);
 
  869template <
typename Architecture_t, 
typename Layer_t>
 
  872   fLayers.push_back(reshapeLayer);
 
  876template <
typename Architecture_t, 
typename Layer_t>
 
  879   for (
size_t i = 0; i < fLayers.size(); i++) {
 
  880      fLayers[i]->Initialize();
 
  885template <
typename Architecture_t, 
typename Layer_t>
 
  888   for (
size_t i = 0; i < fLayers.size(); i++) {
 
  889      fLayers[i]->ResetTraining();
 
  895template <
typename Architecture_t, 
typename Layer_t>
 
  898   fLayers.front()->Forward(
input, applyDropout);
 
  900   for (
size_t i = 1; i < fLayers.size(); i++) {
 
  901      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
 
  910template <
typename Architecture_t, 
typename Layer_t>
 
  912                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
 
  913                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, 
size_t epochs,
 
  916   std::vector<Matrix_t> inp1;
 
  917   std::vector<Matrix_t> inp2;
 
  918   size_t numOfHiddenLayers = 
sizeof(numHiddenUnitsPerLayer) / 
sizeof(numHiddenUnitsPerLayer[0]);
 
  920   size_t visibleUnits = (size_t)
input[0].GetNrows();
 
  922   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
 
  923   fLayers.back()->Initialize();
 
  924   fLayers.back()->Forward(
input, applyDropout);
 
  927   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, 
f, fLayers.back()->GetWeights(),
 
  928                       fLayers.back()->GetBiases());
 
  929   fLayers.back()->Initialize();
 
  930   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); 
 
  932   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, 
f, fLayers.back()->GetWeights(),
 
  933                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
 
  934   fLayers.back()->Initialize();
 
  935   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
 
  937   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
 
  940   size_t weightsSize = fLayers.back()->GetWeights().size();
 
  941   size_t biasesSize = fLayers.back()->GetBiases().size();
 
  942   for (
size_t epoch = 0; epoch < epochs - 1; epoch++) {
 
  944      for (
size_t j = 0; j < weightsSize; j++) {
 
  945         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
 
  947      for (
size_t j = 0; j < biasesSize; j++) {
 
  948         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
 
  950      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
 
  951      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
 
  952      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
 
  953                                            fLayers[fLayers.size() - 3]->GetOutput(), 
input);
 
  955   fLayers.back()->Print();
 
  957   for (
size_t i = 1; i < numOfHiddenLayers; i++) {
 
  959      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
 
  960      fLayers.back()->Initialize();
 
  961      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(),
 
  964      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, 
f,
 
  965                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
 
  966      fLayers.back()->Initialize();
 
  967      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
 
  969      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, 
f,
 
  970                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
 
  972      fLayers.back()->Initialize();
 
  973      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
 
  975      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
 
  976                               fLayers[fLayers.size() - 5]->GetOutput());
 
  979      size_t _weightsSize = fLayers.back()->GetWeights().size();
 
  980      size_t _biasesSize = fLayers.back()->GetBiases().size();
 
  981      for (
size_t epoch = 0; epoch < epochs - 1; epoch++) {
 
  983         for (
size_t j = 0; j < _weightsSize; j++) {
 
  984            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
 
  986         for (
size_t j = 0; j < _biasesSize; j++) {
 
  987            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
 
  989         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
 
  990         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
 
  991         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
 
  992                                               fLayers[fLayers.size() - 3]->GetOutput(),
 
  993                                               fLayers[fLayers.size() - 5]->GetOutput());
 
  995      fLayers.back()->Print();
 
 1000template <
typename Architecture_t, 
typename Layer_t>
 
 1001auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &
input, std::vector<Matrix_t> &testInput,
 
 1002                                                 std::vector<Matrix_t> &inputLabel, 
size_t outputUnits,
 
 1003                                                 size_t testDataBatchSize, Scalar_t learningRate, 
size_t epochs) -> 
void 
 1005   std::vector<Matrix_t> inp1;
 
 1006   std::vector<Matrix_t> inp2;
 
 1007   if (fLayers.size() == 0) 
 
 1009      size_t inputUnits = 
input[0].GetNrows();
 
 1011      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
 
 1012      fLayers.back()->Initialize();
 
 1013      for (
size_t i = 0; i < epochs; i++) {
 
 1014         fLayers.back()->Backward(inputLabel, inp1, 
input, inp2);
 
 1016      fLayers.back()->Forward(
input, 
false);
 
 1017      fLayers.back()->Print();
 
 1019      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
 
 1020      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
 
 1021      fLayers.back()->Initialize();
 
 1022      for (
size_t i = 0; i < epochs; i++) {
 
 1023         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
 
 1025      fLayers.back()->Forward(testInput, 
false);
 
 1026      fLayers.back()->Print();
 
 1032template <
typename Architecture_t, 
typename Layer_t>
 
 1039   Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
 
 1040   Matrix_t last_output = fLayers.back()->GetOutputAt(0);
 
 1041   evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth,
 
 1042                                     last_output, weights);
 
 1044   for (
size_t i = fLayers.size() - 1; i > 0; i--) {
 
 1045      auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
 
 1046      auto &activations_backward = fLayers[i - 1]->GetOutput();
 
 1047      fLayers[i]->Backward(activation_gradient_backward, activations_backward);
 
 1053   fLayers[0]->Backward(dummy, 
input);
 
 1056#ifdef USE_PARALLEL_DEEPNET 
 1059template <
typename Architecture_t, 
typename Layer_t>
 
 1062                                                        bool applyDropout) -> 
void 
 1064   size_t depth = this->GetDepth();
 
 1067   for (
size_t i = 0; i < nets.size(); i++) {
 
 1068      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
 
 1072   for (
size_t i = 1; i < depth; i++) {
 
 1073      for (
size_t j = 0; j < nets.size(); j++) {
 
 1074         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
 
 1080template <
typename Architecture_t, 
typename Layer_t>
 
 1081auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
 
 1082                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
 
 1083                                                         Scalar_t learningRate) -> 
void 
 1085   std::vector<Matrix_t> inp1;
 
 1086   std::vector<Matrix_t> inp2;
 
 1087   size_t depth = this->GetDepth();
 
 1090   for (
size_t i = 0; i < nets.size(); i++) {
 
 1091      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
 
 1092                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
 
 1093                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
 
 1097   for (
size_t i = depth - 1; i > 0; i--) {
 
 1098      for (
size_t j = 0; j < nets.size(); j++) {
 
 1099         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
 
 1100                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
 
 1104   std::vector<Matrix_t> dummy;
 
 1107   for (
size_t i = 0; i < nets.size(); i++) {
 
 1108      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
 
 1112   for (
size_t i = 0; i < nets.size(); i++) {
 
 1113      for (
size_t j = 0; j < depth; j++) {
 
 1114         Layer_t *masterLayer = this->GetLayerAt(j);
 
 1115         Layer_t *layer = nets[i].GetLayerAt(j);
 
 1117         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
 
 1118         layer->CopyWeights(masterLayer->GetWeights());
 
 1120         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
 
 1121         layer->CopyBiases(masterLayer->GetBiases());
 
 1127template <
typename Architecture_t, 
typename Layer_t>
 
 1128auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
 
 1129                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
 
 1130                                                                 Scalar_t learningRate, Scalar_t momentum) -> 
void 
 1132   std::vector<Matrix_t> inp1;
 
 1133   std::vector<Matrix_t> inp2;
 
 1134   size_t depth = this->GetDepth();
 
 1137   for (
size_t i = 0; i < nets.size(); i++) {
 
 1138      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
 
 1139                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
 
 1140                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
 
 1144   for (
size_t i = depth - 1; i > 0; i--) {
 
 1145      Layer_t *masterLayer = this->GetLayerAt(i);
 
 1147      for (
size_t j = 0; j < nets.size(); j++) {
 
 1148         Layer_t *layer = nets[j].GetLayerAt(i);
 
 1150         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
 
 1152         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
 
 1153         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
 
 1156      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
 
 1157      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
 
 1160   std::vector<Matrix_t> dummy;
 
 1163   Layer_t *masterFirstLayer = this->GetLayerAt(0);
 
 1164   for (
size_t i = 0; i < nets.size(); i++) {
 
 1165      Layer_t *layer = nets[i].GetLayerAt(0);
 
 1167      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
 
 1169      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
 
 1170      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
 
 1173   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
 
 1174   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);
 
 1176   for (
size_t i = 0; i < depth; i++) {
 
 1177      Layer_t *masterLayer = this->GetLayerAt(i);
 
 1178      masterLayer->Update(1.0);
 
 1180      for (
size_t j = 0; j < nets.size(); j++) {
 
 1181         Layer_t *layer = nets[j].GetLayerAt(i);
 
 1183         layer->CopyWeights(masterLayer->GetWeights());
 
 1184         layer->CopyBiases(masterLayer->GetBiases());
 
 1190template <
typename Architecture_t, 
typename Layer_t>
 
 1191auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
 
 1192                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
 
 1193                                                                 Scalar_t learningRate, Scalar_t momentum) -> 
void 
 1195   std::cout << 
"Parallel Backward Nestorov" << std::endl;
 
 1196   std::vector<Matrix_t> inp1;
 
 1197   std::vector<Matrix_t> inp2;
 
 1198   size_t depth = this->GetDepth();
 
 1201   for (
size_t i = 0; i < nets.size(); i++) {
 
 1202      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
 
 1203                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
 
 1204                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
 
 1208   for (
size_t i = depth - 1; i > 0; i--) {
 
 1209      for (
size_t j = 0; j < nets.size(); j++) {
 
 1210         Layer_t *layer = nets[j].GetLayerAt(i);
 
 1212         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
 
 1217   std::vector<Matrix_t> dummy;
 
 1220   for (
size_t i = 0; i < nets.size(); i++) {
 
 1221      Layer_t *layer = nets[i].GetLayerAt(0);
 
 1222      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
 
 1225   for (
size_t i = 0; i < depth; i++) {
 
 1226      Layer_t *masterLayer = this->GetLayerAt(i);
 
 1227      for (
size_t j = 0; j < nets.size(); j++) {
 
 1228         Layer_t *layer = nets[j].GetLayerAt(i);
 
 1230         layer->CopyWeights(masterLayer->GetWeights());
 
 1231         layer->CopyBiases(masterLayer->GetBiases());
 
 1233         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
 
 1234         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
 
 1237      for (
size_t j = 0; j < nets.size(); j++) {
 
 1238         Layer_t *layer = nets[j].GetLayerAt(i);
 
 1240         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
 
 1241         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
 
 1244      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
 
 1245      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
 
 1247      masterLayer->Update(1.0);
 
 1253template <
typename Architecture_t, 
typename Layer_t>
 
 1256   for (
size_t i = 0; i < fLayers.size(); i++) {
 
 1257      fLayers[i]->Update(learningRate);
 
 // Loss computed from the output already held by the last layer; the visible lines do not
 // run a forward pass. Const member returning Scalar_t (trailing return type below).
 // The start of the signature is missing from this excerpt; groundTruth and weights are
 // presumably its first two parameters -- TODO confirm.
 1262template <
typename Architecture_t, 
typename Layer_t>
 
 1264                                             bool includeRegularization) 
const -> 
Scalar_t 
 // Evaluate the network's loss function on groundTruth versus the first output of the last layer.
 // NOTE(review): fLayers.back() requires at least one layer; no emptiness check is visible here.
 1267   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);
 
 // The regularization term is added only when the caller asks for it.
 1270   if (includeRegularization) {
 
 1271      loss += RegularizationTerm();
 
 // Loss overload that first propagates `input` through the network and then delegates to
 // the Loss(groundTruth, weights, includeRegularization) overload.
 // The first part of the signature (including `input` and `groundTruth`) is missing from this excerpt.
 1278template <
typename Architecture_t, 
typename Layer_t>
 
 1280                                             const Matrix_t &weights, 
bool inTraining, 
bool includeRegularization)
 
 // inTraining is passed straight through as the second argument of Forward.
 1283   Forward(
input, inTraining);
 
 1284   return Loss(groundTruth, weights, includeRegularization);
 
 // Regularization contribution: accumulates regularization<Architecture_t>(...) over every
 // weight entry of every layer and returns the sum multiplied by GetWeightDecay().
 // Presumably the body of TDeepNet::RegularizationTerm(); the signature and the declaration
 // of `reg` are not present in this excerpt -- TODO confirm its type and initial value.
 1288template <
typename Architecture_t, 
typename Layer_t>
 
 // Outer loop: layers. Inner loop: each element of that layer's GetWeights() collection.
 1292   for (
size_t i = 0; i < fLayers.size(); i++) {
 
 1293      for (
size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
 
 1294         reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
 
 1297   return this->GetWeightDecay() * 
reg;
 
 // Fills `predictions` by calling evaluate<Architecture_t> with `f` on the first output of the
 // last layer; the visible lines do not run a forward pass.
 // The signature line is missing from this excerpt.
 // NOTE(review): fLayers.back() requires at least one layer; no emptiness check is visible here.
 1302template <
typename Architecture_t, 
typename Layer_t>
 
 1306   evaluate<Architecture_t>(predictions, 
f, fLayers.back()->GetOutputAt(0));
 
 // Prediction overload that first propagates `input` through the network and then calls
 // evaluate<Architecture_t> with `f` on the first output of the last layer.
 // The signature line is missing from this excerpt.
 1310template <
typename Architecture_t, 
typename Layer_t>
 
 // Second argument of Forward is hard-coded to false for prediction.
 1314   Forward(
input, 
false);
 
 // NOTE(review): fLayers.back() requires at least one layer; no emptiness check is visible here.
 1316   evaluate<Architecture_t>(predictions, 
f, fLayers.back()->GetOutputAt(0));
 
 // Writes a summary of the network to std::cout: depth, input dimensions, batch size and
 // loss function on one line, followed by one entry per layer.
 // The signature line is missing from this excerpt.
 1320template <
typename Architecture_t, 
typename Layer_t>
 
 1323   std::cout << 
"DEEP NEURAL NETWORK:   Depth = " << this->GetDepth();
 
 1324   std::cout << 
"  Input = ( " << this->GetInputDepth();
 
 1325   std::cout << 
", " << this->GetInputHeight();
 
 1326   std::cout << 
", " << this->GetInputWidth() << 
" )";
 
 1327   std::cout << 
"  Batch size = " << this->GetBatchSize();
 
 // The loss-function enum value is cast to char, so it is streamed as a single character.
 1328   std::cout << 
"  Loss function = " << 
static_cast<char>(this->GetLossFunction()) << std::endl;
 
 // Each layer prints itself after a tab-delimited "Layer <index>" prefix.
 1332   for (
size_t i = 0; i < fLayers.size(); i++) {
 
 1333      std::cout << 
"\tLayer " << i << 
"\t";
 
 1334      fLayers[i]->Print();
 
 // Assigns a dropout probability to every layer: layer i receives probabilities[i] when the
 // vector has an entry at that index.
 // The signature's first line is missing from this excerpt.
 1339template <
typename Architecture_t, 
typename Layer_t>
 
 1341    const std::vector<Double_t> & probabilities)
 
 1343   for (
size_t i = 0; i < fLayers.size(); i++) {
 
 // Bounds check so that a `probabilities` vector shorter than fLayers is never indexed out of range.
 1344      if (i < probabilities.size()) {
 
 1345         fLayers[i]->SetDropoutProbability(probabilities[i]);
 
 // Fallback value 1.0 for layers without an entry; source line 1346 (presumably the `else`)
 // is not present in this excerpt -- TODO confirm.
 1347         fLayers[i]->SetDropoutProbability(1.0);
 
#define R(a, b, c, d, e, f, g, h, i)
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void reg
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
Generic Max Pooling Layer class.
Layer implementing Batch Normalization.
Generic Deep Neural Network class.
const std::vector< Layer_t * > & GetLayers() const
void AddDenseLayer(TDenseLayer< Architecture_t > *denseLayer)
Function for adding Dense Layer in the Deep Neural Network, when the layer is already created.
size_t GetBatchHeight() const
void SetBatchDepth(size_t batchDepth)
void Forward(Tensor_t &input, bool applyDropout=false)
Function that executes the entire forward pass in the network.
void SetLossFunction(ELossFunction J)
size_t fBatchHeight
The height of the batch used for training/testing.
ERegularization GetRegularization() const
void AddBasicGRULayer(TBasicGRULayer< Architecture_t > *basicGRULayer)
Function for adding GRU Layer in the Deep Neural Network, when the layer is already created.
std::vector< Layer_t * > & GetLayers()
typename Architecture_t::Scalar_t Scalar_t
void Initialize()
DAE functions.
size_t GetBatchSize() const
Getters.
Scalar_t GetWeightDecay() const
size_t GetInputDepth() const
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights)
Function that executes the entire backward pass in the network.
std::vector< Layer_t * > fLayers
The layers constituting the DeepNet.
size_t fBatchDepth
The depth of the batch used for training/testing.
size_t fInputDepth
The depth of the input.
Layer_t * GetLayerAt(size_t i)
Get the layer in the vector of layers at position i.
void Print() const
Print the Deep Net Info.
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
void SetWeightDecay(Scalar_t weightDecay)
void AddReshapeLayer(TReshapeLayer< Architecture_t > *reshapeLayer)
Function for adding Reshape Layer in the Deep Neural Network, when the layer is already created.
void Clear()
Remove all layers from the network.
Scalar_t RegularizationTerm() const
Function for computing the regularization term to be added to the loss function.
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth, size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI=EInitialization::kZero, ERegularization fR=ERegularization::kNone, Scalar_t fWeightDecay=0.0, bool isTraining=false)
Constructor.
void Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f)
Prediction for the given inputs, based on what network learned.
void SetInputDepth(size_t inputDepth)
size_t GetInputHeight() const
size_t fBatchSize
Batch size used for training and evaluation.
void Prediction(Matrix_t &predictions, EOutputFunction f) const
Prediction based on activations stored in the last layer.
size_t fInputWidth
The width of the input.
void SetInputHeight(size_t inputHeight)
size_t GetBatchWidth() const
void AddBasicRNNLayer(TBasicRNNLayer< Architecture_t > *basicRNNLayer)
Function for adding a vanilla RNN layer when the layer is already created.
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
void AddMaxPoolLayer(CNN::TMaxPoolLayer< Architecture_t > *maxPoolLayer)
Function for adding Max Pooling layer in the Deep Neural Network, when the layer is already created.
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Scalar_t fWeightDecay
The weight decay factor.
Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization=true) const
Function for evaluating the loss, based on the activations stored in the last layer.
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
ERegularization fR
The regularization used for the network.
void ResetTraining()
Function that resets some training flags, but not the weights, after looping over all the events.
size_t GetInputWidth() const
bool isInteger(Scalar_t x) const
size_t GetOutputWidth() const
bool fIsTraining
Is the network training?
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
void SetBatchSize(size_t batchSize)
Setters.
void AddConvLayer(TConvLayer< Architecture_t > *convLayer)
Function for adding Convolution Layer in the Deep Neural Network, when the layer is already created.
size_t fInputHeight
The height of the input.
void SetRegularization(ERegularization R)
TDeepNet(const TDeepNet &)
Copy-constructor.
size_t fBatchWidth
The width of the batch used for training/testing.
typename Architecture_t::Tensor_t Tensor_t
ELossFunction fJ
The loss function of the network.
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
void SetBatchWidth(size_t batchWidth)
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
TDeepNet()
Default Constructor.
void SetBatchHeight(size_t batchHeight)
void Update(Scalar_t learningRate)
Function that will update the weights and biases in the layers that contain weights and biases.
ELossFunction GetLossFunction() const
size_t calculateDimension(int imgDim, int fltDim, int padding, int stride)
const Layer_t * GetLayerAt(size_t i) const
void SetInitialization(EInitialization I)
EInitialization GetInitialization() const
void SetInputWidth(size_t inputWidth)
typename Architecture_t::Matrix_t Matrix_t
void AddBasicLSTMLayer(TBasicLSTMLayer< Architecture_t > *basicLSTMLayer)
Function for adding LSTM Layer in the Deep Neural Network, when the layer is already created.
Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining=false, bool includeRegularization=true)
Function for evaluating the loss, based on the propagation of the given input.
EInitialization fI
The initialization method of the network.
size_t GetBatchDepth() const
EOutputFunction
Enum that represents output functions.
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
ELossFunction
Enum that represents objective functions for the net, i.e.
create variable transformations