#ifndef TMVA_DNN_DEEPNET
#define TMVA_DNN_DEEPNET

#include "TMVA/DNN/DAE/CompressionLayer.h"
#include "TMVA/DNN/DAE/CorruptionLayer.h"
#include "TMVA/DNN/DAE/ReconstructionLayer.h"
#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
/** Generic Deep Neural Network class. */
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
class TDeepNet {
public:
   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;
   /** Constructor. */
   TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
            size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
            ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);

   /** Function for adding Convolution layer in the Deep Neural Network, with a given depth, filter size,
       stride, padding, activation function and dropout probability. */
   TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
                                            size_t strideCols, size_t paddingHeight, size_t paddingWidth,
                                            EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   /** Function for adding Max Pooling layer in the Deep Neural Network, with a given frame size, stride and
       dropout probability. */
   TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
                                                  size_t strideCols, Scalar_t dropoutProbability = 1.0);
   /** Function for adding Recurrent Layer in the Deep Neural Network, with given parameters. */
   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false, bool returnSequence = false,
                                                    EActivationFunction f = EActivationFunction::kTanh);

   /** Function for adding LSTM Layer in the Deep Neural Network, with given parameters. */
   TBasicLSTMLayer<Architecture_t> *AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                      bool rememberState = false, bool returnSequence = false);

   /** Function for adding GRU Layer in the Deep Neural Network, with given parameters. */
   TBasicGRULayer<Architecture_t> *AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false, bool returnSequence = false,
                                                    bool resetGateAfter = false);
   /** Function for adding Corruption Layer in the Deep Neural Network, with given numbers of visible and
       hidden units; the input is corrupted according to corruptionLevel and dropoutProbability. */
   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                        Scalar_t dropoutProbability, Scalar_t corruptionLevel);

   /** Function for adding Corruption Layer in the Deep Neural Network, when the layer is already created. */
   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);

   /** Function for adding Compression Layer in the Deep Neural Network, with given numbers of visible and
       hidden units. */
   TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                          Scalar_t dropoutProbability, EActivationFunction f,
                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);

   /** Function for adding Compression Layer in the Deep Neural Network, when the layer is already created. */
   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);

   /** Function for adding Reconstruction Layer in the Deep Neural Network, with given numbers of visible and
       hidden units. */
   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                                Scalar_t learningRate, EActivationFunction f,
                                                                std::vector<Matrix_t> weights,
                                                                std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
                                                                Scalar_t dropoutProbability);

   /** Function for adding Reconstruction Layer in the Deep Neural Network, when the layer is already created. */
   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);

   /** Function for adding Logistic Regression Layer in the Deep Neural Network, with given numbers of input
       and output units. */
   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
                                                                        size_t testDataBatchSize,
                                                                        Scalar_t learningRate);

   /** Function for adding Logistic Regression Layer in the Deep Neural Network, when the layer is already created. */
   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
   /** Function for greedily pre-training the Deep Neural Network, layer by layer, with denoising-autoencoder
       (corruption / compression / reconstruction) layers. */
   void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
                 bool applyDropout = false);

   /** Function for fine-tuning the pre-trained network by adding and training a logistic regression layer. */
   void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
                 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
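
   /* Illustrative usage sketch (not part of the header): how a small convolutional network could be
      assembled with the interface declared above. The TCpu backend, the layer sizes and the batch
      layout chosen here are assumptions made only for this example.
      \code
      using Architecture_t = TMVA::DNN::TCpu<Double_t>;
      using Net_t          = TMVA::DNN::TDeepNet<Architecture_t>;

      // Batch of 32 single-channel 28x28 images, batch layout assumed to be 32 x 1 x 784.
      Net_t net(32, 1, 28, 28, 32, 1, 784, ELossFunction::kCrossEntropy,
                EInitialization::kGauss, ERegularization::kL2, 1.E-4, true);

      net.AddConvLayer(8, 3, 3, 1, 1, 1, 1, EActivationFunction::kRelu); // 8 x 28 x 28
      net.AddMaxPoolLayer(2, 2, 2, 2);                                   // 8 x 14 x 14
      net.AddReshapeLayer(1, 1, 8 * 14 * 14, true);                      // flatten
      net.AddDenseLayer(64, EActivationFunction::kRelu);
      net.AddDenseLayer(1, EActivationFunction::kIdentity);
      net.Initialize();
      \endcode
   */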
#ifdef USE_PARALLEL_DEEPNET
   // Parallel training helpers (ParallelForward, ParallelBackward, ...) are declared in this block.
#endif // USE_PARALLEL_DEEPNET

   /** Function for evaluating the loss, based on the propagation of the given input. */
   Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining = false,
                 bool includeRegularization = true);

   // Remaining member functions, getters and setters of the class are declared here.
};
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet()
   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
     fIsTraining(true), fWeightDecay(0.0)
{
}
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J),
     fI(I), fR(R), fWeightDecay(weightDecay)
{
}
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
     fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI),
     fR(deepNet.fR), fWeightDecay(deepNet.fWeightDecay)
{
}
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
{
   // Release the layers' memory
   for (auto layer : fLayers)
      delete layer;
}
template <typename Architecture_t, typename Layer_t>
size_t TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride)
{
   Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      int iLayer = fLayers.size();
      Fatal("calculateDimension",
            "Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
            iLayer, imgDim, fltDim, padding, stride);
   }

   return (size_t)dimension;
}
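
/* Worked instance of the output-size formula above (illustrative only): a 32-pixel input dimension
   with a 5-pixel filter, padding 2 and stride 1 gives ((32 - 5 + 2*2) / 1) + 1 = 32, i.e. the
   spatial size is preserved, while padding 0 gives ((32 - 5) / 1) + 1 = 28. */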
template <typename Architecture_t, typename Layer_t>
TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
                                                                            size_t filterWidth, size_t strideRows,
                                                                            size_t strideCols, size_t paddingHeight,
                                                                            size_t paddingWidth, EActivationFunction f,
                                                                            Scalar_t dropoutProbability)
{
   // All variables defining a convolutional layer
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   // Create the conv layer
   TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
      strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);

   fLayers.push_back(convLayer);
   return convLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
{
   fLayers.push_back(convLayer);
}
template <typename Architecture_t, typename Layer_t>
TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
                                                                                  size_t strideRows, size_t strideCols,
                                                                                  Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
      strideRows, strideCols, dropoutProbability);

   fLayers.push_back(maxPoolLayer);
   return maxPoolLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(CNN::TMaxPoolLayer<Architecture_t> *maxPoolLayer)
{
   fLayers.push_back(maxPoolLayer);
}
template <typename Architecture_t, typename Layer_t>
TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps,
                                                                                    bool rememberState, bool returnSequence,
                                                                                    EActivationFunction f)
{
   // Check that the input size and the time steps are consistent with the input layout
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicRNNLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicRNNLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu or %zu", timeSteps, inputHeight, inputDepth);
   }

   TBasicRNNLayer<Architecture_t> *basicRNNLayer =
      new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
                                         f, fIsTraining, this->GetInitialization());
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}
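
/* Layout sketch (illustrative only): the consistency checks above require the net's input width to
   equal inputSize and its input height (or depth) to equal timeSteps. With the Net_t alias from the
   earlier sketch, a recurrent net could therefore be set up as
   \code
   // 20 time steps of 10 features each, batch size 32 (sizes chosen for the example).
   Net_t rnn(32, 1, 20, 10, 32, 20, 10, ELossFunction::kMeanSquaredError);
   rnn.AddBasicRNNLayer(16, 10, 20); // stateSize = 16, inputSize = 10, timeSteps = 20
   \endcode
*/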
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
{
   fLayers.push_back(basicRNNLayer);
}
template <typename Architecture_t, typename Layer_t>
TBasicLSTMLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicLSTMLayer(size_t stateSize, size_t inputSize,
                                                                                      size_t timeSteps, bool rememberState,
                                                                                      bool returnSequence)
{
   // Check that the input size and the time steps are consistent with the input layout
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicLSTMLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicLSTMLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
   }

   TBasicLSTMLayer<Architecture_t> *basicLSTMLayer =
      new TBasicLSTMLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
                                          fIsTraining, this->GetInitialization());
   fLayers.push_back(basicLSTMLayer);
   return basicLSTMLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicLSTMLayer(TBasicLSTMLayer<Architecture_t> *basicLSTMLayer)
{
   fLayers.push_back(basicLSTMLayer);
}
template <typename Architecture_t, typename Layer_t>
TBasicGRULayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicGRULayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps, bool rememberState,
                                                                                    bool returnSequence, bool resetGateAfter)
{
   // Check that the input size and the time steps are consistent with the input layout
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicGRULayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicGRULayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
   }

   TBasicGRULayer<Architecture_t> *basicGRULayer =
      new TBasicGRULayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
                                         resetGateAfter, fIsTraining, this->GetInitialization());
   fLayers.push_back(basicGRULayer);
   return basicGRULayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicGRULayer(TBasicGRULayer<Architecture_t> *basicGRULayer)
{
   fLayers.push_back(basicGRULayer);
}
template <typename Architecture_t, typename Layer_t>
TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
                                                                                        size_t hiddenUnits,
                                                                                        Scalar_t dropoutProbability,
                                                                                        Scalar_t corruptionLevel)
{
   size_t batchSize = this->GetBatchSize();

   TCorruptionLayer<Architecture_t> *corruptionLayer =
      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
   fLayers.push_back(corruptionLayer);
   return corruptionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
{
   fLayers.push_back(corruptionLayer);
}
template <typename Architecture_t, typename Layer_t>
TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
{
   size_t batchSize = this->GetBatchSize();

   TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
   fLayers.push_back(compressionLayer);
   return compressionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
{
   fLayers.push_back(compressionLayer);
}
template <typename Architecture_t, typename Layer_t>
TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
   std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();

   TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
   fLayers.push_back(reconstructionLayer);
   return reconstructionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   TReconstructionLayer<Architecture_t> *reconstructionLayer)
{
   fLayers.push_back(reconstructionLayer);
}
template <typename Architecture_t, typename Layer_t>
TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
{
   size_t batchSize = this->GetBatchSize();

   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
   fLayers.push_back(logisticRegressionLayer);
   return logisticRegressionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
{
   fLayers.push_back(logisticRegressionLayer);
}
template <typename Architecture_t, typename Layer_t>
TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
                                                                              Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputWidth = lastLayer->GetWidth();
   }

   TDenseLayer<Architecture_t> *denseLayer =
      new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);

   fLayers.push_back(denseLayer);
   return denseLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
{
   fLayers.push_back(denseLayer);
}
template <typename Architecture_t, typename Layer_t>
TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
                                                                                  size_t width, bool flattening)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   size_t outputNSlices;
   size_t outputNRows;
   size_t outputNCols;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   if (flattening) {
      outputNSlices = 1;
      outputNRows = this->GetBatchSize();
      outputNCols = depth * height * width;
      size_t inputNCols = inputDepth * inputHeight * inputWidth;
      if (outputNCols != 0 && outputNCols != inputNCols) {
         Info("AddReshapeLayer",
              "Dimensions not compatibles - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - Force flattening output to be %zu",
              inputDepth, inputHeight, inputWidth, depth, height, width, inputNCols);
      }
      outputNCols = inputNCols;
      depth = 1;
      height = 1;
      width = outputNCols;
   } else {
      outputNSlices = this->GetBatchSize();
      outputNRows = depth;
      outputNCols = height * width;
   }

   TReshapeLayer<Architecture_t> *reshapeLayer =
      new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
                                        outputNSlices, outputNRows, outputNCols, flattening);

   fLayers.push_back(reshapeLayer);
   return reshapeLayer;
}
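
/* Illustrative sketch: a flattening reshape is typically inserted between the convolutional part and
   the dense part of a network (with the net from the earlier sketch). Passing width = 0 together with
   flattening = true lets the output width be forced to inputDepth * inputHeight * inputWidth, as in
   the code above.
   \code
   net.AddMaxPoolLayer(2, 2, 2, 2);
   net.AddReshapeLayer(1, 1, 0, true); // output width forced to the flattened input size
   net.AddDenseLayer(64, EActivationFunction::kRelu);
   \endcode
*/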
template <typename Architecture_t, typename Layer_t>
TBatchNormLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBatchNormLayer(Scalar_t momentum, Scalar_t epsilon)
{
   int axis = -1;
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth = 0;
   size_t inputHeight = 0;
   size_t inputWidth = 0;
   // shape of the tensor normalized by the layer
   std::vector<size_t> shape = {1, 1, 1};
   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      shape[0] = batchSize;
      shape[1] = inputWidth;
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      shape = lastLayer->GetOutput().GetShape();
      if (dynamic_cast<TConvLayer<Architecture_t> *>(lastLayer) != nullptr ||
          dynamic_cast<TMaxPoolLayer<Architecture_t> *>(lastLayer) != nullptr)
         axis = 1; // use the channel axis after a convolutional or pooling layer
      if (shape.size() > 3) {
         for (size_t i = 3; i < shape.size(); ++i)
            shape[2] *= shape[i];
      }
   }

   auto bnormLayer =
      new TBatchNormLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, shape, axis, momentum, epsilon);

   fLayers.push_back(bnormLayer);
   return bnormLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
{
   fLayers.push_back(reshapeLayer);
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Initialize()
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Initialize();
   }
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::ResetTraining()
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->ResetTraining();
   }
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Forward(Tensor_t &input, bool applyDropout)
{
   fLayers.front()->Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
                                                 EActivationFunction f, bool applyDropout) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
   size_t visibleUnits = (size_t)input[0].GetNrows();

   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(input, applyDropout);

   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
                       fLayers.back()->GetBiases());
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                            input);

   size_t weightsSize = fLayers.back()->GetWeights().size();
   size_t biasesSize = fLayers.back()->GetBiases().size();
   for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
      for (size_t j = 0; j < weightsSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
      }
      for (size_t j = 0; j < biasesSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
      }
      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                            fLayers[fLayers.size() - 3]->GetOutput(), input);
   }
   fLayers.back()->Print();
   for (size_t i = 1; i < numOfHiddenLayers; i++) {

      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);

      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
                             dropoutProbability);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                               fLayers[fLayers.size() - 5]->GetOutput());

      size_t _weightsSize = fLayers.back()->GetWeights().size();
      size_t _biasesSize = fLayers.back()->GetBiases().size();
      for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
         for (size_t j = 0; j < _weightsSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
         }
         for (size_t j = 0; j < _biasesSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
         }
         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                               fLayers[fLayers.size() - 3]->GetOutput(),
                                               fLayers[fLayers.size() - 5]->GetOutput());
      }
      fLayers.back()->Print();
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
                                                 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
                                                 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   if (fLayers.size() == 0) { // only a logistic regression layer
      size_t inputUnits = input[0].GetNrows();

      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, input, inp2);
      }
      fLayers.back()->Forward(input, false);
      fLayers.back()->Print();
   } else { // fine-tune on top of the pre-trained layers
      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
      }
      fLayers.back()->Forward(testInput, false);
      fLayers.back()->Print();
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Backward(const Tensor_t &input, const Matrix_t &groundTruth,
                                                 const Matrix_t &weights) -> void
{
   // Evaluate the gradients of the last layer with respect to the loss function
   Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
   Matrix_t last_output = fLayers.back()->GetOutputAt(0);
   evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth,
                                     last_output, weights);

   // Propagate the gradients backwards through the remaining layers
   for (size_t i = fLayers.size() - 1; i > 0; i--) {
      auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      auto &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward);
   }

   // The first layer has no previous activations to propagate into
   Tensor_t dummy;
   fLayers[0]->Backward(dummy, input);
}
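
/* Training-step sketch (illustrative only): one stochastic gradient descent step with this interface
   is a forward pass, a backward pass and a weight update. batchInput, batchTargets and batchWeights
   are assumed to hold one batch of data for the net from the earlier sketch.
   \code
   net.Forward(batchInput, true);                         // applyDropout = true while training
   net.Backward(batchInput, batchTargets, batchWeights);
   net.Update(0.01);                                      // learning rate chosen for the example
   \endcode
*/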
#ifdef USE_PARALLEL_DEEPNET

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                        std::vector<TTensorBatch<Architecture_t>> &batches,
                                                        bool applyDropout) -> void
{
   size_t depth = this->GetDepth();

   // Perform the forward pass of the first layer of every net
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
   }

   // Propagate through the remaining layers
   for (size_t i = 1; i < depth; i++) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
      }
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         Scalar_t learningRate) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in every deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // Backward propagation in the first layers
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // Update the weights of the master deep net and copy them back into the worker nets
   for (size_t i = 0; i < nets.size(); i++) {
      for (size_t j = 0; j < depth; j++) {
         Layer_t *masterLayer = this->GetLayerAt(j);
         Layer_t *layer = nets[i].GetLayerAt(j);

         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
         layer->CopyWeights(masterLayer->GetWeights());

         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in every deep net and accumulate the gradients in the master net
   for (size_t i = depth - 1; i > 0; i--) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
   }

   std::vector<Matrix_t> dummy;

   // Backward propagation and gradient accumulation in the first layers
   Layer_t *masterFirstLayer = this->GetLayerAt(0);
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);

      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);

      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
   }

   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);

   // Update the weights of the master net and copy them into the worker nets
   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      masterLayer->Update(1.0);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in every deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // Backward propagation in the first layers
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);
      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // Copy the master net's weights and its current gradient step into each worker net,
   // then accumulate the worker gradients back into the master net and update it
   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());

         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
      }

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);

      masterLayer->Update(1.0);
   }
}

#endif // USE_PARALLEL_DEEPNET
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Update(learningRate);
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool includeRegularization) const -> Scalar_t
{
   // Evaluate the loss from the activations stored in the last layer
   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);

   // Add the regularization term if requested
   if (includeRegularization) {
      loss += RegularizationTerm();
   }

   return loss;
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool inTraining, bool includeRegularization) -> Scalar_t
{
   Forward(input, inTraining);
   return Loss(groundTruth, weights, includeRegularization);
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
{
   Scalar_t reg = 0.0;
   for (size_t i = 0; i < fLayers.size(); i++) {
      for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
         reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
      }
   }
   return this->GetWeightDecay() * reg;
}
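
/* Restated as a formula, the term returned above is weightDecay * sum_{l,j} R(W_{l,j}), where the sum
   runs over all layers l and their weight matrices j, and R is the L1 or L2 penalty selected by
   GetRegularization(). */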
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
{
   // Apply the output function to the activations stored in the last layer
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f) -> void
{
   Forward(input, false);
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}
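
/* Usage sketch (illustrative only): after training, predictions for a batch are obtained by
   propagating the input and applying an output function, e.g. a sigmoid for binary classification.
   The matrix shape below and the batchInput tensor are assumptions of the example.
   \code
   Matrix_t predictions(net.GetBatchSize(), net.GetOutputWidth());
   net.Prediction(predictions, batchInput, EOutputFunction::kSigmoid);
   \endcode
*/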
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Print() const
{
   std::cout << "DEEP NEURAL NETWORK:  Depth = " << this->GetDepth();
   std::cout << " Input = ( " << this->GetInputDepth();
   std::cout << ", " << this->GetInputHeight();
   std::cout << ", " << this->GetInputWidth() << " )";
   std::cout << "  Batch size = " << this->GetBatchSize();
   std::cout << "  Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;

   // Print the layers
   for (size_t i = 0; i < fLayers.size(); i++) {
      std::cout << "\tLayer " << i << "\t";
      fLayers[i]->Print();
   }
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::SetDropoutProbabilities(const std::vector<Double_t> &probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i]->SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i]->SetDropoutProbability(1.0);
      }
   }
}

#endif