#ifndef TMVA_DNN_DEEPNET
#define TMVA_DNN_DEEPNET
#include "TMVA/DNN/DAE/CompressionLayer.h"
#include "TMVA/DNN/DAE/CorruptionLayer.h"
#include "TMVA/DNN/DAE/ReconstructionLayer.h"
#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
class TDeepNet {
public:
   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;
   TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
            size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
            ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);

   TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
                                            size_t strideCols, size_t paddingHeight, size_t paddingWidth,
                                            EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
                                                  size_t strideCols, Scalar_t dropoutProbability = 1.0);

   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false, bool returnSequence = false,
                                                    EActivationFunction f = EActivationFunction::kTanh);

   TBasicLSTMLayer<Architecture_t> *AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                      bool rememberState = false, bool returnSequence = false);

   TBasicGRULayer<Architecture_t> *AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false, bool returnSequence = false,
                                                    bool resetGateAfter = false);
   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                        Scalar_t dropoutProbability, Scalar_t corruptionLevel);

   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);

   TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                          Scalar_t dropoutProbability, EActivationFunction f,
                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);

   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                                Scalar_t learningRate, EActivationFunction f,
                                                                std::vector<Matrix_t> weights, std::vector<Matrix_t> biases,
                                                                Scalar_t corruptionLevel, Scalar_t dropoutProbability);

   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
                                                                        size_t testDataBatchSize, Scalar_t learningRate);

   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
   void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
                 bool applyDropout = false);

   void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
                 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
#ifdef USE_PARALLEL_DEEPNET
   // ... declarations of the parallel training interface (ParallelForward, ParallelBackward, ...) ...
#endif // USE_PARALLEL_DEEPNET

   Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining = false,
                 bool includeRegularization = true);
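
   // Usage sketch (illustrative only, not part of the interface): a minimal training step built from
   // the methods declared above. The architecture type, the batch layout and the names `net`, `X`,
   // `Y`, `W` are assumptions made for the example; the input tensor has to be filled by the caller
   // consistently with the (BatchDepth, BatchHeight, BatchWidth) passed to the constructor.
   //
   //    using Arch_t = TMVA::DNN::TCpu<Double_t>;
   //    TDeepNet<Arch_t> net(batchSize, /*InputDepth=*/1, /*InputHeight=*/1, /*InputWidth=*/nFeatures,
   //                         /*BatchDepth=*/1, /*BatchHeight=*/batchSize, /*BatchWidth=*/nFeatures,
   //                         ELossFunction::kCrossEntropy, EInitialization::kGauss);
   //    net.AddDenseLayer(64, EActivationFunction::kRelu);
   //    net.AddDenseLayer(1, EActivationFunction::kIdentity);
   //    net.Initialize();
   //    net.Forward(X, /*applyDropout=*/true);      // X : Arch_t::Tensor_t holding one batch
   //    Double_t loss = net.Loss(Y, W);              // Y : targets, W : event weights
   //    net.Backward(X, Y, W);
   //    net.Update(/*learningRate=*/1.e-3);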
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet()
   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
     fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
     fIsTraining(true), fWeightDecay(0.0)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J),
     fI(I), fR(R), fWeightDecay(weightDecay)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
     fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
     fWeightDecay(deepNet.fWeightDecay)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
{
   // Release the memory owned by the layers
   for (auto layer : fLayers)
      delete layer;
}

template <typename Architecture_t, typename Layer_t>
size_t TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride)
{
   // use floating-point division, otherwise the divisibility check below can never fail
   Scalar_t dimension = ((Scalar_t)(imgDim - fltDim + 2 * padding) / stride) + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      int iLayer = fLayers.size();
      Fatal("calculateDimension",
            "Incompatible hyper-parameters for layer %d - (imageDim, filterDim, padding, stride) %d, %d, %d, %d",
            iLayer, imgDim, fltDim, padding, stride);
   }

   return (size_t)dimension;
}

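// Worked example of the formula above: a 32x32 input plane with a 5x5 filter, padding 0 and
// stride 1 gives (32 - 5 + 2*0)/1 + 1 = 28, i.e. a 28x28 output plane. With stride 2 the same
// configuration gives 14.5, which is not an integer, and Fatal() aborts with the message above.
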
template <typename Architecture_t, typename Layer_t>
TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
                                                                            size_t filterWidth, size_t strideRows,
                                                                            size_t strideCols, size_t paddingHeight,
                                                                            size_t paddingWidth, EActivationFunction f,
                                                                            Scalar_t dropoutProbability)
{
   // All variables defining a convolutional layer
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
      strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);

   fLayers.push_back(convLayer);
   return convLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
{
   fLayers.push_back(convLayer);
}

template <typename Architecture_t, typename Layer_t>
TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
                                                                                  size_t strideRows, size_t strideCols,
                                                                                  Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
      strideRows, strideCols, dropoutProbability);

   fLayers.push_back(maxPoolLayer);
   return maxPoolLayer;
}

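// Usage sketch (illustrative only): stacking the two layer types implemented above on a 1x28x28
// input. The architecture type, variable names and the batch layout passed to the constructor are
// assumptions made for the example.
//
//    TDeepNet<TMVA::DNN::TCpu<Double_t>> cnn(batchSize, 1, 28, 28, batchSize, 1, 28 * 28,
//                                            ELossFunction::kCrossEntropy, EInitialization::kGauss);
//    cnn.AddConvLayer(/*depth=*/8, /*filterH=*/5, /*filterW=*/5, /*strideR=*/1, /*strideC=*/1,
//                     /*padH=*/2, /*padW=*/2, EActivationFunction::kRelu);        // output: 8 x 28 x 28
//    cnn.AddMaxPoolLayer(/*frameH=*/2, /*frameW=*/2, /*strideR=*/2, /*strideC=*/2); // output: 8 x 14 x 14
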
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(TMaxPoolLayer<Architecture_t> *maxPoolLayer)
{
   fLayers.push_back(maxPoolLayer);
}

template <typename Architecture_t, typename Layer_t>
TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps, bool rememberState,
                                                                                    bool returnSequence,
                                                                                    EActivationFunction f)
{
   // The input to a recurrent layer is expected to have the layout (batch size, time steps, input size)
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicRNNLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicRNNLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu or %zu",
            timeSteps, inputHeight, inputDepth);
   }

   TBasicRNNLayer<Architecture_t> *basicRNNLayer = new TBasicRNNLayer<Architecture_t>(
      this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
      f, fIsTraining, this->GetInitialization());
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}

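// Usage sketch (illustrative only): for the consistency checks above to pass, the net has to be
// created with InputHeight = timeSteps and InputWidth = inputSize (time along the "height" axis).
// The architecture type and the batch layout are assumptions made for the example.
//
//    TDeepNet<TMVA::DNN::TCpu<Double_t>> rnn(batchSize, 1, /*timeSteps=*/10, /*inputSize=*/4,
//                                            batchSize, 10, 4, ELossFunction::kMeanSquaredError,
//                                            EInitialization::kGauss);
//    rnn.AddBasicRNNLayer(/*stateSize=*/16, /*inputSize=*/4, /*timeSteps=*/10,
//                         /*rememberState=*/false, /*returnSequence=*/false);
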
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
{
   fLayers.push_back(basicRNNLayer);
}

template <typename Architecture_t, typename Layer_t>
TBasicLSTMLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicLSTMLayer(size_t stateSize, size_t inputSize,
                                                                                      size_t timeSteps, bool rememberState,
                                                                                      bool returnSequence)
{
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicLSTMLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicLSTMLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu",
            timeSteps, inputHeight);
   }

   TBasicLSTMLayer<Architecture_t> *basicLSTMLayer = new TBasicLSTMLayer<Architecture_t>(
      this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
      fIsTraining, this->GetInitialization());
   fLayers.push_back(basicLSTMLayer);
   return basicLSTMLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicLSTMLayer(TBasicLSTMLayer<Architecture_t> *basicLSTMLayer)
{
   fLayers.push_back(basicLSTMLayer);
}

template <typename Architecture_t, typename Layer_t>
TBasicGRULayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicGRULayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps, bool rememberState,
                                                                                    bool returnSequence,
                                                                                    bool resetGateAfter)
{
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicGRULayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicGRULayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu",
            timeSteps, inputHeight);
   }

   TBasicGRULayer<Architecture_t> *basicGRULayer = new TBasicGRULayer<Architecture_t>(
      this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter,
      fIsTraining, this->GetInitialization());
   fLayers.push_back(basicGRULayer);
   return basicGRULayer;
}

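// Usage sketch (illustrative only): the GRU helper mirrors the RNN/LSTM helpers above. The reading
// of resetGateAfter as selecting the GRU variant in which the reset gate is applied after the matrix
// multiplication (the cuDNN-compatible formulation) is an assumption based on the parameter name.
//
//    net.AddBasicGRULayer(/*stateSize=*/16, /*inputSize=*/4, /*timeSteps=*/10,
//                         /*rememberState=*/false, /*returnSequence=*/true, /*resetGateAfter=*/true);
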
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicGRULayer(TBasicGRULayer<Architecture_t> *basicGRULayer)
{
   fLayers.push_back(basicGRULayer);
}

template <typename Architecture_t, typename Layer_t>
TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
                                                                                        size_t hiddenUnits,
                                                                                        Scalar_t dropoutProbability,
                                                                                        Scalar_t corruptionLevel)
{
   size_t batchSize = this->GetBatchSize();

   TCorruptionLayer<Architecture_t> *corruptionLayer =
      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
   fLayers.push_back(corruptionLayer);
   return corruptionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
{
   fLayers.push_back(corruptionLayer);
}

template <typename Architecture_t, typename Layer_t>
TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
{
   size_t batchSize = this->GetBatchSize();

   TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
   fLayers.push_back(compressionLayer);
   return compressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
{
   fLayers.push_back(compressionLayer);
}

template <typename Architecture_t, typename Layer_t>
TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();

   TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
   fLayers.push_back(reconstructionLayer);
   return reconstructionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer)
{
   fLayers.push_back(reconstructionLayer);
}

template <typename Architecture_t, typename Layer_t>
TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
{
   size_t batchSize = this->GetBatchSize();

   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
   fLayers.push_back(logisticRegressionLayer);
   return logisticRegressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
{
   fLayers.push_back(logisticRegressionLayer);
}

template <typename Architecture_t, typename Layer_t>
TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
                                                                              Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();
   if (fLayers.size() == 0) {
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputWidth = lastLayer->GetWidth();
   }
   TDenseLayer<Architecture_t> *denseLayer =
      new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
   fLayers.push_back(denseLayer);
   return denseLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
{
   fLayers.push_back(denseLayer);
}

template <typename Architecture_t, typename Layer_t>
TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
                                                                                  size_t width, bool flattening)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth = 0, inputHeight = 0, inputWidth = 0;
   size_t outputNSlices, outputNRows, outputNCols;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   if (flattening) {
      outputNSlices = 1;
      outputNRows = this->GetBatchSize();
      outputNCols = depth * height * width;
      size_t inputNCols = inputDepth * inputHeight * inputWidth;
      if (outputNCols != 0 && outputNCols != inputNCols) {
         Info("AddReshapeLayer",
              "Dimensions not compatible - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - "
              "forcing the flattened output to be %zu",
              inputDepth, inputHeight, inputWidth, depth, height, width, inputNCols);
      }
      outputNCols = inputNCols;
   } else {
      outputNSlices = this->GetBatchSize();
      outputNRows = depth;
      outputNCols = height * width;
   }

   TReshapeLayer<Architecture_t> *reshapeLayer =
      new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
                                        outputNSlices, outputNRows, outputNCols, flattening);
   fLayers.push_back(reshapeLayer);
   return reshapeLayer;
}

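// Usage sketch (illustrative only): flattening the 8 x 14 x 14 output of a convolution/pooling
// stack into one row per event before a dense layer. The layer sizes are placeholders.
//
//    net.AddReshapeLayer(/*depth=*/1, /*height=*/1, /*width=*/8 * 14 * 14, /*flattening=*/true);
//    net.AddDenseLayer(/*width=*/10, EActivationFunction::kIdentity); // softmax applied later via EOutputFunction
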
template <typename Architecture_t, typename Layer_t>
TBatchNormLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBatchNormLayer(Scalar_t momentum, Scalar_t epsilon)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth = 0;
   size_t inputHeight = 0;
   size_t inputWidth = 0;
   std::vector<size_t> shape = {1, 1, 1};
   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      // for an input layer the normalized shape is (batchSize, inputWidth, 1)
      shape[0] = batchSize;
      shape[1] = inputWidth;
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      shape = lastLayer->GetOutput().GetShape();
   }
   // collapse any higher-dimensional output into the last shape entry
   if (shape.size() > 3) {
      for (size_t i = 3; i < shape.size(); ++i)
         shape[2] *= shape[i];
   }

   // construction of the TBatchNormLayer from the deduced shape is not included in this excerpt
   fLayers.push_back(bnormLayer);
   return bnormLayer;
}

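// Usage sketch (illustrative only): batch normalization is typically inserted between a dense or
// convolutional layer and the next one. The default arguments (momentum = -1, epsilon = 0.0001)
// are the ones listed for AddBatchNormLayer in the reference list at the end of this excerpt.
//
//    net.AddDenseLayer(64, EActivationFunction::kRelu);
//    net.AddBatchNormLayer();                      // default momentum / epsilon
//    net.AddDenseLayer(1, EActivationFunction::kIdentity);
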
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
{
   fLayers.push_back(reshapeLayer);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Initialize() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Initialize();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ResetTraining() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->ResetTraining();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   fLayers.front()->Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
                                                 EActivationFunction f, bool applyDropout) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // number of hidden layers to pre-train (was sizeof(...)/sizeof(...[0]), which is wrong for a std::vector)
   size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
   size_t visibleUnits = (size_t)input[0].GetNrows();

   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(input, applyDropout);

   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
                       fLayers.back()->GetBiases());
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                            input);

   size_t weightsSize = fLayers.back()->GetWeights().size();
   size_t biasesSize = fLayers.back()->GetBiases().size();
   for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
      // update the weights and biases of the compression layer with the ones learned by the reconstruction layer
      for (size_t j = 0; j < weightsSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
      }
      for (size_t j = 0; j < biasesSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
      }
      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                            fLayers[fLayers.size() - 3]->GetOutput(), input);
   }
   fLayers.back()->Print();

   for (size_t i = 1; i < numOfHiddenLayers; i++) {
      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);

      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
                             dropoutProbability);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                               fLayers[fLayers.size() - 5]->GetOutput());

      size_t _weightsSize = fLayers.back()->GetWeights().size();
      size_t _biasesSize = fLayers.back()->GetBiases().size();
      for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
         // update the weights and biases of the compression layer with the ones learned by the reconstruction layer
         for (size_t j = 0; j < _weightsSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
         }
         for (size_t j = 0; j < _biasesSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
         }
         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                               fLayers[fLayers.size() - 3]->GetOutput(),
                                               fLayers[fLayers.size() - 5]->GetOutput());
      }
      fLayers.back()->Print();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
                                                 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
                                                 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   if (fLayers.size() == 0) {
      // only a logistic regression layer is trained, directly on the input
      size_t inputUnits = input[0].GetNrows();

      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, input, inp2);
      }
      fLayers.back()->Forward(input, false);
      fLayers.back()->Print();
   } else {
      // the logistic regression layer is appended after the pre-trained layers
      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
      }
      fLayers.back()->Forward(testInput, false);
      fLayers.back()->Print();
   }
}

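// Usage sketch (illustrative only) of the autoencoder workflow implemented above: PreTrain()
// greedily stacks corruption/compression/reconstruction layers, FineTune() then appends a
// logistic-regression layer trained on the labels. The variable names and numeric settings are
// placeholders chosen for the example.
//
//    std::vector<Matrix_t> X, Xtest, Y;                 // filled by the caller
//    std::vector<size_t> hiddenUnits = {128, 64};
//    net.PreTrain(X, hiddenUnits, /*learningRate=*/0.1, /*corruptionLevel=*/0.3,
//                 /*dropoutProbability=*/1.0, /*epochs=*/10, EActivationFunction::kSigmoid);
//    net.FineTune(X, Xtest, Y, /*outputUnits=*/2, /*testDataBatchSize=*/Xtest.size(),
//                 /*learningRate=*/0.1, /*epochs=*/10);
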
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Backward(const Tensor_t &input, const Matrix_t &groundTruth,
                                                 const Matrix_t &weights) -> void
{
   // evaluate the gradient of the loss with respect to the output of the last layer
   Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
   Matrix_t last_output = fLayers.back()->GetOutputAt(0);
   evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth, last_output, weights);

   // propagate the gradients backwards through the layers
   for (size_t i = fLayers.size() - 1; i > 0; i--) {
      auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      auto &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward);
   }

   // the first layer receives an empty tensor for the backward activation gradients
   Tensor_t dummy;
   fLayers[0]->Backward(dummy, input);
}

#ifdef USE_PARALLEL_DEEPNET

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                        std::vector<TTensorBatch<Architecture_t>> &batches,
                                                        bool applyDropout) -> void
{
   size_t depth = this->GetDepth();

   // the first layer of each deep net propagates its own batch
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
   }

   // the remaining layers propagate the output of the previous layer
   for (size_t i = 1; i < depth; i++) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         Scalar_t learningRate) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // evaluate the loss gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // backpropagate the error through the i-th layer of each deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // first layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // update the weights and biases in the master deep net and copy them back to the workers
   for (size_t i = 0; i < nets.size(); i++) {
      for (size_t j = 0; j < depth; j++) {
         Layer_t *masterLayer = this->GetLayerAt(j);
         Layer_t *layer = nets[i].GetLayerAt(j);

         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
         layer->CopyWeights(masterLayer->GetWeights());

         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // evaluate the loss gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // backpropagate the error through the i-th layer of each deep net and accumulate the gradients in the master net
   for (size_t i = depth - 1; i > 0; i--) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
   }

   std::vector<Matrix_t> dummy;

   // first layer of each deep net
   Layer_t *masterFirstLayer = this->GetLayerAt(0);
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);

      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);

      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
   }

   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);

   // apply the accumulated gradients in the master net and copy the result to the workers
   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      masterLayer->Update(1.0);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // evaluate the loss gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // backpropagate the error through the i-th layer of each deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // first layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);
      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());

         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
      }

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);

      masterLayer->Update(1.0);
   }
}

#endif // USE_PARALLEL_DEEPNET

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Update(learningRate);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool includeRegularization) const -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);

   // add the regularization term
   if (includeRegularization) {
      loss += RegularizationTerm();
   }

   return loss;
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool inTraining, bool includeRegularization) -> Scalar_t
{
   Forward(input, inTraining);
   return Loss(groundTruth, weights, includeRegularization);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
{
   Scalar_t reg = 0.0;
   for (size_t i = 0; i < fLayers.size(); i++) {
      for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
         reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
      }
   }
   return this->GetWeightDecay() * reg;
}

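// Summary of what Loss() computes when includeRegularization is true, restating the two functions
// above with R the L1/L2 functional selected by ERegularization and lambda = fWeightDecay:
//
//    L_total = L(y, y_hat; W) + lambda * sum_l sum_j R(W_j^(l))
//
// where the inner sum runs over the weight matrices of layer l.
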
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
{
   // apply the output function (e.g. sigmoid or softmax) to the activations stored in the last layer
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f) -> void
{
   Forward(input, false);
   // apply the output function to the freshly computed activations of the last layer
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Print() const
{
   std::cout << "DEEP NEURAL NETWORK:  Depth = " << this->GetDepth();
   std::cout << "  Input = ( " << this->GetInputDepth();
   std::cout << ", " << this->GetInputHeight();
   std::cout << ", " << this->GetInputWidth() << " )";
   std::cout << "  Batch size = " << this->GetBatchSize();
   std::cout << "  Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;

   for (size_t i = 0; i < fLayers.size(); i++) {
      std::cout << "\tLayer " << i << "\t";
      fLayers[i]->Print();
   }
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::SetDropoutProbabilities(const std::vector<Double_t> &probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i]->SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i]->SetDropoutProbability(1.0);
      }
   }
}
void Info(const char *location, const char *msgfmt,...)
void Error(const char *location, const char *msgfmt,...)
void Fatal(const char *location, const char *msgfmt,...)
TMaxPoolLayer: Generic Max Pooling Layer class.
TBatchNormLayer: Layer implementing Batch Normalization.
TDeepNet: Generic Deep Neural Network class.
const std::vector< Layer_t * > & GetLayers() const
void AddDenseLayer(TDenseLayer< Architecture_t > *denseLayer)
Function for adding Dense Layer in the Deep Neural Network, when the layer is already created.
size_t GetBatchHeight() const
void SetBatchDepth(size_t batchDepth)
void Forward(Tensor_t &input, bool applyDropout=false)
Function that executes the entire forward pass in the network.
void SetLossFunction(ELossFunction J)
size_t fBatchHeight
The height of the batch used for training/testing.
ERegularization GetRegularization() const
void AddBasicGRULayer(TBasicGRULayer< Architecture_t > *basicGRULayer)
Function for adding GRU Layer in the Deep Neural Network, when the layer is already created.
std::vector< Layer_t * > & GetLayers()
typename Architecture_t::Scalar_t Scalar_t
void Initialize()
DAE functions.
size_t GetBatchSize() const
Getters.
Scalar_t GetWeightDecay() const
size_t GetInputDepth() const
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights)
Function that executes the entire backward pass in the network.
std::vector< Layer_t * > fLayers
The layers consisting the DeepNet.
size_t fBatchDepth
The depth of the batch used for training/testing.
size_t fInputDepth
The depth of the input.
Layer_t * GetLayerAt(size_t i)
Get the layer in the vector of layers at position i.
void Print() const
Print the Deep Net Info.
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
void SetWeightDecay(Scalar_t weightDecay)
void AddReshapeLayer(TReshapeLayer< Architecture_t > *reshapeLayer)
Function for adding Reshape Layer in the Deep Neural Network, when the layer is already created.
void Clear()
Remove all layers from the network.
Scalar_t RegularizationTerm() const
Function for computing the regularization term to be added to the loss function.
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth, size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI=EInitialization::kZero, ERegularization fR=ERegularization::kNone, Scalar_t fWeightDecay=0.0, bool isTraining=false)
Constructor.
void Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f)
Prediction for the given inputs, based on what network learned.
void SetInputDepth(size_t inputDepth)
size_t GetInputHeight() const
size_t fBatchSize
Batch size used for training and evaluation.
void Prediction(Matrix_t &predictions, EOutputFunction f) const
Prediction based on activations stored in the last layer.
size_t fInputWidth
The width of the input.
void SetInputHeight(size_t inputHeight)
size_t GetBatchWidth() const
void AddBasicRNNLayer(TBasicRNNLayer< Architecture_t > *basicRNNLayer)
Function for adding Vanilla RNN when the layer is already created.
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
void AddMaxPoolLayer(CNN::TMaxPoolLayer< Architecture_t > *maxPoolLayer)
Function for adding Max Pooling layer in the Deep Neural Network, when the layer is already created.
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Scalar_t fWeightDecay
The weight decay factor.
Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization=true) const
Function for evaluating the loss, based on the activations stored in the last layer.
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
ERegularization fR
The regularization used for the network.
void ResetTraining()
Function that resets some training flags after looping over all the events, but not the weights.
size_t GetInputWidth() const
bool isInteger(Scalar_t x) const
size_t GetOutputWidth() const
bool fIsTraining
Is the network training?
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
void SetBatchSize(size_t batchSize)
Setters.
void AddConvLayer(TConvLayer< Architecture_t > *convLayer)
Function for adding Convolution Layer in the Deep Neural Network, when the layer is already created.
size_t fInputHeight
The height of the input.
void SetRegularization(ERegularization R)
TDeepNet(const TDeepNet &)
Copy-constructor.
size_t fBatchWidth
The width of the batch used for training/testing.
typename Architecture_t::Tensor_t Tensor_t
ELossFunction fJ
The loss function of the network.
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
void SetBatchWidth(size_t batchWidth)
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
TDeepNet()
Default Constructor.
void SetBatchHeight(size_t batchHeight)
void Update(Scalar_t learningRate)
Function that will update the weights and biases in the layers that contain weights and biases.
ELossFunction GetLossFunction() const
size_t calculateDimension(int imgDim, int fltDim, int padding, int stride)
const Layer_t * GetLayerAt(size_t i) const
void SetInitialization(EInitialization I)
EInitialization GetInitialization() const
void SetInputWidth(size_t inputWidth)
typename Architecture_t::Matrix_t Matrix_t
void AddBasicLSTMLayer(TBasicLSTMLayer< Architecture_t > *basicLSTMLayer)
Function for adding LSTM Layer in the Deep Neural Network, when the layer is already created.
Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining=false, bool includeRegularization=true)
Function for evaluating the loss, based on the propagation of the given input.
EInitialization fI
The initialization method of the network.
size_t GetBatchDepth() const
EOutputFunction
Enum that represents output functions.
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
ELossFunction
Enum that represents objective functions for the net, i.e.