#ifndef TMVA_DNN_DEEPNET
#define TMVA_DNN_DEEPNET

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/DenseLayer.h"
#include "TMVA/DNN/ReshapeLayer.h"
#include "TMVA/DNN/TensorDataLoader.h"

#include "TMVA/DNN/CNN/ConvLayer.h"
#include "TMVA/DNN/CNN/MaxPoolLayer.h"
#include "TMVA/DNN/RNN/RNNLayer.h"

#include "TMVA/DNN/DAE/CompressionLayer.h"
#include "TMVA/DNN/DAE/CorruptionLayer.h"
#include "TMVA/DNN/DAE/ReconstructionLayer.h"
#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"

#include <cmath>
#include <iostream>
#include <vector>

namespace TMVA {
namespace DNN {

using namespace CNN;
using namespace RNN;
using namespace DAE;

/*! Generic Deep Neural Network class: a container of layers that implements
 *  the forward and backward passes over the whole network. */
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
class TDeepNet {
public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

private:
   bool isInteger(Scalar_t x) const { return x == floor(x); }
   size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);

private:
   std::vector<Layer_t *> fLayers; ///< The layers consisting the DeepNet.

   size_t fBatchSize;   ///< Batch size used for training and evaluation.
   size_t fInputDepth;  ///< The depth of the input.
   size_t fInputHeight; ///< The height of the input.
   size_t fInputWidth;  ///< The width of the input.

   size_t fBatchDepth;  ///< The depth of the batch used for training/testing.
   size_t fBatchHeight; ///< The height of the batch used for training/testing.
   size_t fBatchWidth;  ///< The width of the batch used for training/testing.

   ELossFunction fJ;   ///< The loss function of the network.
   EInitialization fI; ///< The initialization method of the network.
   ERegularization fR; ///< The regularization used for the network.

   bool fIsTraining;       ///< Is the network training?
   Scalar_t fWeightDecay;  ///< The weight decay factor.

public:
   /*! Default constructor. */
   TDeepNet();

   /*! Constructor. */
   TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
            size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
            ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);

   /*! Copy-constructor. */
   TDeepNet(const TDeepNet &);

   /*! Destructor. */
   ~TDeepNet();

   /*! Function for adding Convolution layer in the Deep Neural Network, with a given depth,
    *  filter height and width, striding in rows and columns, the zero paddings, as well as
    *  the activation function and the dropout probability. */
   TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
                                            size_t strideCols, size_t paddingHeight, size_t paddingWidth,
                                            EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   /*! Function for adding an already created Convolution layer. */
   void AddConvLayer(TConvLayer<Architecture_t> *convLayer);

   /*! Function for adding Pooling layer in the Deep Neural Network, with a given filter
    *  height and width, striding in rows and columns, and the dropout probability. */
   TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
                                                  size_t strideCols, Scalar_t dropoutProbability = 1.0);

   /*! Function for adding an already created Max Pooling layer. */
   void AddMaxPoolLayer(TMaxPoolLayer<Architecture_t> *maxPoolLayer);

   /*! Function for adding Recurrent Layer in the Deep Neural Network, with given parameters. */
   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false);

   /*! Function for adding an already created Recurrent layer. */
   void AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer);

   /*! Function for adding Dense Connected Layer in the Deep Neural Network, with a given
    *  width, activation function and dropout probability. */
   TDenseLayer<Architecture_t> *AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   /*! Function for adding an already created Dense layer. */
   void AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer);

   /*! Function for adding Reshape Layer in the Deep Neural Network, with a given depth,
    *  height and width; if flattening is requested the output is forced to one slice. */
   TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);

   /*! Function for adding an already created Reshape layer. */
   void AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer);
   /* DAE functions. */

   /*! Function for adding Corruption layer in the Deep Neural Network, with given numbers
    *  of visible and hidden units, dropout probability and corruption level. */
   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                        Scalar_t dropoutProbability, Scalar_t corruptionLevel);

   /*! Function for adding an already created Corruption layer. */
   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);

   /*! Function for adding Compression layer in the Deep Neural Network, with given numbers
    *  of visible and hidden units, sharing the passed weights and biases. */
   TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                          Scalar_t dropoutProbability, EActivationFunction f,
                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);

   /*! Function for adding an already created Compression layer. */
   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);

   /*! Function for adding Reconstruction layer in the Deep Neural Network, with given numbers
    *  of visible and hidden units, sharing the passed weights and biases. */
   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                                Scalar_t learningRate, EActivationFunction f,
                                                                std::vector<Matrix_t> weights,
                                                                std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
                                                                Scalar_t dropoutProbability);

   /*! Function for adding an already created Reconstruction layer. */
   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);

   /*! Function for adding Logistic Regression layer in the Deep Neural Network, with given
    *  numbers of input and output units and the batch size of the test data. */
   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
                                                                        size_t testDataBatchSize,
                                                                        Scalar_t learningRate);

   /*! Function for adding an already created Logistic Regression layer. */
   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
   /*! Function for initialization of the neural net. */
   void Initialize();

   /*! Function for greedy, layer-wise pre-training of the network with stacked
    *  denoising autoencoders. */
   void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
                 bool applyDropout = false);

   /*! Function for fine-tuning the pre-trained network with a logistic regression output layer. */
   void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
                 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);

   /*! Function that executes the entire forward pass in the network. */
   void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);

   /*! Function for parallel forward in the vector of deep nets, where the master net
    *  is the net calling this function. */
   void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                        std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);

   /*! Function that executes the entire backward pass in the network. */
   void Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights);

   /*! Function for parallel backward in the vector of deep nets, where the master net
    *  is the net calling this function. */
   void ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                         std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);

   /*! Parallel backward with momentum. */
   void ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
                                 Scalar_t momentum);

   /*! Parallel backward with Nesterov momentum. */
   void ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
                                 Scalar_t momentum);

   /*! Function that will update the weights and biases in the layers that contain
    *  weights and biases, after each iteration. */
   void Update(Scalar_t learningRate);

   /*! Function for evaluating the loss, based on the activations stored in the last layer. */
   Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;

   /*! Function for evaluating the loss, based on the propagation of the given input. */
   Scalar_t Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights,
                 bool applyDropout = false, bool includeRegularization = true);

   /*! Prediction based on activations stored in the last layer. */
   void Prediction(Matrix_t &predictions, EOutputFunction f) const;

   /*! Prediction for the given inputs, based on what the network has learned. */
   void Prediction(Matrix_t &predictions, std::vector<Matrix_t> &input, EOutputFunction f);
   /*! Get the layer in the vector of layers at position i. */
   inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
   inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }

   /* Depth (number of layers) of the network. */
   inline size_t GetDepth() const { return fLayers.size(); }

   /*! Get the layers in the network. */
   inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
   inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }

   /*! Remove all layers from the network. */
   inline void Clear() { fLayers.clear(); }

   /*! Getters. */
   inline size_t GetBatchSize() const { return fBatchSize; }
   inline size_t GetInputDepth() const { return fInputDepth; }
   inline size_t GetInputHeight() const { return fInputHeight; }
   inline size_t GetInputWidth() const { return fInputWidth; }

   inline size_t GetBatchDepth() const { return fBatchDepth; }
   inline size_t GetBatchHeight() const { return fBatchHeight; }
   inline size_t GetBatchWidth() const { return fBatchWidth; }

   inline bool IsTraining() const { return fIsTraining; }

   inline ELossFunction GetLossFunction() const { return fJ; }
   inline EInitialization GetInitialization() const { return fI; }
   inline ERegularization GetRegularization() const { return fR; }
   inline Scalar_t GetWeightDecay() const { return fWeightDecay; }

   /*! Setters. */
   inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
   inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
   inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
   inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
   inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
   inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
   inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
   inline void SetLossFunction(ELossFunction J) { fJ = J; }
   inline void SetInitialization(EInitialization I) { fI = I; }
   inline void SetRegularization(ERegularization R) { fR = R; }
   inline void SetWeightDecay(Scalar_t weightDecay) { fWeightDecay = weightDecay; }

   /*! Print the Deep Net Info. */
   void Print() const;
};
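/* Usage sketch (illustrative, not part of this header): how the builder API above
   is typically chained. The backend type TCpu<double> is one of the TMVA
   architectures; all layer parameters below are made-up numbers.

      TDeepNet<TCpu<double>> net(32, 1, 28, 28,     // batch size, input depth/height/width
                                 32, 1, 28 * 28,    // batch depth/height/width
                                 ELossFunction::kCrossEntropy);
      net.AddConvLayer(8, 3, 3, 1, 1, 1, 1, EActivationFunction::kRelu);
      net.AddMaxPoolLayer(2, 2, 2, 2);
      net.AddReshapeLayer(1, 1, 8 * 14 * 14, true); // flatten for the dense layers
      net.AddDenseLayer(64, EActivationFunction::kRelu);
      net.AddDenseLayer(10, EActivationFunction::kIdentity);
      net.Initialize();
*/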

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet()
   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
     fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
     fIsTraining(true), fWeightDecay(0.0)
{
   // Default constructor.
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fJ(J), fI(I), fR(R),
     fIsTraining(isTraining), fWeightDecay(weightDecay)
{
   // Constructor.
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
     fBatchWidth(deepNet.fBatchWidth), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
     fIsTraining(deepNet.fIsTraining), fWeightDecay(deepNet.fWeightDecay)
{
   // Copy-constructor.
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
{
   // Destructor.
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
{
   // Use floating-point division: with plain int operands the division truncates
   // and the isInteger() consistency check below could never fire.
   Scalar_t dimension = ((Scalar_t)(imgDim - fltDim + 2 * padding) / stride) + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      int iLayer = fLayers.size();
      Fatal("calculateDimension",
            "Incompatible hyperparameters for layer %d - (imageDim, filterDim, padding, stride): %d, %d, %d, %d",
            iLayer, imgDim, fltDim, padding, stride);
   }

   return (size_t)dimension;
}
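/* Worked example of the formula above: a 28-pixel-wide image, a 3-pixel filter,
   padding 1 and stride 1 give (28 - 3 + 2*1)/1 + 1 = 28, so the spatial size is
   preserved; the same setup with stride 2 gives 27/2 + 1 = 14.5, which is not an
   integer and would trigger the Fatal() above. */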

template <typename Architecture_t, typename Layer_t>
TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
                                                                            size_t filterWidth, size_t strideRows,
                                                                            size_t strideCols, size_t paddingHeight,
                                                                            size_t paddingWidth, EActivationFunction f,
                                                                            Scalar_t dropoutProbability)
{
   // All variables defining a convolutional layer
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();
   size_t weightsNRows = depth;
   size_t weightsNCols = 0;
   size_t biasesNRows = depth;
   size_t biasesNCols = 1;
   size_t outputNSlices = this->GetBatchSize();
   size_t outputNRows = depth;
   size_t outputNCols = 0;

   // The input dimensions come either from the network input or from the last layer
   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   // Calculate the output spatial dimensions
   size_t height = calculateDimension(inputHeight, filterHeight, paddingHeight, strideRows);
   size_t width = calculateDimension(inputWidth, filterWidth, paddingWidth, strideCols);

   size_t filterDepth = inputDepth;
   weightsNCols = filterDepth * filterHeight * filterWidth;
   outputNCols = height * width;

   TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, depth, height, width, weightsNRows, weightsNCols, biasesNRows,
      biasesNCols, outputNSlices, outputNRows, outputNCols, init, filterDepth, filterHeight, filterWidth, strideRows,
      strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);

   fLayers.push_back(convLayer);
   return convLayer;
}
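/* Shape check for the allocation above, with made-up numbers: for a 3 x 32 x 32
   input and AddConvLayer(16, 3, 3, 1, 1, 1, 1, f), the weights are
   16 x (3*3*3) = 16 x 27 (one row per output filter), the biases 16 x 1, and each
   output slice 16 x (32*32), since height = width = (32 - 3 + 2)/1 + 1 = 32. */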

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
{
   fLayers.push_back(convLayer);
}

template <typename Architecture_t, typename Layer_t>
TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
                                                                                  size_t strideRows, size_t strideCols,
                                                                                  Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   size_t outputNSlices = this->GetBatchSize();
   size_t outputNRows = 0;
   size_t outputNCols = 0;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   // Pooling uses no padding
   size_t height = calculateDimension(inputHeight, frameHeight, 0, strideRows);
   size_t width = calculateDimension(inputWidth, frameWidth, 0, strideCols);

   outputNRows = inputDepth;
   outputNCols = height * width;

   TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, height, width, outputNSlices, outputNRows, outputNCols,
      frameHeight, frameWidth, strideRows, strideCols, dropoutProbability);

   fLayers.push_back(maxPoolLayer);
   return maxPoolLayer;
}
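/* Example with made-up numbers: on a 16 x 28 x 28 input, AddMaxPoolLayer(2, 2, 2, 2)
   gives height = width = (28 - 2)/2 + 1 = 14, so each output slice is 16 x (14*14);
   pooling never changes the depth, hence outputNRows = inputDepth above. */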

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(TMaxPoolLayer<Architecture_t> *maxPoolLayer)
{
   fLayers.push_back(maxPoolLayer);
}

template <typename Architecture_t, typename Layer_t>
TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps, bool rememberState)
{
   size_t inputHeight, inputWidth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicRNNLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight) {
      Error("AddBasicRNNLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu",
            timeSteps, inputHeight);
   }

   TBasicRNNLayer<Architecture_t> *basicRNNLayer =
      new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
                                         EActivationFunction::kTanh, fIsTraining, this->GetInitialization());
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}
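/* Layout assumed by the checks above: the batch is interpreted as
   (batch size) x (time steps) x (input size), i.e. batchHeight carries the time
   steps and batchWidth the input size. A net built for 10 time steps of 8 features
   would therefore call, with an illustrative state size of 16,
   AddBasicRNNLayer(16, 8, 10). */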

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
{
   fLayers.push_back(basicRNNLayer);
}

/* DAE functions. */
template <typename Architecture_t, typename Layer_t>
TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
                                                                                        size_t hiddenUnits,
                                                                                        Scalar_t dropoutProbability,
                                                                                        Scalar_t corruptionLevel)
{
   size_t batchSize = this->GetBatchSize();

   TCorruptionLayer<Architecture_t> *corruptionLayer =
      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
   fLayers.push_back(corruptionLayer);
   return corruptionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
{
   fLayers.push_back(corruptionLayer);
}

template <typename Architecture_t, typename Layer_t>
TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
{
   size_t batchSize = this->GetBatchSize();

   TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
   fLayers.push_back(compressionLayer);
   return compressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
{
   fLayers.push_back(compressionLayer);
}

template <typename Architecture_t, typename Layer_t>
TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();

   TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
   fLayers.push_back(reconstructionLayer);
   return reconstructionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer)
{
   fLayers.push_back(reconstructionLayer);
}

template <typename Architecture_t, typename Layer_t>
TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
{
   size_t batchSize = this->GetBatchSize();

   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
   fLayers.push_back(logisticRegressionLayer);
   return logisticRegressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
{
   fLayers.push_back(logisticRegressionLayer);
}

template <typename Architecture_t, typename Layer_t>
TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
                                                                              Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputWidth = lastLayer->GetWidth();
   }

   TDenseLayer<Architecture_t> *denseLayer =
      new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
   fLayers.push_back(denseLayer);
   return denseLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
{
   fLayers.push_back(denseLayer);
}

template <typename Architecture_t, typename Layer_t>
TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
                                                                                  size_t width, bool flattening)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   size_t outputNSlices;
   size_t outputNRows;
   size_t outputNCols;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   if (flattening) {
      outputNSlices = 1;
      outputNRows = this->GetBatchSize();
      outputNCols = depth * height * width;
      size_t inputNCols = inputDepth * inputHeight * inputWidth;
      if (outputNCols != 0 && outputNCols != inputNCols) {
         Info("AddReshapeLayer",
              "Dimensions not compatible - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu "
              "- forcing flattened output size to %zu",
              inputDepth, inputHeight, inputWidth, depth, height, width, inputNCols);
      }
      outputNCols = inputNCols;
      depth = 1;
      height = 1;
      width = outputNCols;
   } else {
      outputNSlices = this->GetBatchSize();
      outputNRows = depth;
      outputNCols = height * width;
   }

   TReshapeLayer<Architecture_t> *reshapeLayer =
      new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
                                        outputNSlices, outputNRows, outputNCols, flattening);
   fLayers.push_back(reshapeLayer);
   return reshapeLayer;
}
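/* Example: after a conv stack producing 8 x 14 x 14 activations, a flattening
   reshape is requested as AddReshapeLayer(1, 1, 8 * 14 * 14, true). If the
   requested output product disagrees with the input product, the layer warns
   via the Info() above and forces the flattened width to
   inputDepth * inputHeight * inputWidth. */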

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
{
   fLayers.push_back(reshapeLayer);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Initialize() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Initialize();
   }
}

template <typename Architecture>
auto debugTensor(const std::vector<typename Architecture::Matrix_t> &A, const std::string name = "tensor") -> void
{
   std::cout << name << "\n";
   for (size_t l = 0; l < A.size(); ++l) {
      for (size_t i = 0; i < A[l].GetNrows(); ++i) {
         for (size_t j = 0; j < A[l].GetNcols(); ++j) {
            std::cout << A[l](i, j) << " ";
         }
         std::cout << "\n";
      }
      std::cout << "********\n";
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
{
   fLayers.front()->Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                        std::vector<TTensorBatch<Architecture_t>> &batches,
                                                        bool applyDropout) -> void
{
   size_t depth = this->GetDepth();

   // The first layer of each deep net gets its batch input
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
   }

   // The remaining layers feed on the previous layer's output
   for (size_t i = 1; i < depth; i++) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
                                                 EActivationFunction f, bool applyDropout) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // Note: the number of hidden layers must come from the vector's size;
   // sizeof(numHiddenUnitsPerLayer)/sizeof(numHiddenUnitsPerLayer[0]) is wrong for a std::vector.
   size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
   size_t visibleUnits = (size_t)input[0].GetNrows();

   // First autoencoder stack: corruption -> compression -> reconstruction
   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(input, applyDropout);

   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
                       fLayers.back()->GetBiases());
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // pass the corrupted input

   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // pass the compressed input
   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                            input);

   // Three layers have been added; train the stack for the remaining epochs
   size_t weightsSize = fLayers.back()->GetWeights().size();
   size_t biasesSize = fLayers.back()->GetBiases().size();
   for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
      // Share the reconstruction layer's updated weights with the compression layer
      for (size_t j = 0; j < weightsSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
      }
      for (size_t j = 0; j < biasesSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
      }
      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                            fLayers[fLayers.size() - 3]->GetOutput(), input);
   }

   // Subsequent stacks feed on the previous stack's hidden representation
   for (size_t i = 1; i < numOfHiddenLayers; i++) {
      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);

      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
                             dropoutProbability);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                               fLayers[fLayers.size() - 3]->GetOutput(), fLayers[fLayers.size() - 5]->GetOutput());

      size_t _weightsSize = fLayers.back()->GetWeights().size();
      size_t _biasesSize = fLayers.back()->GetBiases().size();
      for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
         for (size_t j = 0; j < _weightsSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
         }
         for (size_t j = 0; j < _biasesSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
         }
         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                               fLayers[fLayers.size() - 3]->GetOutput(),
                                               fLayers[fLayers.size() - 5]->GetOutput());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
                                                 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
                                                 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   if (fLayers.size() == 0) {
      // Only a logistic regression layer, trained directly on the input
      size_t inputUnits = input[0].GetNrows();

      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, input, inp2);
      }
      fLayers.back()->Forward(input, false);
      fLayers.back()->Print();
   } else {
      // Attached after the pre-trained stack: train on the last layer's output
      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
      }
      fLayers.back()->Forward(testInput, false);
      fLayers.back()->Print();
   }
}
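/* Usage sketch continuing the pre-training example: attach a logistic regression
   output layer for 10 classes and fine-tune on labelled data. `labels` is assumed
   to hold one-hot column matrices and `testData` a held-out batch of 100 events.

      net.FineTune(data, testData, labels, 10,  // output units
                   100,                         // test-data batch size
                   0.1, 5);                     // learning rate, epochs
*/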

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
                                                 const Matrix_t &weights) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // Seed the backward pass with the loss gradients in the last layer
   evaluateGradients<Architecture_t>(fLayers.back()->GetActivationGradientsAt(0), this->GetLossFunction(), groundTruth,
                                     fLayers.back()->GetOutputAt(0), weights);
   for (size_t i = fLayers.size() - 1; i > 0; i--) {
      std::vector<Matrix_t> &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      std::vector<Matrix_t> &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward, inp1, inp2);
   }

   // The first layer needs no activation gradients, so pass an empty tensor
   std::vector<Matrix_t> dummy;
   fLayers[0]->Backward(dummy, input, inp1, inp2);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         Scalar_t learningRate) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the loss gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error through the i'th layer of each deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // Apply every worker's gradients to the master net and copy the result back
   for (size_t i = 0; i < nets.size(); i++) {
      for (size_t j = 0; j < depth; j++) {
         Layer_t *masterLayer = this->GetLayerAt(j);
         Layer_t *layer = nets[i].GetLayerAt(j);

         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
         layer->CopyWeights(masterLayer->GetWeights());

         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the loss gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error through the i'th layer of each deep net,
   // accumulating the worker gradients into the master layer's gradients
   for (size_t i = depth - 1; i > 0; i--) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net
   Layer_t *masterFirstLayer = this->GetLayerAt(0);
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);

      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);

      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
   }

   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);

   // Apply the blended gradients and distribute the master weights to the workers
   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      masterLayer->Update(1.0);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the loss gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error through the i'th layer of each deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);
      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      // Workers sync to the master weights and apply the current master velocity
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());

         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
      }

      // Fold the worker gradients into the master velocity
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);

      masterLayer->Update(1.0);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Update(learningRate);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool includeRegularization) const -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);

   // Add the regularization terms of every layer's weight matrices
   if (includeRegularization) {
      for (size_t i = 0; i < fLayers.size(); i++) {
         for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
            loss += this->GetWeightDecay() *
                    regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
         }
      }
   }
   return loss;
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
                                             const Matrix_t &weights, bool applyDropout, bool includeRegularization)
   -> Scalar_t
{
   Forward(input, applyDropout);
   return Loss(groundTruth, weights, includeRegularization);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
{
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, std::vector<Matrix_t> &input,
                                                   EOutputFunction f) -> void
{
   Forward(input, false);
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Print() const -> void
{
   std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
   std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;

   for (size_t i = 0; i < fLayers.size(); i++) {
      std::cout << "\tLayer " << i << "\t";
      fLayers[i]->Print();
   }
}

} // namespace DNN
} // namespace TMVA

#endif