TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
{
   std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
   if (it == keyValueMap.end()) {
      // ...
   }

template <typename T>
T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);

template <>
int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)

template <>
double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)

template <>
TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)

template <>
bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
{
   // ...
   if (value == "TRUE" || value == "T" || value == "1") {
      // ...
   }

template <>
std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
                                  std::vector<double> defaultValue)
{
   // ...
   if (parseString == "") {
      // ...
   }
   parseString.ToUpper();
   std::vector<double> values;

   const TString tokenDelim("+");
   TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
   TIter nextToken(tokenStrings);
   for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
      std::stringstream sstr;
      // ...
      sstr >> currentValue;
      values.push_back(currentValue);
   }
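
// A minimal usage sketch (not part of the original source): how these helpers are typically used
// once a training-strategy block has been parsed into a std::map<TString, TString>. The keys,
// values and defaults below are illustrative assumptions, and the sketch assumes (as in the TMVA
// implementation, where the upper-casing happens in code elided from this excerpt) that keys are
// stored and looked up in upper case.
static void exampleFetchValueTmpUsage()
{
   std::map<TString, TString> block;
   block["LEARNINGRATE"] = "1e-3";
   block["CONVERGENCESTEPS"] = "20";
   block["MULTITHREADING"] = "True";

   double lr = fetchValueTmp(block, "LearningRate", 1e-5);      // -> 1e-3
   int steps = fetchValueTmp(block, "ConvergenceSteps", 100);   // -> 20
   bool useMT = fetchValueTmp(block, "Multithreading", false);  // -> true
   (void)lr; (void)steps; (void)useMT;
}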
// MethodDL::DeclareOptions()
DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");

DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");

DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout",
                 "Layout of the network.");

DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy",
                 "Loss function: Mean squared error (regression)"
                 " or cross entropy (binary classification).");
AddPreDefVal(TString("CROSSENTROPY"));
AddPreDefVal(TString("SUMOFSQUARES"));
AddPreDefVal(TString("MUTUALEXCLUSIVE"));

DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization",
                 "Weight initialization strategy");
AddPreDefVal(TString("XAVIER"));
AddPreDefVal(TString("XAVIERUNIFORM"));

DeclareOptionRef(fRandomSeed = 0, "RandomSeed",
                 "Random seed used for weight initialization and batch shuffling");

DeclareOptionRef(fArchitectureString = "CPU", "Architecture",
                 "Which architecture to perform the training on.");
AddPreDefVal(TString("STANDARD"));
AddPreDefVal(TString("CPU"));
AddPreDefVal(TString("GPU"));
AddPreDefVal(TString("OPENCL"));

DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
                                           "ConvergenceSteps=50,"
                                           "DropRepetitions=5|LearningRate=1e-4,"
                                           "ConvergenceSteps=50,"
                                           "DropConfig=0.0+0.5+0.5,"
                                           "Multithreading=True",
                 "TrainingStrategy", "Defines the training strategies.");
// MethodDL::ProcessOptions()
if (IgnoreEventsWithNegWeightsInTraining()) {
   Log() << kINFO << "Will ignore negative events in training!" << Endl;
}

if (fArchitectureString == "STANDARD") {
   Log() << kINFO << "The STANDARD architecture has been deprecated. "
                     "Please use Architecture=CPU or Architecture=GPU. "
                     "See the TMVA Users' Guide for instructions if you "
                     "encounter problems."
         << Endl;
}
if (fArchitectureString == "OPENCL") {
   Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
                      "Please use Architecture=CPU or Architecture=GPU for the "
                      "time being. See the TMVA Users' Guide for instructions "
                      "if you encounter problems."
         << Endl;
}

if (fArchitectureString == "GPU") {
#ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
   Log() << kERROR << "CUDA backend not enabled. Please make sure "
                      "you have CUDA installed and it was successfully "
                      "detected by CMAKE by using -Dcuda=On"
         << Endl;
#ifdef R__HAS_TMVACPU
   fArchitectureString = "CPU";
   Log() << kINFO << "Will use now the CPU architecture !" << Endl;
#else
   fArchitectureString = "Standard";
   Log() << kINFO << "Will use now the Standard architecture !" << Endl;
#endif
#else
   Log() << kINFO << "Will use now the GPU architecture !" << Endl;
#endif
}
else if (fArchitectureString == "CPU") {
#ifndef R__HAS_TMVACPU // TMVA has no CPU support
   Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
                      "you have a BLAS implementation and it was successfully "
                      "detected by CMake as well that the imt CMake flag is set."
         << Endl;
#ifdef R__HAS_TMVAGPU
   fArchitectureString = "GPU";
   Log() << kINFO << "Will use now the GPU architecture !" << Endl;
#else
   fArchitectureString = "STANDARD";
   Log() << kINFO << "Will use now the Standard architecture !" << Endl;
#endif
#else
   Log() << kINFO << "Will use now the CPU architecture !" << Endl;
#endif
}
else {
   Log() << kINFO << "Will use the deprecated STANDARD architecture !" << Endl;
   fArchitectureString = "STANDARD";
}
// MethodDL::ProcessOptions(), continued: map the ErrorStrategy and WeightInitialization strings
// classification:
if (fErrorStrategy == "SUMOFSQUARES") {
   // ...
}
if (fErrorStrategy == "CROSSENTROPY") {
   // ...
}
// regression:
if (fErrorStrategy != "SUMOFSQUARES") {
   Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
         << " neural net error function. Setting error function to "
         << " SUMOFSQUARES now." << Endl;
}
// multiclass:
if (fErrorStrategy == "SUMOFSQUARES") {
   // ...
}
if (fErrorStrategy == "CROSSENTROPY") {
   // ...
}
if (fErrorStrategy == "MUTUALEXCLUSIVE") {
   // ...
}

if (fWeightInitializationString == "XAVIER") {
   // ...
}
else if (fWeightInitializationString == "XAVIERUNIFORM") {
   // ...
}
else if (fWeightInitializationString == "GAUSS") {
   // ...
}
else if (fWeightInitializationString == "UNIFORM") {
   // ...
}
else if (fWeightInitializationString == "ZERO") {
   // ...
}
KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString, TString("|"), TString(","));
for (auto &block : strategyKeyValues) {
   // ...
   if (regularization == "L1") {
      // ...
   } else if (regularization == "L2") {
      // ...
   }

   TString strMultithreading = fetchValueTmp(block, "Multithreading", TString("True"));
   if (strMultithreading.BeginsWith("T")) {
      // ...
   }

   fTrainingSettings.push_back(settings);
}
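
// Illustrative example (values taken from the default TrainingStrategy string declared above): the
// ParseKeyValueString call splits the string on "|" into one block per training phase and on ","
// into key=value pairs, so the first block contains, among other keys,
//    { "LearningRate" -> "1e-1", "ConvergenceSteps" -> "50", "DropRepetitions" -> "5" }
// and each block is then turned into one TTrainingSettings entry of fTrainingSettings via the
// fetchValueTmp helpers.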
// MethodDL::ParseInputLayout()
const TString delim("|");

TString inputLayoutString = this->GetInputLayoutString();

TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
TIter nextInputDim(inputDimStrings);

for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
   // depth
   TString strDepth(inputDimString->GetString());
   depth = (size_t)strDepth.Atoi();
   // height
   TString strHeight(inputDimString->GetString());
   height = (size_t)strHeight.Atoi();
   // width
   TString strWidth(inputDimString->GetString());
   width = (size_t)strWidth.Atoi();
}

this->SetInputDepth(depth);
this->SetInputHeight(height);
this->SetInputWidth(width);
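
// Illustrative example: an InputLayout string such as "1|1|14" tokenizes on "|" into three fields
// read in order as depth, height and width, i.e. SetInputDepth(1), SetInputHeight(1) and
// SetInputWidth(14); a CNN on 32x32 single-channel images would instead use "1|32|32".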
// MethodDL::ParseBatchLayout()
const TString delim("|");

TString batchLayoutString = this->GetBatchLayoutString();

size_t batchDepth = 0;
size_t batchHeight = 0;
size_t batchWidth = 0;

TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
TIter nextBatchDim(batchDimStrings);

for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
   // batch depth
   TString strDepth(batchDimString->GetString());
   batchDepth = (size_t)strDepth.Atoi();
   // batch height
   TString strHeight(batchDimString->GetString());
   batchHeight = (size_t)strHeight.Atoi();
   // batch width
   TString strWidth(batchDimString->GetString());
   batchWidth = (size_t)strWidth.Atoi();
}

this->SetBatchDepth(batchDepth);
this->SetBatchHeight(batchHeight);
this->SetBatchWidth(batchWidth);
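
// Illustrative example: for a dense network with batch size 128 and 14 input variables a typical
// BatchLayout is "1|128|14", i.e. batchDepth = 1, batchHeight = the batch size and batchWidth = the
// number of input variables. The consistency checks in TrainDeepNet below require that either
// batchDepth or batchHeight equals the batch size and that the product of the input dimensions
// matches the remaining batch dimensions.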
// MethodDL::CreateDeepNet()
template <typename Architecture_t, typename Layer_t>
// ...
const TString layerDelimiter(",");
const TString subDelimiter("|");

TString layoutString = this->GetLayoutString();

TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
TIter nextLayer(layerStrings);

for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
   TIter nextToken(subStrings);
   TString strLayerType = token->GetString();

   if (strLayerType == "DENSE") {
      ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
   } else if (strLayerType == "CONV") {
      ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
   } else if (strLayerType == "MAXPOOL") {
      ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
   } else if (strLayerType == "RESHAPE") {
      ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
   } else if (strLayerType == "RNN") {
      ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter);
   } else if (strLayerType == "LSTM") {
      Log() << kFATAL << "LSTM Layer is not yet fully implemented" << Endl;
   }
}
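
// Illustrative example: a Layout string is a comma-separated list of layer descriptors, each a
// "|"-separated list whose first token selects one of the parsers above, e.g.
//    "CONV|12|3|3|1|1|1|1|RELU,MAXPOOL|2|2|1|1,RESHAPE|FLAT,DENSE|64|RELU,DENSE|1|LINEAR"
// dispatches in turn to ParseConvLayer, ParseMaxPoolLayer, ParseReshapeLayer and ParseDenseLayer.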
// MethodDL::ParseDenseLayer()
template <typename Architecture_t, typename Layer_t>
// ...
const size_t inputSize = GetNvar();

TObjArray *subStrings = layerString.Tokenize(delim);
TIter nextToken(subStrings);

for (; token != nullptr; token = (TObjString *)nextToken()) {
   if (idxToken == 1) continue;   // the first token is the layer type ("DENSE")

   if (strActFnc == "RELU") {
      // ...
   } else if (strActFnc == "TANH") {
      // ...
   } else if (strActFnc == "SYMMRELU") {
      // ...
   } else if (strActFnc == "SOFTSIGN") {
      // ...
   } else if (strActFnc == "SIGMOID") {
      // ...
   } else if (strActFnc == "LINEAR") {
      // ...
   } else if (strActFnc == "GAUSS") {
      // ...
   } else if (width == 0) {
      // the token gives the number of nodes, possibly as an expression in N (the number of input variables)
      TString strNumNodes = strActFnc;
      // ...
      strNumNodes.ReplaceAll("N", strN);
      strNumNodes.ReplaceAll("n", strN);
      // ...
      width = fml.Eval(inputSize);
   }
}

if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
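
// Illustrative example: the node-count token of a DENSE descriptor may be an arithmetic expression
// in N, the number of input variables. With 20 input variables the default token "(N+100)*2" is
// rewritten in terms of the TFormula variable and evaluated at N = 20, giving width = (20+100)*2 = 240.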
// MethodDL::ParseConvLayer()
template <typename Architecture_t, typename Layer_t>
// ...
int zeroPadHeight = 0;
int zeroPadWidth = 0;

TObjArray *subStrings = layerString.Tokenize(delim);
TIter nextToken(subStrings);

for (; token != nullptr; token = (TObjString *)nextToken()) {
   // depth (number of filters)
   depth = strDepth.Atoi();
   // filter height and width
   TString strFltHeight(token->GetString());
   fltHeight = strFltHeight.Atoi();
   fltWidth = strFltWidth.Atoi();
   // strides
   TString strStrideRows(token->GetString());
   strideRows = strStrideRows.Atoi();
   TString strStrideCols(token->GetString());
   strideCols = strStrideCols.Atoi();
   // zero padding
   TString strZeroPadHeight(token->GetString());
   zeroPadHeight = strZeroPadHeight.Atoi();
   TString strZeroPadWidth(token->GetString());
   zeroPadWidth = strZeroPadWidth.Atoi();
   // activation function
   if (strActFnc == "RELU") {
      // ...
   } else if (strActFnc == "TANH") {
      // ...
   } else if (strActFnc == "SYMMRELU") {
      // ...
   } else if (strActFnc == "SOFTSIGN") {
      // ...
   } else if (strActFnc == "SIGMOID") {
      // ...
   } else if (strActFnc == "LINEAR") {
      // ...
   } else if (strActFnc == "GAUSS") {
      // ...
   }
}

deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                     zeroPadHeight, zeroPadWidth, activationFunction);

if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                                  zeroPadHeight, zeroPadWidth, activationFunction);
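
// Illustrative example: the descriptor "CONV|12|3|3|1|1|1|1|RELU" read by this parser yields 12
// filters of size 3x3, unit strides, one unit of zero padding in each direction and a ReLU
// activation, i.e. a call equivalent to AddConvLayer(12, 3, 3, 1, 1, 1, 1, EActivationFunction::kRelu).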
// MethodDL::ParseMaxPoolLayer()
template <typename Architecture_t, typename Layer_t>
// ...
TObjArray *subStrings = layerString.Tokenize(delim);
TIter nextToken(subStrings);

for (; token != nullptr; token = (TObjString *)nextToken()) {
   // pooling frame height and width
   TString strFrmHeight(token->GetString());
   frameHeight = strFrmHeight.Atoi();
   frameWidth = strFrmWidth.Atoi();
   // strides
   TString strStrideRows(token->GetString());
   strideRows = strStrideRows.Atoi();
   TString strStrideCols(token->GetString());
   strideCols = strStrideCols.Atoi();
}

deepNet.AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols);

if (fBuildNet) fNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols);
// MethodDL::ParseReshapeLayer()
template <typename Architecture_t, typename Layer_t>
// ...
bool flattening = false;

TObjArray *subStrings = layerString.Tokenize(delim);
TIter nextToken(subStrings);

for (; token != nullptr; token = (TObjString *)nextToken()) {
   if (token->GetString() == "FLAT") idxToken = 4;
   // depth, height, width
   depth = strDepth.Atoi();
   height = strHeight.Atoi();
   width = strWidth.Atoi();
   // flattening flag
   if (flat == "FLAT") {
      // ...
   }
}

if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
// MethodDL::ParseRnnLayer()
template <typename Architecture_t, typename Layer_t>
// ...
bool rememberState = false;

TObjArray *subStrings = layerString.Tokenize(delim);
TIter nextToken(subStrings);

for (; token != nullptr; token = (TObjString *)nextToken()) {
   // state size, input size, time steps, remember-state flag
   TString strstateSize(token->GetString());
   stateSize = strstateSize.Atoi();
   TString strinputSize(token->GetString());
   inputSize = strinputSize.Atoi();
   TString strtimeSteps(token->GetString());
   timeSteps = strtimeSteps.Atoi();
   TString strrememberState(token->GetString());
   rememberState = (bool)strrememberState.Atoi();
}

deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);

if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
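
// Illustrative example: the descriptor "RNN|10|14|5|1" read by this parser gives stateSize = 10,
// inputSize = 14, timeSteps = 5 and rememberState = true, i.e. a call equivalent to
// AddBasicRNNLayer(10, 14, 5, true).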
// MethodDL::ParseLstmLayer()
template <typename Architecture_t, typename Layer_t>
// ...
TObjArray *subStrings = layerString.Tokenize(delim);
TIter nextToken(subStrings);

for (; token != nullptr; token = (TObjString *)nextToken()) {
   // ...
}
884 :
MethodBase(jobName,
Types::kDL, methodTitle, theData, theOption), fInputDepth(), fInputHeight(), fInputWidth(),
885 fBatchDepth(), fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(),
886 fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(),
887 fWeightInitializationString(), fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()
// MethodDL::ParseKeyValueString()
parseString.ReplaceAll(" ", "");

const TString keyValueDelim("=");

TObjArray *blockStrings = parseString.Tokenize(blockDelim);
TIter nextBlock(blockStrings);

for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
   blockKeyValues.push_back(std::map<TString, TString>());
   std::map<TString, TString> &currentBlock = blockKeyValues.back();

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      int delimPos = strKeyValue.First(keyValueDelim.Data());
      if (delimPos <= 0) continue;

      TString strKey = TString(strKeyValue(0, delimPos));
      TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));

      currentBlock.insert(std::make_pair(strKey, strValue));
   }
}
return blockKeyValues;
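
// Illustrative example: after whitespace has been stripped,
//    ParseKeyValueString("LearningRate=1e-1,BatchSize=32|LearningRate=1e-2,BatchSize=32",
//                        TString("|"), TString(","))
// returns a vector of two maps,
//    { "LearningRate" -> "1e-1", "BatchSize" -> "32" } and { "LearningRate" -> "1e-2", "BatchSize" -> "32" }.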
// MethodDL::TrainDeepNet()
template <typename Architecture_t>
// ...
using Scalar_t = typename Architecture_t::Scalar_t;

size_t trainingPhase = 1;
// ...
size_t batchSize = settings.batchSize;
// ...
if (batchDepth != batchSize && batchDepth > 1) {
   Error("Train", "Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",
         batchDepth, batchSize);
}
if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight) {
   Error("Train", "Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",
         batchHeight, batchSize);
}

// check that the input layout is compatible with the batch layout
bool badLayout = false;
// case batchDepth == batchSize
if (batchDepth == batchSize)
   badLayout = (inputDepth * inputHeight * inputWidth != batchHeight * batchWidth);
// case batchHeight == batchSize and batchDepth == 1
if (batchHeight == batchSize && batchDepth == 1)
   badLayout |= (inputDepth * inputHeight * inputWidth != batchWidth);
if (badLayout) {
   Error("Train", "Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
         inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth);
}

DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);

// the evaluation network fNet is created only in the first training phase (with batch size 1)
if (trainingPhase == 1) {
   fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
                                                           batchHeight, batchWidth, J, I, R, weightDecay));
}

std::vector<DeepNet_t> nets{};
nets.reserve(nThreads);
for (size_t i = 0; i < nThreads; i++) {
   nets.push_back(deepNet);
}

// in later training phases start from the weights obtained in the previous phase
if (trainingPhase > 1) {
   for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
      const auto &nLayer = fNet->GetLayerAt(i);
      const auto &dLayer = deepNet.GetLayerAt(i);
      Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights());
      Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases());
   }
}

Log() << "***** Deep Learning Network *****" << Endl;
if (Log().GetMinType() <= kINFO)
   // ...

TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, deepNet.GetBatchSize(),
                                deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
                                deepNet.GetOutputWidth(), nThreads);

TensorDataLoader_t testingData(testTuple, nTestSamples, deepNet.GetBatchSize(),
                               deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
                               deepNet.GetOutputWidth(), nThreads);

// ...
               settings.testInterval);

std::vector<TTensorBatch<Architecture_t>> batches{};

bool converged = false;
size_t stepCount = 0;
size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();

std::chrono::time_point<std::chrono::system_clock> tstart, tend;
tstart = std::chrono::system_clock::now();

std::string separator(62, '-');
Log() << std::setw(10) << "Epoch"
      << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Test Err."
      << std::setw(12) << "t(s)/epoch" << std::setw(12) << "Eval t(s)"
      << std::setw(12) << "nEvents/s"
      << std::setw(12) << "Conv. Steps" << Endl;

size_t shuffleSeed = 0;

Log() << "Initial Deep Net Weights " << Endl;
auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
for (size_t l = 0; l < weights_tensor.size(); ++l)
   weights_tensor[l].Print();
auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
bias_tensor[0].Print();
while (!converged) {
   // shuffle the training data before each epoch
   trainingData.Shuffle(rng);

   for (size_t i = 0; i < batchesInEpoch; ++i) {
      auto my_batch = trainingData.GetTensorBatch();

      if (settings.momentum > 0.0) {
         minimizer.Step(deepNet, my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
      } else {
         minimizer.Step(deepNet, my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
      }
   }

   if ((stepCount % minimizer.GetTestInterval()) == 0) {
      std::chrono::time_point<std::chrono::system_clock> t1, t2;
      t1 = std::chrono::system_clock::now();

      // compute the error on the test sample
      for (auto batch : testingData) {
         auto inputTensor = batch.GetInput();
         auto outputMatrix = batch.GetOutput();
         auto weights = batch.GetWeights();
         testError += deepNet.Loss(inputTensor, outputMatrix, weights);
      }

      t2 = std::chrono::system_clock::now();
      testError /= (Double_t)(nTestSamples / settings.batchSize);

      if (testError < minTestError) {
         // the best network found so far is copied into fNet
         Log() << std::setw(10) << stepCount << " Minimum test error found - save the configuration " << Endl;
         for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
            const auto &nLayer = fNet->GetLayerAt(i);
            const auto &dLayer = deepNet.GetLayerAt(i);
            ArchitectureImpl_t::CopyDiffArch(nLayer->GetWeights(), dLayer->GetWeights());
            ArchitectureImpl_t::CopyDiffArch(nLayer->GetBiases(), dLayer->GetBiases());
         }
         minTestError = testError;
      } else if (minTestError <= 0.) {
         minTestError = testError;
      }

      // compute the error on the training sample
      for (auto batch : trainingData) {
         auto inputTensor = batch.GetInput();
         auto outputMatrix = batch.GetOutput();
         auto weights = batch.GetWeights();
         trainingError += deepNet.Loss(inputTensor, outputMatrix, weights);
      }
      trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);

      tend = std::chrono::system_clock::now();

      // timing per test interval
      std::chrono::duration<double> elapsed_seconds = tend - tstart;
      std::chrono::duration<double> elapsed1 = t1 - tstart;
      std::chrono::duration<double> elapsed_testing = tend - t1;

      double seconds = elapsed_seconds.count();
      double eventTime = elapsed1.count() / (batchesInEpoch * settings.testInterval * settings.batchSize);

      converged = minimizer.HasConverged(testError) || stepCount >= settings.maxEpochs;

      Log() << std::setw(10) << stepCount << " | " << std::setw(12) << trainingError << std::setw(12) << testError
            << std::setw(12) << seconds / settings.testInterval
            << std::setw(12) << elapsed_testing.count()
            << std::setw(12) << 1. / eventTime
            << std::setw(12) << minimizer.GetConvergenceCount()
            << Endl;

      tstart = std::chrono::system_clock::now();
   }

   if (converged && debug) {
      Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << stepCount << Endl;
      auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
      auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
      for (size_t l = 0; l < weights_tensor.size(); ++l)
         weights_tensor[l].Print();
      bias_tensor[0].Print();
   }
}
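
// A conceptual stand-in (not the TMVA minimizer): the convergence test used above keeps the best
// test error seen so far and declares convergence once it has not improved for a given number of
// test intervals, in addition to the hard maxEpochs cut. A minimal version of that bookkeeping:
struct SimpleConvergenceTest {
   double bestError = -1.0;     // <= 0 means "no measurement yet", mirroring the minTestError handling above
   size_t sinceImprovement = 0; // test intervals without improvement
   size_t patience;             // plays the role of settings.convergenceSteps
   explicit SimpleConvergenceTest(size_t p) : patience(p) {}
   bool hasConverged(double testError)
   {
      if (bestError <= 0. || testError < bestError) {
         bestError = testError;
         sinceImprovement = 0;
      } else {
         ++sinceImprovement;
      }
      return sinceImprovement >= patience;
   }
};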
// MethodDL::Train()
Log() << kFATAL << "Not implemented yet" << Endl;

// Architecture == "GPU":
#ifdef R__HAS_TMVAGPU
Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
TrainDeepNet<DNN::TCuda<Double_t> >();
#else
Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                   "you have CUDA installed and it was successfully "
                   "detected by CMAKE."
      << Endl;
#endif

// Architecture == "OPENCL":
Log() << kFATAL << "OPENCL backend not yet supported." << Endl;

// Architecture == "CPU":
#ifdef R__HAS_TMVACPU
Log() << kINFO << "Start of deep neural network training on CPU." << Endl << Endl;
TrainDeepNet<DNN::TCpu<Double_t> >();
#else
Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                   "you have a BLAS implementation and it was successfully "
                   "detected by CMake as well that the imt CMake flag is set."
      << Endl;
#endif

// Architecture == "STANDARD":
Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
TrainDeepNet<DNN::TReference<Double_t> >();
1313 " is not a supported archiectire for TMVA::MethodDL" 1333 using Matrix_t =
typename ArchitectureImpl_t::Matrix_t;
1336 int batchWidth =
fNet->GetBatchWidth();
1337 int batchDepth =
fNet->GetBatchDepth();
1338 int batchHeight =
fNet->GetBatchHeight();
1339 int nb =
fNet->GetBatchSize();
1340 int noutput =
fNet->GetOutputWidth();
1345 std::vector<Matrix_t> X{};
1346 Matrix_t YHat(nb, noutput);
1356 int n1 = batchHeight;
1357 int n2 = batchWidth;
1361 X.emplace_back(Matrix_t(n1, n2));
1364 if (n1*n2 != nVariables) {
1369 for (
int j = 0; j < n1; ++j) {
1370 for (
int k = 0; k < n2; k++) {
1371 X[0](j, k) = inputValues[j*n1+k];
1377 for (
int k = 0; k < n2; k++) {
1378 X[0](0, k) = inputValues[k];
1385 double mvaValue = YHat(0, 0);
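
// Note (illustrative): for a single event the nVariables input values fill the n1 x n2 matrix X[0]
// in row-major order, element (j, k) taking inputValues[j * n2 + k]; in the common dense-layer case
// n1 == 1 and this reduces to the second loop above, X[0](0, k) = inputValues[k].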
#ifdef DEBUG_MVAVALUE
using Tensor_t = std::vector<Matrix_t>;
TMatrixF xInput(n1, n2, inputValues.data());
std::cout << "Output of DeepNet " << mvaValue << std::endl;
auto &deepnet = *fNet;
std::cout << "Loop on layers " << std::endl;
for (int l = 0; l < deepnet.GetDepth(); ++l) {
   std::cout << "Layer " << l;
   const auto *layer = deepnet.GetLayerAt(l);
   const Tensor_t &layer_output = layer->GetOutput();
   std::cout << "DNN output " << layer_output.size() << std::endl;
   for (size_t i = 0; i < layer_output.size(); ++i) {
#ifdef R__HAS_TMVAGPU
      TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols(), layer_output[i].GetRawDataPointer());
#endif
   }
   const Tensor_t &layer_weights = layer->GetWeights();
   std::cout << "DNN weights " << layer_weights.size() << std::endl;
   if (layer_weights.size() > 0) {
#ifdef R__HAS_TMVAGPU
      TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols(), layer_weights[i].GetRawDataPointer());
#endif
   }
}
#endif // DEBUG_MVAVALUE
// MethodDL::AddWeightsXMLTo()
void *nn = xmlEngine.NewChild(parent, 0, "Weights");

Int_t inputDepth = fNet->GetInputDepth();
Int_t inputHeight = fNet->GetInputHeight();
Int_t inputWidth = fNet->GetInputWidth();

Int_t batchDepth = fNet->GetBatchDepth();
Int_t batchHeight = fNet->GetBatchHeight();
Int_t batchWidth = fNet->GetBatchWidth();

char lossFunction = static_cast<char>(fNet->GetLossFunction());
char initialization = static_cast<char>(fNet->GetInitialization());

xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));

xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));

xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));

xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));

for (Int_t i = 0; i < depth; i++)
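
// Illustrative sketch of the node written above (attribute values are placeholders; the attribute
// set is exactly the one created by the NewAttr calls):
//
//    <Weights NetDepth="3" InputDepth="1" InputHeight="1" InputWidth="14"
//             BatchSize="1" BatchDepth="1" BatchHeight="1" BatchWidth="14"
//             LossFunction="..." Initialization="..." Regularization="..." OutputFunction="...">
//       <!-- one child element per layer, written by the loop above -->
//    </Weights>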
// MethodDL::ReadWeightsFromXML()
size_t inputDepth, inputHeight, inputWidth;
// ...
size_t batchSize, batchDepth, batchHeight, batchWidth;
// ...
char lossFunctionChar;
// ...
char initializationChar;
// ...
char regularizationChar;
// ...
char outputFunctionChar;
// ...

// re-create the network with the parameters read from the XML node
fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
                                                        batchHeight, batchWidth,
                                                        static_cast<ELossFunction>(lossFunctionChar),
                                                        static_cast<EInitialization>(initializationChar),
                                                        static_cast<ERegularization>(regularizationChar),
                                                        // ...

for (size_t i = 0; i < netDepth; i++) {
   if (layerName == "DenseLayer") {
      // ...
      fNet->AddDenseLayer(width, func, 0.0);
   }
   else if (layerName == "ConvLayer") {
      size_t fltHeight, fltWidth = 0;
      size_t strideRows, strideCols = 0;
      size_t padHeight, padWidth = 0;
      // ...
      fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                         padHeight, padWidth, actFunction);
   }
   else if (layerName == "MaxPoolLayer") {
      size_t frameHeight, frameWidth = 0;
      size_t strideRows, strideCols = 0;
      // ...
      fNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols);
   }
   else if (layerName == "ReshapeLayer") {
      size_t depth, height, width = 0;
      // ...
      fNet->AddReshapeLayer(depth, height, width, flattening);
   }
   else if (layerName == "RNNLayer") {
      size_t stateSize, inputSize, timeSteps = 0;
      int rememberState = 0;
      // ...
      fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
   }

   fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
}
void SetBatchHeight(size_t batchHeight)
DNN::ELossFunction GetLossFunction() const
void ParseRnnLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate RNN layer.
MsgLogger & Endl(MsgLogger &ml)
Singleton class for Global types used by TMVA.
TString fLayoutString
The string defining the layout of the deep net.
void SetInputDepth(size_t inputDepth)
Setters.
void SetBatchWidth(size_t batchWidth)
Collectable string class.
DNN::EInitialization GetWeightInitialization() const
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width...
void Train()
Methods for training the deep learning network.
EMsgType GetMinType() const
Virtual base Class for all MVA method.
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
DNN::EInitialization fWeightInitialization
The initialization method.
size_t GetBatchWidth() const
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth, filter height and width, striding in rows and columns, the zero paddings, as well as the activation function and the dropout probability.
void Init()
default initializations
void ParseBatchLayout()
Parse the batch layout.
void AddWeightsXMLTo(void *parent) const
void GetHelpMessage() const
Ranking for variables in method (implementation)
void DeclareOptions()
The option handling methods.
void ParseLstmLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate LSTM layer.
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width...
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero) ...
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
TString fArchitectureString
The string defining the architecture: CPU or GPU.
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options...
size_t fInputDepth
The depth of the input.
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
const Event * GetEvent() const
TString fTrainingStrategyString
The string defining the training strategy.
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
std::vector< std::map< TString, TString > > KeyValueVector_t
DataSetInfo & DataInfo() const
void SetBatchDepth(size_t batchDepth)
TString fBatchLayoutString
The string defining the layout of the batch.
Class that contains all the data information.
virtual void Print(Option_t *option="") const
Print TNamed name and title.
TString GetArchitectureString() const
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
void Initialize()
Initialize the weights and biases according to the given initialization method.
void SetInputWidth(size_t inputWidth)
All of the options that can be specified in the training string.
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
const TString & GetString() const
size_t fInputWidth
The width of the input.
DNN::ELossFunction fLossFunction
The loss function.
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
const Ranking * CreateRanking()
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
const std::vector< TTrainingSettings > & GetTrainingSettings() const
size_t GetBatchDepth() const
virtual ~MethodDL()
Virtual Destructor.
size_t GetInputDepth() const
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
size_t fBatchDepth
The depth of the batch used to train the deep net.
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width...
UInt_t GetNVariables() const
accessor to the number of variables
TString fErrorStrategy
The string defining the error strategy for training.
DNN::ERegularization regularization
size_t fInputHeight
The height of the input.
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
void ParseInputLayout()
Parse the input layout.
EOutputFunction
Enum that represents output functions.
ELossFunction
Enum that represents objective functions for the net, i.e.
void SetInputHeight(size_t inputHeight)
void ReadWeightsFromStream(std::istream &)
void TrainDeepNet()
Implementation of architecture specific train method.
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max pool layer.
void Print(Option_t *name="") const
Print the matrix as a table of elements.
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
std::vector< Float_t > & GetValues()
IPythonInteractive * fInteractive
DNN::EOutputFunction GetOutputFunction() const
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
size_t fBatchHeight
The height of the batch used to train the deep net.
std::vector< Double_t > dropoutProbabilities
void ReadWeightsFromXML(void *wghtnode)
TString fInputLayoutString
The string defining the layout of the input.
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
size_t GetInputHeight() const
size_t GetBatchHeight() const
size_t GetInputWidth() const
size_t fBatchWidth
The width of the batch used to train the deep net.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
TString fWeightInitializationString
The string defining the weight initialization method.
std::unique_ptr< DeepNetImpl_t > fNet
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Generic Deep Neural Network class.