78 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
79 if (it == keyValueMap.end()) {
132 if (value ==
"TRUE" || value ==
"T" || value ==
"1") {
142 std::vector<double> defaultValue)
145 if (parseString ==
"") {
150 std::vector<double> values;
154 TIter nextToken(tokenStrings);
156 for (; tokenString != NULL; tokenString = (
TObjString *)nextToken()) {
157 std::stringstream sstr;
160 sstr >> currentValue;
161 values.push_back(currentValue);
178 " or cross entropy (binary classification).");
194 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
195 "Specify as 100 to use exactly 100 events. (Default: 20%)");
207 "ConvergenceSteps=100,"
213 "Regularization=None,"
215 "TrainingStrategy",
"Defines the training strategies.");
223 Log() << kINFO <<
"Will ignore negative events in training!" <<
Endl;
227 Log() << kWARNING <<
"The STANDARD architecture is not supported anymore. "
228 "Please use Architecture=CPU or Architecture=CPU."
229 "See the TMVA Users' Guide for instructions if you "
230 "encounter problems."
232 Log() << kINFO <<
"We will use instead the CPU architecture" <<
Endl;
236 Log() << kERROR <<
"The OPENCL architecture has not been implemented yet. "
237 "Please use Architecture=CPU or Architecture=CPU for the "
238 "time being. See the TMVA Users' Guide for instructions "
239 "if you encounter problems."
242 Log() << kINFO <<
"We will try using the GPU-CUDA architecture if available" <<
Endl;
251 Log() << kINFO <<
"Will now use the GPU architecture !" <<
Endl;
253 Log() << kERROR <<
"CUDA backend not enabled. Please make sure "
254 "you have CUDA installed and it was successfully "
255 "detected by CMAKE by using -Dtmva-gpu=On "
258 Log() << kINFO <<
"Will now use instead the CPU architecture !" <<
Endl;
264 Log() << kINFO <<
"Will now use the CPU architecture with BLAS and IMT support !" <<
Endl;
266 Log() << kINFO <<
"Multi-core CPU backend not enabled. For better performances, make sure "
267 "you have a BLAS implementation and it was successfully "
268 "detected by CMake as well that the imt CMake flag is set."
270 Log() << kINFO <<
"Will use anyway the CPU architecture but with slower performance" <<
Endl;
290 Log() << kWARNING <<
"For regression only SUMOFSQUARES is a valid "
291 <<
" neural net error function. Setting error function to "
292 <<
" SUMOFSQUARES now." <<
Endl;
331 for (
auto &block : strategyKeyValues) {
354 if (optimizer ==
"SGD") {
356 }
else if (optimizer ==
"ADAM") {
358 }
else if (optimizer ==
"ADAGRAD") {
360 }
else if (optimizer ==
"RMSPROP") {
362 }
else if (optimizer ==
"ADADELTA") {
371 std::vector<TString> optimParamLabels = {
"_beta1",
"_beta2",
"_eps",
"_rho"};
373 std::map<TString, double> defaultValues = {
374 {
"ADADELTA_eps", 1.E-8}, {
"ADADELTA_rho", 0.95},
375 {
"ADAGRAD_eps", 1.E-8},
376 {
"ADAM_beta1", 0.9}, {
"ADAM_beta2", 0.999}, {
"ADAM_eps", 1.E-7},
377 {
"RMSPROP_eps", 1.E-7}, {
"RMSPROP_rho", 0.9},
379 for (
auto &pN : optimParamLabels) {
382 if (defaultValues.count(optimParamName) > 0) {
383 double defValue = defaultValues[optimParamName];
449 TIter nextInputDim(inputDimStrings);
455 std::vector<size_t> inputShape;
456 inputShape.reserve(inputLayoutString.
Length()/2 + 2);
457 inputShape.push_back(0);
458 for (; inputDimString !=
nullptr; inputDimString = (
TObjString *)nextInputDim()) {
463 inputShape.push_back(subDim);
468 if (inputShape.size() == 2) {
470 inputShape.insert(inputShape.begin() + 1, {1,1});
472 else if (inputShape.size() == 3) {
474 inputShape.insert(inputShape.begin() + 2, 1);
490 size_t batchDepth = 0;
491 size_t batchHeight = 0;
492 size_t batchWidth = 0;
496 TIter nextBatchDim(batchDimStrings);
500 for (; batchDimString !=
nullptr; batchDimString = (
TObjString *)nextBatchDim()) {
505 batchDepth = (size_t)strDepth.
Atoi();
510 batchHeight = (size_t)strHeight.
Atoi();
515 batchWidth = (size_t)strWidth.
Atoi();
528template <
typename Architecture_t,
typename Layer_t>
533 const TString layerDelimiter(
",");
534 const TString subDelimiter(
"|");
542 TIter nextLayer(layerStrings);
546 for (; layerString !=
nullptr; layerString = (
TObjString *)nextLayer()) {
550 TIter nextToken(subStrings);
557 if (strLayerType ==
"DENSE") {
559 }
else if (strLayerType ==
"CONV") {
561 }
else if (strLayerType ==
"MAXPOOL") {
563 }
else if (strLayerType ==
"RESHAPE") {
565 }
else if (strLayerType ==
"BNORM") {
567 }
else if (strLayerType ==
"RNN") {
569 }
else if (strLayerType ==
"LSTM") {
571 }
else if (strLayerType ==
"GRU") {
582template <
typename Architecture_t,
typename Layer_t>
593 const size_t inputSize =
GetNvar();
597 TIter nextToken(subStrings);
604 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
609 if (strActFnc ==
"DENSE")
continue;
611 if (strActFnc ==
"RELU") {
613 }
else if (strActFnc ==
"TANH") {
615 }
else if (strActFnc ==
"FTANH") {
617 }
else if (strActFnc ==
"SYMMRELU") {
619 }
else if (strActFnc ==
"SOFTSIGN") {
621 }
else if (strActFnc ==
"SIGMOID") {
623 }
else if (strActFnc ==
"LINEAR") {
625 }
else if (strActFnc ==
"GAUSS") {
627 }
else if (
width == 0) {
631 TString strNumNodes = strActFnc;
642 size_t outputSize = 1;
670template <
typename Architecture_t,
typename Layer_t>
680 int zeroPadHeight = 0;
681 int zeroPadWidth = 0;
686 TIter nextToken(subStrings);
690 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
695 depth = strDepth.
Atoi();
700 fltHeight = strFltHeight.
Atoi();
705 fltWidth = strFltWidth.
Atoi();
710 strideRows = strStrideRows.
Atoi();
715 strideCols = strStrideCols.
Atoi();
720 zeroPadHeight = strZeroPadHeight.
Atoi();
725 zeroPadWidth = strZeroPadWidth.
Atoi();
730 if (strActFnc ==
"RELU") {
732 }
else if (strActFnc ==
"TANH") {
734 }
else if (strActFnc ==
"SYMMRELU") {
736 }
else if (strActFnc ==
"SOFTSIGN") {
738 }
else if (strActFnc ==
"SIGMOID") {
740 }
else if (strActFnc ==
"LINEAR") {
742 }
else if (strActFnc ==
"GAUSS") {
752 zeroPadHeight, zeroPadWidth, activationFunction);
756 if (
fBuildNet)
fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
757 zeroPadHeight, zeroPadWidth, activationFunction);
769template <
typename Architecture_t,
typename Layer_t>
775 int filterHeight = 0;
782 TIter nextToken(subStrings);
786 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
791 filterHeight = strFrmHeight.
Atoi();
796 filterWidth = strFrmWidth.
Atoi();
801 strideRows = strStrideRows.
Atoi();
806 strideCols = strStrideCols.
Atoi();
814 deepNet.
AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
817 if (
fBuildNet)
fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
830template <
typename Architecture_t,
typename Layer_t>
838 bool flattening =
false;
842 TIter nextToken(subStrings);
846 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
847 if (token->
GetString() ==
"FLAT") idxToken=4;
851 depth = strDepth.
Atoi();
856 height = strHeight.
Atoi();
866 if (flat ==
"FLAT") {
891template <
typename Architecture_t,
typename Layer_t>
898 double momentum = -1;
903 TIter nextToken(subStrings);
907 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
932template <
typename Architecture_t,
typename Layer_t>
941 bool rememberState =
false;
942 bool returnSequence =
false;
943 bool resetGateAfter =
false;
947 TIter nextToken(subStrings);
951 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
956 stateSize = strstateSize.
Atoi();
962 inputSize = strinputSize.
Atoi();
968 timeSteps = strtimeSteps.
Atoi();
974 rememberState = (
bool) strrememberState.
Atoi();
994 auto * recurrentLayer = deepNet.
AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
995 recurrentLayer->Initialize();
997 if (
fBuildNet)
fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1000 auto *recurrentLayer = deepNet.
AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1001 recurrentLayer->Initialize();
1004 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1007 if (Architecture_t::IsCudnn()) resetGateAfter =
true;
1008 auto *recurrentLayer = deepNet.
AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1009 recurrentLayer->Initialize();
1012 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1015 Log() << kFATAL <<
"Invalid Recurrent layer type " <<
Endl;
1022 :
MethodBase(jobName,
Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1023 fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1024 fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1025 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1026 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1035 :
MethodBase(
Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1036 fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1037 fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1038 fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1039 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1057 parseString.ReplaceAll(
" ",
"");
1059 const TString keyValueDelim(
"=");
1061 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1062 TIter nextBlock(blockStrings);
1065 for (; blockString !=
nullptr; blockString = (
TObjString *)nextBlock()) {
1066 blockKeyValues.push_back(std::map<TString, TString>());
1067 std::map<TString, TString> ¤tBlock = blockKeyValues.back();
1070 TIter nextToken(subStrings);
1073 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
1075 int delimPos = strKeyValue.
First(keyValueDelim.
Data());
1076 if (delimPos <= 0)
continue;
1085 currentBlock.insert(std::make_pair(strKey, strValue));
1088 return blockKeyValues;
1112 Int_t nValidationSamples = 0;
1117 if (fNumValidationString.EndsWith(
"%")) {
1122 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1123 nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
1125 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString
1126 <<
"\". Expected string like \"20%\" or \"20.0%\"." <<
Endl;
1128 }
else if (fNumValidationString.IsFloat()) {
1129 Double_t valSizeAsDouble = fNumValidationString.Atof();
1131 if (valSizeAsDouble < 1.0) {
1133 nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
1136 nValidationSamples = valSizeAsDouble;
1139 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString <<
"\". Expected string like \"0.2\" or \"100\"."
1145 if (nValidationSamples < 0) {
1146 Log() << kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is negative." <<
Endl;
1149 if (nValidationSamples == 0) {
1150 Log() << kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is zero." <<
Endl;
1153 if (nValidationSamples >= (
Int_t)trainingSetSize) {
1154 Log() << kFATAL <<
"Validation size \"" << fNumValidationString
1155 <<
"\" is larger than or equal in size to training set (size=\"" << trainingSetSize <<
"\")." <<
Endl;
1158 return nValidationSamples;
1165template <
typename Architecture_t>
1169 using Scalar_t =
typename Architecture_t::Scalar_t;
1187 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1188 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1190 size_t trainingPhase = 1;
1194 size_t nThreads = 1;
1198 size_t batchSize = settings.batchSize;
1219 if (batchDepth != batchSize && batchDepth > 1) {
1220 Error(
"Train",
"Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1223 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1224 Error(
"Train",
"Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1230 bool badLayout =
false;
1232 if (batchDepth == batchSize)
1233 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1235 if (batchHeight == batchSize && batchDepth == 1)
1236 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1238 Error(
"Train",
"Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1239 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1244 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1245 Log() << kFATAL <<
"Number of samples in the datasets are train: ("
1246 << nTrainingSamples <<
") test: (" << nValidationSamples
1247 <<
"). One of these is smaller than the batch size of "
1248 << settings.batchSize <<
". Please increase the batch"
1249 <<
" size to be at least the same size as the smallest"
1250 <<
" of them." <<
Endl;
1253 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J,
I,
R,
weightDecay);
1257 if (trainingPhase == 1) {
1258 fNet = std::unique_ptr<DeepNetImpl_t>(
new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1266 std::vector<DeepNet_t> nets{};
1267 nets.reserve(nThreads);
1268 for (
size_t i = 0; i < nThreads; i++) {
1270 nets.push_back(deepNet);
1280 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1281 for (
auto & p : dropoutVector) {
1284 deepNet.SetDropoutProbabilities(dropoutVector);
1286 if (trainingPhase > 1) {
1288 for (
size_t i = 0; i < deepNet.GetDepth(); ++i) {
1289 deepNet.GetLayerAt(i)->CopyParameters(*
fNet->GetLayerAt(i));
1310 Log() <<
"***** Deep Learning Network *****" <<
Endl;
1311 if (
Log().GetMinType() <= kINFO)
1314 Log() <<
"Using " << nTrainingSamples <<
" events for training and " << nValidationSamples <<
" for testing" <<
Endl;
1318 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1319 {inputDepth, inputHeight, inputWidth},
1320 {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1321 deepNet.GetOutputWidth(), nThreads);
1324 TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1325 {inputDepth, inputHeight, inputWidth},
1326 { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1327 deepNet.GetOutputWidth(), nThreads);
1336 Log() <<
"Compute initial loss on the validation data " <<
Endl;
1337 for (
auto batch : validationData) {
1338 auto inputTensor = batch.GetInput();
1339 auto outputMatrix = batch.GetOutput();
1340 auto weights = batch.GetWeights();
1344 minValError += deepNet.Loss(inputTensor, outputMatrix, weights,
false, includeRegularization);
1347 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1348 minValError /= (
Double_t)(nValidationSamples / settings.batchSize);
1349 minValError += regzTerm;
1353 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1358 case EOptimizer::kSGD:
1359 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1363 case EOptimizer::kAdam: {
1364 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1366 deepNet, settings.learningRate, settings.optimizerParams[
"ADAM_beta1"],
1367 settings.optimizerParams[
"ADAM_beta2"], settings.optimizerParams[
"ADAM_eps"]));
1371 case EOptimizer::kAdagrad:
1372 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1374 settings.optimizerParams[
"ADAGRAD_eps"]));
1377 case EOptimizer::kRMSProp:
1378 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1380 settings.optimizerParams[
"RMSPROP_rho"],
1381 settings.optimizerParams[
"RMSPROP_eps"]));
1384 case EOptimizer::kAdadelta:
1385 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1387 settings.optimizerParams[
"ADADELTA_rho"],
1388 settings.optimizerParams[
"ADADELTA_eps"]));
1394 std::vector<TTensorBatch<Architecture_t>> batches{};
1396 bool converged =
false;
1397 size_t convergenceCount = 0;
1398 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1401 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1402 tstart = std::chrono::system_clock::now();
1405 auto optimParametersString = [&]() {
1407 for (
auto & element : settings.optimizerParams) {
1409 key.
ReplaceAll(settings.optimizerName +
"_",
"");
1410 double value = element.second;
1411 if (!optimParameters.
IsNull())
1412 optimParameters +=
",";
1414 optimParameters +=
" (";
1417 if (!optimParameters.
IsNull())
1418 optimParameters +=
")";
1419 return optimParameters;
1423 <<
" Optimizer " << settings.optimizerName
1424 << optimParametersString()
1425 <<
" Learning rate = " << settings.learningRate <<
" regularization " << (char)settings.regularization
1426 <<
" minimum error = " << minValError <<
Endl;
1428 std::string separator(62,
'-');
1430 Log() << std::setw(10) <<
"Epoch"
1431 <<
" | " << std::setw(12) <<
"Train Err." << std::setw(12) <<
"Val. Err." << std::setw(12)
1432 <<
"t(s)/epoch" << std::setw(12) <<
"t(s)/Loss" << std::setw(12) <<
"nEvents/s" << std::setw(12)
1433 <<
"Conv. Steps" <<
Endl;
1439 size_t shuffleSeed = 0;
1445 Log() <<
"Initial Deep Net Weights " <<
Endl;
1446 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1447 for (
size_t l = 0;
l < weights_tensor.size(); ++
l)
1448 weights_tensor[
l].
Print();
1449 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1450 bias_tensor[0].
Print();
1453 Log() <<
" Start epoch iteration ..." <<
Endl;
1454 bool debugFirstEpoch =
false;
1455 bool computeLossInTraining =
true;
1456 size_t nTrainEpochs = 0;
1457 while (!converged) {
1459 trainingData.Shuffle(rng);
1465 for (
size_t i = 0; i < batchesInEpoch; ++i ) {
1472 if (debugFirstEpoch) std::cout <<
"\n\n----- batch # " << i <<
"\n\n";
1474 auto my_batch = trainingData.GetTensorBatch();
1476 if (debugFirstEpoch)
1477 std::cout <<
"got batch data - doing forward \n";
1481 Architecture_t::PrintTensor(my_batch.GetInput(),
"input tensor",
true);
1482 typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1483 typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1484 Architecture_t::PrintTensor(tOut,
"label tensor",
true) ;
1485 Architecture_t::PrintTensor(tW,
"weight tensor",
true) ;
1488 deepNet.Forward(my_batch.GetInput(),
true);
1490 if (computeLossInTraining) {
1491 auto outputMatrix = my_batch.GetOutput();
1492 auto weights = my_batch.GetWeights();
1493 trainingError += deepNet.Loss(outputMatrix, weights,
false);
1496 if (debugFirstEpoch)
1497 std::cout <<
"- doing backward \n";
1500 size_t nlayers = deepNet.GetLayers().size();
1501 for (
size_t l = 0;
l < nlayers; ++
l) {
1502 if (deepNet.GetLayerAt(
l)->GetWeights().size() > 0)
1503 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetWeightsAt(0),
1506 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetOutput(),
1513 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1515 if (debugFirstEpoch)
1516 std::cout <<
"- doing optimizer update \n";
1519 optimizer->IncrementGlobalStep();
1523 std::cout <<
"minmimizer step - momentum " << settings.momentum <<
" learning rate " << optimizer->GetLearningRate() << std::endl;
1524 for (
size_t l = 0;
l < nlayers; ++
l) {
1525 if (deepNet.GetLayerAt(
l)->GetWeights().size() > 0) {
1526 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetWeightsAt(0),
TString::Format(
"weights after step layer %d",
l).
Data());
1527 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetWeightGradientsAt(0),
"weight gradients");
1534 if (debugFirstEpoch) std::cout <<
"\n End batch loop - compute validation loss \n";
1536 debugFirstEpoch =
false;
1537 if ((nTrainEpochs % settings.testInterval) == 0) {
1539 std::chrono::time_point<std::chrono::system_clock>
t1,t2;
1541 t1 = std::chrono::system_clock::now();
1547 bool inTraining =
false;
1548 for (
auto batch : validationData) {
1549 auto inputTensor = batch.GetInput();
1550 auto outputMatrix = batch.GetOutput();
1551 auto weights = batch.GetWeights();
1553 valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1556 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1557 valError /= (
Double_t)(nValidationSamples / settings.batchSize);
1558 valError += regTerm;
1563 t2 = std::chrono::system_clock::now();
1566 if (valError < minValError) {
1567 convergenceCount = 0;
1569 convergenceCount += settings.testInterval;
1573 if (valError < minValError ) {
1575 Log() << std::setw(10) << nTrainEpochs
1576 <<
" Minimum Test error found - save the configuration " <<
Endl;
1577 for (
size_t i = 0; i < deepNet.GetDepth(); ++i) {
1578 fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1587 minValError = valError;
1589 else if ( minValError <= 0. )
1590 minValError = valError;
1592 if (!computeLossInTraining) {
1593 trainingError = 0.0;
1595 for (
auto batch : trainingData) {
1596 auto inputTensor = batch.GetInput();
1597 auto outputMatrix = batch.GetOutput();
1598 auto weights = batch.GetWeights();
1599 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights,
false,
false);
1603 trainingError /= (
Double_t)(nTrainingSamples / settings.batchSize);
1604 trainingError += regTerm;
1610 tend = std::chrono::system_clock::now();
1613 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1614 std::chrono::duration<double> elapsed1 =
t1-tstart;
1617 std::chrono::duration<double> elapsed_testing = tend-
t1;
1619 double seconds = elapsed_seconds.count();
1622 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1625 convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1628 Log() << std::setw(10) << nTrainEpochs <<
" | "
1629 << std::setw(12) << trainingError
1630 << std::setw(12) << valError
1631 << std::setw(12) << seconds / settings.testInterval
1632 << std::setw(12) << elapsed_testing.count()
1633 << std::setw(12) << 1. / eventTime
1634 << std::setw(12) << convergenceCount
1640 tstart = std::chrono::system_clock::now();
1644 if (converged && debug) {
1645 Log() <<
"Final Deep Net Weights for phase " << trainingPhase <<
" epoch " << nTrainEpochs
1647 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1648 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1649 for (
size_t l = 0;
l < weights_tensor.size(); ++
l)
1650 weights_tensor[
l].
Print();
1651 bias_tensor[0].
Print();
1664 Log() << kFATAL <<
"Not implemented yet" <<
Endl;
1670#ifdef R__HAS_TMVAGPU
1671 Log() << kINFO <<
"Start of deep neural network training on GPU." <<
Endl <<
Endl;
1673 TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1675 TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1678 Log() << kFATAL <<
"CUDA backend not enabled. Please make sure "
1679 "you have CUDA installed and it was successfully "
1680 "detected by CMAKE."
1685#ifdef R__HAS_TMVACPU
1688 Log() << kINFO <<
"Start of deep neural network training on CPU using MT, nthreads = "
1691 Log() << kINFO <<
"Start of deep neural network training on single thread CPU (without ROOT-MT support) " <<
Endl
1694 TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1699 " is not a supported architecture for TMVA::MethodDL"
1712 if (!
fNet ||
fNet->GetDepth() == 0) {
1713 Log() << kFATAL <<
"The network has not been trained and fNet is not built"
1732 if (
fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
1735 if (
fXInput.GetShape().size() == 2) {
1739 Log() << kFATAL <<
"First tensor dimension should be equal to batch size, i.e. = 1"
1747 if ( nVariables != nc * nhw) {
1748 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1749 <<
" n-event variables " << nVariables <<
" expected input tensor " << nc <<
" x " << nhw
1752 for (
size_t j = 0; j < nc; j++) {
1753 for (
size_t k = 0; k < nhw; k++) {
1760 assert(
fXInput.GetShape().size() >= 4);
1761 size_t nc =
fXInput.GetCSize();
1762 size_t nh =
fXInput.GetHSize();
1763 size_t nw =
fXInput.GetWSize();
1764 size_t n = nc * nh * nw;
1765 if ( nVariables !=
n) {
1766 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1767 <<
" n-event variables " << nVariables <<
" expected input tensor " << nc <<
" x " << nh <<
" x " << nw
1770 for (
size_t j = 0; j <
n; j++) {
1782 double mvaValue = (*fYHat)(0, 0);
1785#ifdef DEBUG_MVAVALUE
1786 using Tensor_t = std::vector<MatrixImpl_t>;
1787 TMatrixF xInput(n1,n2, inputValues.data() );
1790 std::cout <<
"Output of DeepNet " << mvaValue << std::endl;
1791 auto & deepnet = *
fNet;
1792 std::cout <<
"Loop on layers " << std::endl;
1793 for (
int l = 0;
l < deepnet.GetDepth(); ++
l) {
1794 std::cout <<
"Layer " <<
l;
1795 const auto * layer = deepnet.GetLayerAt(
l);
1796 const Tensor_t & layer_output = layer->GetOutput();
1798 std::cout <<
"DNN output " << layer_output.size() << std::endl;
1799 for (
size_t i = 0; i < layer_output.size(); ++i) {
1800#ifdef R__HAS_TMVAGPU
1804 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1808 const Tensor_t & layer_weights = layer->GetWeights();
1809 std::cout <<
"DNN weights " << layer_weights.size() << std::endl;
1810 if (layer_weights.size() > 0) {
1812#ifdef R__HAS_TMVAGPU
1816 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1828template <
typename Architecture_t>
1833 if (!
fNet ||
fNet->GetDepth() == 0) {
1834 Log() << kFATAL <<
"The network has not been trained and fNet is not built"
1852 using Matrix_t =
typename Architecture_t::Matrix_t;
1856 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J,
I,
R,
weightDecay);
1857 std::vector<DeepNet_t> nets{};
1862 for (
size_t i = 0; i < deepNet.GetDepth(); ++i) {
1863 deepNet.GetLayerAt(i)->CopyParameters(*
fNet->GetLayerAt(i));
1870 size_t n1 = deepNet.GetBatchHeight();
1871 size_t n2 = deepNet.GetBatchWidth();
1872 size_t n0 = deepNet.GetBatchSize();
1875 n1 = deepNet.GetBatchSize();
1879 Long64_t nEvents = lastEvt - firstEvt;
1881 TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1889 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1898 <<
" sample (" << nEvents <<
" events)" <<
Endl;
1902 std::vector<double> mvaValues(nEvents);
1905 for (
Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1907 Long64_t ievt_end = ievt + batchSize;
1909 if (ievt_end <= lastEvt) {
1911 if (ievt == firstEvt) {
1915 if (n1 == batchSize && n0 == 1) {
1916 if (n2 != nVariables) {
1917 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1918 <<
" n-event variables " << nVariables <<
" expected input matrix " << n1 <<
" x " << n2
1922 if (n1*n2 != nVariables || n0 != batchSize) {
1923 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1924 <<
" n-event variables " << nVariables <<
" expected input tensor " << n0 <<
" x " << n1 <<
" x " << n2
1930 auto batch = testData.GetTensorBatch();
1931 auto inputTensor = batch.GetInput();
1933 auto xInput = batch.GetInput();
1936 for (
size_t i = 0; i < batchSize; ++i) {
1937 double value = yHat(i,0);
1938 mvaValues[ievt + i] = (
TMath::IsNaN(value)) ? -999. : value;
1943 for (
Long64_t i = ievt; i < lastEvt; ++i) {
1952 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
1961 size_t nVariables = GetEvent()->GetNVariables();
1964 const Event *ev = GetEvent();
1965 const std::vector<Float_t>& inputValues = ev->
GetValues();
1966 for (
size_t i = 0; i < nVariables; i++) {
1967 X_vec(0,i,0) = inputValues[i];
1971 size_t nTargets = std::max(1u, ev->
GetNTargets());
1973 std::vector<Float_t>
output(nTargets);
1974 fNet->Prediction(YHat, X_vec, fOutputFunction);
1976 for (
size_t i = 0; i < nTargets; i++)
1979 if (fRegressionReturnVal == NULL) {
1980 fRegressionReturnVal =
new std::vector<Float_t>();
1982 fRegressionReturnVal->clear();
1985 for (
size_t i = 0; i < nTargets; ++i) {
1989 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1990 for (
size_t i = 0; i < nTargets; ++i) {
1991 fRegressionReturnVal->push_back(evT2->
GetTarget(i));
1994 return *fRegressionReturnVal;
2003 if (fMulticlassReturnVal == NULL) {
2004 fMulticlassReturnVal =
new std::vector<Float_t>(DataInfo().GetNClasses());
2007 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
2008 for (
size_t i = 0; i < nVariables; i++) {
2009 X_vec(0,i, 0) = inputValues[i];
2012 fNet->Prediction(YHat, X_vec, fOutputFunction);
2013 for (
size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
2014 (*fMulticlassReturnVal)[i] = YHat(0, i);
2016 return *fMulticlassReturnVal;
2035 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2036 if (firstEvt < 0) firstEvt = 0;
2037 nEvents = lastEvt-firstEvt;
2040 size_t defaultEvalBatchSize = (
fXInput.GetSize() > 1000) ? 100 : 1000;
2042 if (
size_t(nEvents) < batchSize ) batchSize = nEvents;
2046#ifdef R__HAS_TMVAGPU
2047 Log() << kINFO <<
"Evaluate deep neural network on GPU using batches with size = " << batchSize <<
Endl <<
Endl;
2049 return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2051 return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2056 Log() << kINFO <<
"Evaluate deep neural network on CPU using batches with size = " << batchSize <<
Endl <<
Endl;
2057 return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
2064 void* nn = xmlEngine.
NewChild(parent, 0,
"Weights");
2072 Int_t inputDepth =
fNet->GetInputDepth();
2073 Int_t inputHeight =
fNet->GetInputHeight();
2074 Int_t inputWidth =
fNet->GetInputWidth();
2078 Int_t batchDepth =
fNet->GetBatchDepth();
2079 Int_t batchHeight =
fNet->GetBatchHeight();
2080 Int_t batchWidth =
fNet->GetBatchWidth();
2082 char lossFunction =
static_cast<char>(
fNet->GetLossFunction());
2083 char initialization =
static_cast<char>(
fNet->GetInitialization());
2104 xmlEngine.NewAttr(nn, 0,
"LossFunction",
TString(lossFunction));
2105 xmlEngine.NewAttr(nn, 0,
"Initialization",
TString(initialization));
2107 xmlEngine.NewAttr(nn, 0,
"OutputFunction",
TString(outputFunction));
2112 for (
Int_t i = 0; i < depth; i++)
2132 size_t inputDepth, inputHeight, inputWidth;
2137 size_t batchSize, batchDepth, batchHeight, batchWidth;
2145 char lossFunctionChar;
2147 char initializationChar;
2149 char regularizationChar;
2151 char outputFunctionChar;
2168 fNet = std::unique_ptr<DeepNetImpl_t>(
new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2169 batchHeight, batchWidth,
2182 for (
size_t i = 0; i < netDepth; i++) {
2187 if (layerName ==
"DenseLayer") {
2203 else if (layerName ==
"ConvLayer") {
2208 size_t fltHeight, fltWidth = 0;
2209 size_t strideRows, strideCols = 0;
2210 size_t padHeight, padWidth = 0;
2224 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2225 padHeight, padWidth, actFunction);
2230 else if (layerName ==
"MaxPoolLayer") {
2233 size_t filterHeight, filterWidth = 0;
2234 size_t strideRows, strideCols = 0;
2240 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2243 else if (layerName ==
"ReshapeLayer") {
2246 size_t depth, height,
width = 0;
2253 fNet->AddReshapeLayer(depth, height,
width, flattening);
2257 else if (layerName ==
"RNNLayer") {
2260 size_t stateSize,inputSize, timeSteps = 0;
2261 int rememberState= 0;
2262 int returnSequence = 0;
2269 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2273 else if (layerName ==
"LSTMLayer") {
2276 size_t stateSize,inputSize, timeSteps = 0;
2277 int rememberState, returnSequence = 0;
2284 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2288 else if (layerName ==
"GRULayer") {
2291 size_t stateSize,inputSize, timeSteps = 0;
2292 int rememberState, returnSequence, resetGateAfter = 0;
2302 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2304 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2307 else if (layerName ==
"BatchNormLayer") {
2309 fNet->AddBatchNormLayer(0., 0.0);
2312 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
#define REGISTER_METHOD(CLASS)
for example
include TDocParser_001 C image html pict1_TDocParser_001 png width
char * Form(const char *fmt,...)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Adadelta Optimizer class.
static void PrintTensor(const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false)
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Generic Deep Neural Network class.
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Stochastic Batch Gradient Descent Optimizer class.
Generic General Layer class.
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Class that contains all the data information.
UInt_t GetNClasses() const
Types::ETreeType GetCurrentType() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
void SetCurrentEvent(Long64_t ievt) const
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
UInt_t GetNVariables() const
accessor to the number of variables
UInt_t GetNTargets() const
accessor to the number of targets
std::vector< Float_t > & GetValues()
Float_t GetTarget(UInt_t itgt) const
Virtual base Class for all MVA method.
const char * GetName() const
Bool_t IgnoreEventsWithNegWeightsInTraining() const
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
const TString & GetMethodName() const
const Event * GetEvent() const
DataSetInfo & DataInfo() const
UInt_t GetNVariables() const
Types::EAnalysisType fAnalysisType
TrainingHistory fTrainHistory
IPythonInteractive * fInteractive
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
size_t fBatchHeight
The height of the batch used to train the deep net.
void GetHelpMessage() const
DNN::ELossFunction fLossFunction
The loss function.
virtual const std::vector< Float_t > & GetMulticlassValues()
std::vector< size_t > fInputShape
Contains the batch size (no.
TString fLayoutString
The string defining the layout of the deep net.
void SetInputDepth(int inputDepth)
Setters.
std::unique_ptr< MatrixImpl_t > fYHat
void Train()
Methods for training the deep learning network.
size_t GetBatchHeight() const
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
TString fWeightInitializationString
The string defining the weight initialization method.
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max pool layer.
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
TString fArchitectureString
The string defining the architecture: CPU or GPU.
void Init()
default initializations
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
void TrainDeepNet()
Train the deep neural network using the defined architecture.
const std::vector< TTrainingSettings > & GetTrainingSettings() const
DNN::EOutputFunction GetOutputFunction() const
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
UInt_t GetNumValidationSamples()
Parse the validation string and return the number of events used for validation.
TString GetBatchLayoutString() const
void SetInputWidth(int inputWidth)
HostBufferImpl_t fXInputBuffer
size_t fBatchWidth
The width of the batch used to train the deep net.
size_t GetInputDepth() const
virtual const std::vector< Float_t > & GetRegressionValues()
std::unique_ptr< DeepNetImpl_t > fNet
TString GetInputLayoutString() const
void SetBatchHeight(size_t batchHeight)
size_t GetInputHeight() const
TString GetArchitectureString() const
void ParseBatchLayout()
Parse the batch layout.
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate batch normalization layer.
void ReadWeightsFromStream(std::istream &)
void ReadWeightsFromXML(void *wghtnode)
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
std::vector< std::map< TString, TString > > KeyValueVector_t
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
DNN::EInitialization fWeightInitialization
The initialization method.
size_t GetBatchDepth() const
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate RNN layer.
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
size_t GetInputWidth() const
void SetInputShape(std::vector< size_t > inputShape)
DNN::ELossFunction GetLossFunction() const
TString fBatchLayoutString
The string defining the layout of the batch.
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
TString fTrainingStrategyString
The string defining the training strategy.
const Ranking * CreateRanking()
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
void SetBatchDepth(size_t batchDepth)
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
void SetBatchWidth(size_t batchWidth)
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
DNN::EInitialization GetWeightInitialization() const
void SetBatchSize(size_t batchSize)
TString GetLayoutString() const
size_t fBatchDepth
The depth of the batch used to train the deep net.
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
size_t GetBatchWidth() const
void AddWeightsXMLTo(void *parent) const
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
virtual ~MethodDL()
Virtual Destructor.
void ParseInputLayout()
Parse the input layout.
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
void SetInputHeight(int inputHeight)
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options,...
TString fErrorStrategy
The string defining the error strategy for training.
void DeclareOptions()
The option handling methods.
TString fInputLayoutString
The string defining the layout of the input.
EMsgType GetMinType() const
Ranking for variables in method (implementation)
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
void Print(Option_t *name="") const
Print the matrix as a table of elements.
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Collectable string class.
const TString & GetString() const
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Int_t Atoi() const
Return integer value of string.
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Double_t Atof() const
Return floating-point value contained in string.
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Ssiz_t First(char c) const
Find first occurrence of a character c.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
void ToUpper()
Change string to upper case.
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
EOptimizer
Enum representing the optimizer used for training.
EOutputFunction
Enum that represents output functions.
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
ELossFunction
Enum that represents objective functions for the net, i.e.
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
create variable transformations
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
MsgLogger & Endl(MsgLogger &ml)
All of the options that can be specified in the training string.
std::map< TString, double > optimizerParams
DNN::EOptimizer optimizer
DNN::ERegularization regularization
std::vector< Double_t > dropoutProbabilities
static void output(int code)