TMVA::MethodDNN::MethodDNN(const TString& jobName, const TString& methodTitle,
                           DataSetInfo& theData, const TString& theOption)
   : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption),
     fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
     fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
     fTrainingSettings(), fResume(false), fSettings()
{
}

TMVA::MethodDNN::MethodDNN(DataSetInfo& theData, const TString& theWeightFile)
   : MethodBase(Types::kDNN, theData, theWeightFile),
     fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
     fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
     fTrainingSettings(), fResume(false), fSettings()
{
}
142 "Layout of the network.");
145 "Part of the training data to use for " 146 "validation. Specify as 0.2 or 20% to use a " 147 "fifth of the data set as validation set. " 148 "Specify as 100 to use exactly 100 events. " 153 "Loss function: Mean squared error (regression)" 154 " or cross entropy (binary classification).");
160 "WeightInitialization",
161 "Weight initialization strategy");
175 "ConvergenceSteps=50," 181 "DropRepetitions=5|LearningRate=1e-4," 184 "ConvergenceSteps=50," 189 "DropConfig=0.0+0.5+0.5," 191 "Multithreading=True",
193 "Defines the training strategies.");
auto TMVA::MethodDNN::ParseLayoutString(TString layoutString) -> LayoutVector_t
{
   LayoutVector_t layout;
   const TString layerDelimiter(",");
   const TString subDelimiter("|");

   const size_t inputSize = GetNvar();

   TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
   TIter nextLayer(layerStrings);
   TObjString* layerString = (TObjString*) nextLayer();

   for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
      int numNodes = 0;
      EActivationFunction activationFunction = EActivationFunction::kTanh;

      TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
      TIter nextToken(subStrings);
      TObjString* token = (TObjString*) nextToken();
      int idxToken = 0;
      for (; token != nullptr; token = (TObjString*) nextToken()) {
         switch (idxToken) {
         case 0: {   // sub-token 0: activation function
            TString strActFnc(token->GetString());
            if (strActFnc == "RELU") {
               activationFunction = EActivationFunction::kRelu;
            } else if (strActFnc == "TANH") {
               activationFunction = EActivationFunction::kTanh;
            } else if (strActFnc == "SYMMRELU") {
               activationFunction = EActivationFunction::kSymmRelu;
            } else if (strActFnc == "SOFTSIGN") {
               activationFunction = EActivationFunction::kSoftSign;
            } else if (strActFnc == "SIGMOID") {
               activationFunction = EActivationFunction::kSigmoid;
            } else if (strActFnc == "LINEAR") {
               activationFunction = EActivationFunction::kIdentity;
            } else if (strActFnc == "GAUSS") {
               activationFunction = EActivationFunction::kGauss;
            }
         } break;
         case 1: {   // sub-token 1: number of nodes; "N"/"n" stands for the number of input variables
            TString strNumNodes(token->GetString());
            TString strN("x");
            strNumNodes.ReplaceAll("N", strN);
            strNumNodes.ReplaceAll("n", strN);
            TFormula fml("tmp", strNumNodes);   // evaluates arithmetic expressions such as "(N+30)*2"
            numNodes = fml.Eval(inputSize);
         } break;
         }
         ++idxToken;
      }
      layout.push_back(std::make_pair(numNodes, activationFunction));
   }
   return layout;
}
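// A self-contained illustration (standard library only, not the TMVA parser) of how a
// layout string decomposes into (activation, width) pairs. The helper name ParseLayout is
// hypothetical, and the "N" arithmetic of the real parser is omitted here.
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, int>> ParseLayout(const std::string& layout)
{
   std::vector<std::pair<std::string, int>> result;
   std::istringstream layers(layout);
   std::string layer;
   while (std::getline(layers, layer, ',')) {        // layers are comma-separated
      std::string activation = layer;
      int width = -1;                                 // -1: width fixed elsewhere (output layer)
      std::size_t bar = layer.find('|');
      if (bar != std::string::npos) {                 // "ACTIVATION|WIDTH"
         activation = layer.substr(0, bar);
         width = std::stoi(layer.substr(bar + 1));
      }
      result.emplace_back(activation, width);
   }
   return result;
}

int main()
{
   for (const auto& l : ParseLayout("TANH|128,TANH|64,LINEAR"))
      std::cout << l.first << " : " << l.second << '\n';   // TANH : 128, TANH : 64, LINEAR : -1
}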
TMVA::MethodDNN::KeyValueVector_t TMVA::MethodDNN::ParseKeyValueString(TString parseString,
                                                                       TString blockDelim,
                                                                       TString tokenDelim)
{
   KeyValueVector_t blockKeyValues;
   const TString keyValueDelim("=");

   TObjArray* blockStrings = parseString.Tokenize(blockDelim);
   TIter nextBlock(blockStrings);
   TObjString* blockString = (TObjString *) nextBlock();

   for (; blockString != nullptr; blockString = (TObjString *) nextBlock()) {
      blockKeyValues.push_back(std::map<TString, TString>());
      std::map<TString, TString>& currentBlock = blockKeyValues.back();

      TObjArray* subStrings = blockString->GetString().Tokenize(tokenDelim);
      TIter nextToken(subStrings);
      TObjString* token = (TObjString *) nextToken();

      for (; token != nullptr; token = (TObjString *) nextToken()) {
         TString strKeyValue(token->GetString());
         int delimPos = strKeyValue.First(keyValueDelim.Data());
         if (delimPos <= 0) continue;

         TString strKey = TString(strKeyValue(0, delimPos));
         strKey.ToUpper();
         TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));

         strKey   = strKey.Strip(TString::kBoth, ' ');
         strValue = strValue.Strip(TString::kBoth, ' ');

         currentBlock.insert(std::make_pair(strKey, strValue));
      }
   }
   return blockKeyValues;
}
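// Worked example (illustrative) of what ParseKeyValueString returns for a two-block
// training-strategy string with blockDelim = "|" and tokenDelim = "," -- note that the
// keys come back upper-cased:
//
//    input:     "ConvergenceSteps=50,BatchSize=30|ConvergenceSteps=50,BatchSize=20"
//    result[0]: { {"CONVERGENCESTEPS","50"}, {"BATCHSIZE","30"} }
//    result[1]: { {"CONVERGENCESTEPS","50"}, {"BATCHSIZE","20"} }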
// Base lookup: returns the value stored under 'key', or "" if the key is missing.
TString fetchValue(const std::map<TString, TString>& keyValueMap, TString key)
{
   key.ToUpper();
   std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
   if (it == keyValueMap.end()) {
      return TString("");
   }
   return it->second;
}

// Typed lookups fall back to 'defaultValue' when the key is absent.
template <typename T>
T fetchValue(const std::map<TString, TString>& keyValueMap, TString key, T defaultValue);

template <>
int fetchValue(const std::map<TString, TString>& keyValueMap, TString key, int defaultValue)
{
   TString value(fetchValue(keyValueMap, key));
   if (value == "") return defaultValue;
   return value.Atoi();
}

template <>
double fetchValue(const std::map<TString, TString>& keyValueMap, TString key, double defaultValue)
{
   TString value(fetchValue(keyValueMap, key));
   if (value == "") return defaultValue;
   return value.Atof();
}

template <>
TString fetchValue(const std::map<TString, TString>& keyValueMap, TString key, TString defaultValue)
{
   TString value(fetchValue(keyValueMap, key));
   if (value == "") return defaultValue;
   return value;
}

template <>
bool fetchValue(const std::map<TString, TString>& keyValueMap, TString key, bool defaultValue)
{
   TString value(fetchValue(keyValueMap, key));
   if (value == "") return defaultValue;
   value.ToUpper();
   return (value == "TRUE" || value == "T" || value == "1");
}

// Vector-valued options use "+" as separator, e.g. DropConfig=0.0+0.5+0.5.
template <>
std::vector<double> fetchValue(const std::map<TString, TString>& keyValueMap, TString key,
                               std::vector<double> defaultValue)
{
   TString parseString(fetchValue(keyValueMap, key));
   if (parseString == "") {
      return defaultValue;
   }
   std::vector<double> values;

   const TString tokenDelim("+");
   TObjArray* tokenStrings = parseString.Tokenize(tokenDelim);
   TIter nextToken(tokenStrings);
   TObjString* tokenString = (TObjString*) nextToken();
   for (; tokenString != NULL; tokenString = (TObjString*) nextToken()) {
      std::stringstream sstr;
      double currentValue;
      sstr << tokenString->GetString().Data();
      sstr >> currentValue;
      values.push_back(currentValue);
   }
   return values;
}
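// Usage sketch for the helpers above (illustrative only; the default values passed here
// are arbitrary). Keys match case-insensitively because both the parser and fetchValue
// upper-case them:
//
//    std::map<TString, TString> block =
//       ParseKeyValueString("LearningRate=1e-1,BatchSize=30,Multithreading=True", "|", ",").front();
//    double lr  = fetchValue(block, "LearningRate", 1e-5);     // -> 0.1
//    int batch  = fetchValue(block, "BatchSize", 32);          // -> 30
//    bool mt    = fetchValue(block, "Multithreading", false);  // -> true
//    std::vector<double> drop = fetchValue(block, "DropConfig", std::vector<double>());  // -> {} (absent)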
void TMVA::MethodDNN::ProcessOptions()
{
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kINFO << "Will ignore negative events in training!" << Endl;
   }

   if (fArchitectureString == "STANDARD") {
      Log() << kERROR << "The STANDARD architecture has been deprecated. "
                         "Please use Architecture=CPU or Architecture=GPU. "
                         "See the TMVA Users' Guide for instructions if you "
                         "encounter problems." << Endl;
      Log() << kFATAL << "..." << Endl;   // same message at fatal severity, aborts
   }
   if (fArchitectureString == "OPENCL") {
      Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
                         "Please use Architecture=CPU or Architecture=GPU for the "
                         "time being. See the TMVA Users' Guide for instructions "
                         "if you encounter problems." << Endl;
      Log() << kFATAL << "..." << Endl;   // same message at fatal severity, aborts
   }
   if (fArchitectureString == "GPU") {
#ifndef DNNCUDA // Included only if DNNCUDA flag is _not_ set.
      Log() << kERROR << "CUDA backend not enabled. Please make sure "
                         "you have CUDA installed and it was successfully "
                         "detected by CMAKE." << Endl;
      Log() << kFATAL << "..." << Endl;   // same message at fatal severity, aborts
#endif
   }
   if (fArchitectureString == "CPU") {
#ifndef DNNCPU // Included only if DNNCPU flag is _not_ set.
      Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
                         "you have a BLAS implementation and it was successfully "
                         "detected by CMake, and that the imt CMake flag is set." << Endl;
      Log() << kFATAL << "..." << Endl;   // same message at fatal severity, aborts
#endif
   }

   size_t inputSize = GetNVariables();
   size_t outputSize = 1;
   if (fAnalysisType == Types::kRegression) {
      outputSize = GetNTargets();
   } else if (fAnalysisType == Types::kMulticlass) {
      outputSize = DataInfo().GetNClasses();
   }
   fNet.SetBatchSize(1);
   fNet.SetInputWidth(inputSize);

   auto itLayout    = std::begin(fLayout);
   auto itLayoutEnd = std::end(fLayout) - 1;
   for (; itLayout != itLayoutEnd; ++itLayout) {
      fNet.AddLayer((*itLayout).first, (*itLayout).second);
   }
   fNet.AddLayer(outputSize, EActivationFunction::kIdentity);

   // Loss function and output function, chosen from fErrorStrategy and the analysis type.
   if (fAnalysisType == Types::kClassification) {
      if (fErrorStrategy == "SUMOFSQUARES")  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
      if (fErrorStrategy == "CROSSENTROPY")  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
      fOutputFunction = EOutputFunction::kSigmoid;
   } else if (fAnalysisType == Types::kRegression) {
      if (fErrorStrategy != "SUMOFSQUARES") {
         Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
               << "neural net error function. Setting error function to "
               << "SUMOFSQUARES now." << Endl;
      }
      fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
      fOutputFunction = EOutputFunction::kIdentity;
   } else if (fAnalysisType == Types::kMulticlass) {
      if (fErrorStrategy == "SUMOFSQUARES")     fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
      if (fErrorStrategy == "CROSSENTROPY")     fNet.SetLossFunction(ELossFunction::kCrossEntropy);
      if (fErrorStrategy == "MUTUALEXCLUSIVE")  fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
      fOutputFunction = EOutputFunction::kSoftmax;
   }
   // Training strategy: one key-value block per training phase, separated by "|".
   GetNumValidationSamples();   // sanity-check the ValidationSize option early

   KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
                                                            TString("|"), TString(","));
   Log() << kDEBUG << "Training strategy string has " << strategyKeyValues.size()
         << " block(s)." << Endl;
   for (auto& block : strategyKeyValues) {
      TTrainingSettings settings;
      // Numeric settings (ConvergenceSteps, BatchSize, LearningRate, ...) are read with
      // the fetchValue helpers defined above.
      settings.dropoutProbabilities = fetchValue(block, "DropConfig", std::vector<Double_t>());

      TString regularization = fetchValue(block, "Regularization", TString("NONE"));
      if (regularization == "L1") {
         settings.regularization = ERegularization::kL1;
      } else if (regularization == "L2") {
         settings.regularization = ERegularization::kL2;
      }
      // ...
      fTrainingSettings.push_back(settings);
   }
}
UInt_t TMVA::MethodDNN::GetNumValidationSamples()
{
   Int_t nValidationSamples = 0;
   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();

   if (fValidationSize.EndsWith("%")) {
      // Relative size given in percent, e.g. "20%".
      TString intValStr = TString(fValidationSize.Strip(TString::kTrailing, '%'));
      if (intValStr.IsFloat()) {
         Double_t valSizeAsDouble = fValidationSize.Atof() / 100.0;
         nValidationSamples = trainingSetSize * valSizeAsDouble;
      } else {
         Log() << kFATAL << "Cannot parse number \"" << fValidationSize
               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
      }
   } else if (fValidationSize.IsFloat()) {
      Double_t valSizeAsDouble = fValidationSize.Atof();
      if (valSizeAsDouble < 1.0) {
         // Fraction of the training set, e.g. "0.2".
         nValidationSamples = trainingSetSize * valSizeAsDouble;
      } else {
         // Absolute number of events, e.g. "100".
         nValidationSamples = valSizeAsDouble;
      }
   } else {
      Log() << kFATAL << "Cannot parse number \"" << fValidationSize
            << "\". Expected string like \"0.2\" or \"100\"." << Endl;
   }

   // Sanity checks on the resulting number.
   if (nValidationSamples < 0) {
      Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is negative." << Endl;
   }
   if (nValidationSamples == 0) {
      Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is zero." << Endl;
   }
   if (nValidationSamples >= (Int_t) trainingSetSize) {
      Log() << kFATAL << "Validation size \"" << fValidationSize
            << "\" is larger than or equal in size to training set (size=\""
            << trainingSetSize << "\")." << Endl;
   }

   return nValidationSamples;
}
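// Worked examples (illustrative) for a training set of 10000 events:
//    ValidationSize = "20%"  ->  10000 * 0.20 = 2000 validation events
//    ValidationSize = "0.2"  ->  10000 * 0.2  = 2000 validation events
//    ValidationSize = "100"  ->  exactly 100 validation events
// Anything <= 0 or >= 10000 is rejected with a fatal error by the checks above.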
void TMVA::MethodDNN::Train()
{
   if (fInteractive && fInteractive->NotInitialized()) {
      std::vector<TString> titles = {"Error on training set", "Error on test set"};
      fInteractive->Init(titles);
   }

   if (fArchitectureString == "GPU") {
      TrainGpu();
      return;
   } else if (fArchitectureString == "OPENCL") {
      Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
      return;
   } else if (fArchitectureString == "CPU") {
      TrainCpu();
      return;
   }

   Log() << kINFO << "Using Standard Implementation." << Endl;

   std::vector<Pattern> trainPattern;
   std::vector<Pattern> testPattern;

   size_t nValidationSamples = GetNumValidationSamples();
   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;

   const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
   const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
   const std::vector<TMVA::Event *> eventCollectionTesting{allData.begin() + nTrainingSamples, allData.end()};

   for (auto &event : eventCollectionTraining) {
      const std::vector<Float_t>& values = event->GetValues();
      if (fAnalysisType == Types::kClassification) {
         double outputValue = event->GetClass() == 0 ? 0.9 : 0.1;
         trainPattern.push_back(Pattern(values.begin(), values.end(),
                                        outputValue, event->GetWeight()));
         trainPattern.back().addInput(1.0);     // bias node
      } else if (fAnalysisType == Types::kMulticlass) {
         std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
         oneHot[event->GetClass()] = 1.0;
         trainPattern.push_back(Pattern(values.begin(), values.end(),
                                        oneHot.cbegin(), oneHot.cend(),
                                        event->GetWeight()));
         trainPattern.back().addInput(1.0);
      } else {
         const std::vector<Float_t>& targets = event->GetTargets();
         trainPattern.push_back(Pattern(values.begin(), values.end(),
                                        targets.begin(), targets.end(),
                                        event->GetWeight()));
         trainPattern.back().addInput(1.0);
      }
   }

   for (auto &event : eventCollectionTesting) {
      const std::vector<Float_t>& values = event->GetValues();
      if (fAnalysisType == Types::kClassification) {
         double outputValue = event->GetClass() == 0 ? 0.9 : 0.1;
         testPattern.push_back(Pattern(values.begin(), values.end(),
                                       outputValue, event->GetWeight()));
         testPattern.back().addInput(1.0);
      } else if (fAnalysisType == Types::kMulticlass) {
         std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
         oneHot[event->GetClass()] = 1.0;
         testPattern.push_back(Pattern(values.begin(), values.end(),
                                       oneHot.cbegin(), oneHot.cend(),
                                       event->GetWeight()));
         testPattern.back().addInput(1.0);
      } else {
         const std::vector<Float_t>& targets = event->GetTargets();
         testPattern.push_back(Pattern(values.begin(), values.end(),
                                       targets.begin(), targets.end(),
                                       event->GetWeight()));
         testPattern.back().addInput(1.0);
      }
   }
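   // Illustration (not part of the original code): for DataInfo().GetNClasses() == 3,
   // an event of class 1 gets the one-hot target {0.0, 1.0, 0.0}; the network output
   // vector is then compared against this target by the configured loss function.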
   // The standard implementation delegates to the older TMVA neural-net classes
   // (NeuralNet.h). Translate the layout, loss function and weight initialization of
   // fNet into that interface.
   Net net;
   std::vector<double> weights;

   net.setInputSize(fNet.GetInputWidth() + 1);    // +1 for the bias node
   net.setOutputSize(fNet.GetOutputWidth() + 1);

   for (size_t i = 0; i < fNet.GetDepth(); i++) {
      EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();
      EnumFunction g = EnumFunction::LINEAR;
      switch (f) {
      case EActivationFunction::kIdentity: g = EnumFunction::LINEAR;   break;
      case EActivationFunction::kRelu:     g = EnumFunction::RELU;     break;
      case EActivationFunction::kSigmoid:  g = EnumFunction::SIGMOID;  break;
      case EActivationFunction::kTanh:     g = EnumFunction::TANH;     break;
      case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
      case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
      case EActivationFunction::kGauss:    g = EnumFunction::GAUSS;    break;
      }
      if (i < fNet.GetDepth() - 1) {
         net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
      } else {
         ModeOutputValues h = ModeOutputValues::DIRECT;
         switch (fOutputFunction) {
         case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT;  break;
         case EOutputFunction::kSigmoid:  h = ModeOutputValues::SIGMOID; break;
         case EOutputFunction::kSoftmax:  h = ModeOutputValues::SOFTMAX; break;
         }
         net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
      }
   }

   switch (fNet.GetLossFunction()) {
   case ELossFunction::kMeanSquaredError:
      net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
      break;
   case ELossFunction::kCrossEntropy:
      net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
      break;
   case ELossFunction::kSoftmaxCrossEntropy:
      net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
      break;
   }

   switch (fWeightInitialization) {
   case EInitialization::kGauss:
      net.initializeWeights(WeightInitializationStrategy::XAVIER, std::back_inserter(weights));
      break;
   case EInitialization::kUniform:
      net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM, std::back_inserter(weights));
      break;
   default:
      net.initializeWeights(WeightInitializationStrategy::XAVIER, std::back_inserter(weights));
      break;
   }

   size_t idxSetting = 0;
   for (auto s : fTrainingSettings) {
      EnumRegularization r = EnumRegularization::NONE;
      switch (s.regularization) {
      case ERegularization::kL1: r = EnumRegularization::L1;   break;
      case ERegularization::kL2: r = EnumRegularization::L2;   break;
      default:                   r = EnumRegularization::NONE; break;
      }

      Settings* settings = new Settings(/* ... */ s.convergenceSteps, s.batchSize,
                                        s.testInterval, s.weightDecay, r,
                                        /* ... */ s.learningRate,
                                        s.momentum, 1, s.multithreading);
      std::shared_ptr<Settings> ptrSettings(settings);
      ptrSettings->setMonitoring(0);
      Log() << kINFO
            << "Training with learning rate = " << ptrSettings->learningRate()
            << ", momentum = " << ptrSettings->momentum()
            << ", repetitions = " << ptrSettings->repetitions()
            << Endl;

      ptrSettings->setProgressLimits(idxSetting * 100.0 / fSettings.size(),
                                     (idxSetting + 1) * 100.0 / fSettings.size());

      const std::vector<double>& dropConfig = ptrSettings->dropFractions();
      if (!dropConfig.empty()) {
         Log() << kINFO << "Drop configuration" << Endl
               << "    drop repetitions = " << ptrSettings->dropRepetitions() << Endl;
      }
      int idx = 0;
      for (auto f : dropConfig) {
         Log() << kINFO << "    Layer " << idx << " = " << f << Endl;
         ++idx;
      }

      Steepest minimizer(ptrSettings->learningRate(),
                         ptrSettings->momentum(),
                         ptrSettings->repetitions());
      net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
      ++idxSetting;
   }
   // Copy the weights trained by the old implementation back into fNet. The old net
   // stores all weights in one flat vector; only the first layer's biases are restored,
   // the others are set to zero.
   size_t weightIndex = 0;
   for (size_t l = 0; l < fNet.GetDepth(); l++) {
      auto & layerWeights = fNet.GetLayer(l).GetWeights();
      for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
         for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
            layerWeights(i, j) = weights[weightIndex];
            ++weightIndex;
         }
      }

      auto & layerBiases = fNet.GetLayer(l).GetBiases();
      if (l == 0) {
         for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
            layerBiases(i, 0) = weights[weightIndex];
            ++weightIndex;
         }
      } else {
         for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
            layerBiases(i, 0) = 0.0;
         }
      }
   }
}
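// The copy above walks the flat weight vector column by column. A self-contained
// illustration of that ordering (standard C++ only, not TMVA code):
#include <iostream>
#include <vector>

int main()
{
   std::vector<double> flat = {1, 2, 3, 4, 5, 6};   // flat vector as produced by the old net
   const int nRows = 2, nCols = 3;
   double matrix[2][3];

   std::size_t index = 0;
   for (int j = 0; j < nCols; j++)        // columns first ...
      for (int i = 0; i < nRows; i++)     // ... then rows, matching layerWeights(i, j)
         matrix[i][j] = flat[index++];

   std::cout << matrix[0][1] << '\n';     // prints 3: first row, second column
}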
void TMVA::MethodDNN::TrainGpu()
{
#ifdef DNNCUDA // Included only if DNNCUDA flag is set.
   Log() << kINFO << "Start of neural network training on GPU." << Endl << Endl;

   size_t nValidationSamples = GetNumValidationSamples();
   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
   size_t nTestSamples = nValidationSamples;

   Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
   Log() << kDEBUG << "Using " << nTrainingSamples << " training samples." << Endl;
   size_t trainingPhase = 1;
   for (TTrainingSettings & settings : fTrainingSettings) {
      // Build a GPU net for this phase from the master architecture fNet.
      TNet<TCuda<>> net(settings.batchSize, fNet);
      net.SetWeightDecay(settings.weightDecay);
      net.SetRegularization(settings.regularization);

      // Convert the drop-out probabilities into the keep probabilities used internally.
      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
      for (auto & p : dropoutVector) {
         p = 1.0 - p;
      }
      net.SetDropoutProbabilities(dropoutVector);

      auto testNet = net.CreateClone(settings.batchSize);

      Log() << kINFO << "Training phase " << trainingPhase << " of "
            << fTrainingSettings.size() << ":" << Endl;
      trainingPhase++;

      // Split the training events into the actual training set and the validation (test) set.
      const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
      const std::vector<Event *> trainingInputData =
         std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
      const std::vector<Event *> testInputData =
         std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());

      if (trainingInputData.size() != nTrainingSamples) {
         Log() << kFATAL << "Inconsistent training sample size" << Endl;
      }
      if (testInputData.size() != nTestSamples) {
         Log() << kFATAL << "Inconsistent test sample size" << Endl;
      }

      size_t nThreads = 1;   // number of parallel net/batch streams (assumption)
      TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
      TMVAInput_t testTuple     = std::tie(testInputData, DataInfo());
      DataLoader_t trainingData(trainingTuple, nTrainingSamples,
                                net.GetBatchSize(), net.GetInputWidth(),
                                net.GetOutputWidth(), nThreads);
      DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
                            net.GetInputWidth(), net.GetOutputWidth(), nThreads);

      DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
                                               settings.convergenceSteps,
                                               settings.testInterval);
      // One net and one batch per stream; the clones share the weights of the master net.
      std::vector<TNet<TCuda<>>> nets{};
      std::vector<TBatch<TCuda<>>> batches{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {
         nets.push_back(net);
         for (size_t j = 0; j < net.GetDepth(); j++) {
            auto &masterLayer = net.GetLayer(j);
            auto &layer = nets.back().GetLayer(j);
            TCuda<>::Copy(layer.GetWeights(), masterLayer.GetWeights());
            TCuda<>::Copy(layer.GetBiases(),  masterLayer.GetBiases());
         }
      }
      bool converged = false;
      size_t stepCount = 0;
      size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();

      std::chrono::time_point<std::chrono::system_clock> start, end;
      start = std::chrono::system_clock::now();

      Log() << std::setw(10) << "Epoch" << " | "
            << std::setw(12) << "Train Err."
            << std::setw(12) << "Test Err."
            << std::setw(12) << "GFLOP/s"
            << std::setw(12) << "Conv. Steps" << Endl;
      std::string separator(62, '-');
      Log() << separator << Endl;
      while (!converged) {
         stepCount++;
         trainingData.Shuffle();

         // One epoch: process the mini-batches, nThreads at a time.
         for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
            batches.clear();
            batches.reserve(nThreads);
            for (size_t j = 0; j < nThreads; j++) {
               batches.push_back(trainingData.GetBatch());
            }
            if (settings.momentum > 0.0) {
               minimizer.StepMomentum(net, nets, batches, settings.momentum);
            } else {
               minimizer.Step(net, nets, batches);
            }
         }

         // Every testInterval steps: evaluate the error on the test and training sets.
         if ((stepCount % minimizer.GetTestInterval()) == 0) {
            Double_t testError = 0.0;
            for (auto batch : testData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               testError += testNet.Loss(inputMatrix, outputMatrix);
            }
            testError /= (Double_t) (nTestSamples / settings.batchSize);

            end = std::chrono::system_clock::now();

            Double_t trainingError = 0.0;
            for (auto batch : trainingData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               trainingError += net.Loss(inputMatrix, outputMatrix);
            }
            trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);

            std::chrono::duration<double> elapsed_seconds = end - start;
            double seconds = elapsed_seconds.count();
            double nFlops = (double) (settings.testInterval * batchesInEpoch);
            // ... nFlops is further scaled by the per-batch operation count ...

            converged = minimizer.HasConverged(testError);
            start = std::chrono::system_clock::now();

            if (fInteractive) {
               fInteractive->AddPoint(stepCount, trainingError, testError);
               fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
                                 / minimizer.GetConvergenceSteps();
               if (fExitFromTraining) break;
            }

            Log() << std::setw(10) << stepCount << " | "
                  << std::setw(12) << trainingError
                  << std::setw(12) << testError
                  << std::setw(12) << nFlops / seconds
                  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
         }
      }
      // ... (the trained weights of this phase are copied back into fNet) ...
   }

#else // DNNCUDA flag not set.
   Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                      "you have CUDA installed and it was successfully "
                      "detected by CMAKE." << Endl;
#endif
}
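// Both training backends drive the weight updates through minimizer.Step() or, when a
// momentum is configured, minimizer.StepMomentum(). As a reminder of what a momentum
// update does for a single weight, a self-contained toy example of generic SGD with
// momentum (not the TMVA minimizer):
#include <cstdio>

int main()
{
   double w = 0.5, v = 0.0;                        // weight and accumulated velocity
   const double learningRate = 0.1, momentum = 0.9;

   for (int step = 0; step < 3; ++step) {
      double grad = 2.0 * w;                       // gradient of the toy loss L(w) = w^2
      v = momentum * v - learningRate * grad;      // accumulate velocity
      w += v;                                      // momentum = 0 reduces to plain SGD
      std::printf("step %d: w = %f\n", step, w);
   }
}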
void TMVA::MethodDNN::TrainCpu()
{
#ifdef DNNCPU // Included only if DNNCPU flag is set.
   Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;

   size_t nValidationSamples = GetNumValidationSamples();
   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
   size_t nTestSamples = nValidationSamples;

   Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
   Log() << kDEBUG << "Using " << nTrainingSamples << " training samples." << Endl;

   size_t trainingPhase = 1;
   for (TTrainingSettings & settings : fTrainingSettings) {
      Log() << "Training phase " << trainingPhase << " of "
            << fTrainingSettings.size() << ":" << Endl;
      trainingPhase++;

      // Build a CPU net for this phase from the master architecture fNet.
      TNet<TCpu<>> net(settings.batchSize, fNet);
      net.SetWeightDecay(settings.weightDecay);
      net.SetRegularization(settings.regularization);

      // Convert the drop-out probabilities into the keep probabilities used internally.
      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
      for (auto & p : dropoutVector) {
         p = 1.0 - p;
      }
      net.SetDropoutProbabilities(dropoutVector);

      auto testNet = net.CreateClone(settings.batchSize);

      // Split the training events into the actual training set and the validation (test) set.
      const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
      const std::vector<Event *> trainingInputData =
         std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
      const std::vector<Event *> testInputData =
         std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());

      if (trainingInputData.size() != nTrainingSamples) {
         Log() << kFATAL << "Inconsistent training sample size" << Endl;
      }
      if (testInputData.size() != nTestSamples) {
         Log() << kFATAL << "Inconsistent test sample size" << Endl;
      }

      size_t nThreads = 1;
      TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
      TMVAInput_t testTuple     = std::tie(testInputData, DataInfo());
      DataLoader_t trainingData(trainingTuple, nTrainingSamples,
                                net.GetBatchSize(), net.GetInputWidth(),
                                net.GetOutputWidth(), nThreads);
      DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
                            net.GetInputWidth(), net.GetOutputWidth(), nThreads);

      DNN::TGradientDescent<TCpu<>> minimizer(settings.learningRate,
                                              settings.convergenceSteps,
                                              settings.testInterval);
      // One net and one batch per thread; the clones share the weight matrices of the master net.
      std::vector<TNet<TCpu<>>> nets{};
      std::vector<TBatch<TCpu<>>> batches{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {
         nets.push_back(net);
         for (size_t j = 0; j < net.GetDepth(); j++) {
            auto &masterLayer = net.GetLayer(j);
            auto &layer = nets.back().GetLayer(j);
            TCpu<>::Copy(layer.GetWeights(), masterLayer.GetWeights());
            TCpu<>::Copy(layer.GetBiases(),  masterLayer.GetBiases());
         }
      }

      bool converged = false;
      size_t stepCount = 0;
      size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();

      std::chrono::time_point<std::chrono::system_clock> start, end;
      start = std::chrono::system_clock::now();

      Log() << std::setw(10) << "Epoch" << " | "
            << std::setw(12) << "Train Err."
            << std::setw(12) << "Test Err."
            << std::setw(12) << "GFLOP/s"
            << std::setw(12) << "Conv. Steps" << Endl;
      std::string separator(62, '-');
      Log() << separator << Endl;
      while (!converged) {
         stepCount++;
         trainingData.Shuffle();

         // One epoch: process the mini-batches, nThreads at a time.
         for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
            batches.clear();
            batches.reserve(nThreads);
            for (size_t j = 0; j < nThreads; j++) {
               batches.push_back(trainingData.GetBatch());
            }
            if (settings.momentum > 0.0) {
               minimizer.StepMomentum(net, nets, batches, settings.momentum);
            } else {
               minimizer.Step(net, nets, batches);
            }
         }

         // Every testInterval steps: evaluate the weighted error on the test and training sets.
         if ((stepCount % minimizer.GetTestInterval()) == 0) {
            Double_t testError = 0.0;
            for (auto batch : testData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weightMatrix = batch.GetWeights();
               testError += testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
            }
            testError /= (Double_t) (nTestSamples / settings.batchSize);

            end = std::chrono::system_clock::now();

            Double_t trainingError = 0.0;
            for (auto batch : trainingData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weightMatrix = batch.GetWeights();
               trainingError += net.Loss(inputMatrix, outputMatrix, weightMatrix);
            }
            trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);

            if (fInteractive) {
               fInteractive->AddPoint(stepCount, trainingError, testError);
               fIPyCurrentIter = 100 * (double) minimizer.GetConvergenceCount()
                                 / (double) settings.convergenceSteps;
               if (fExitFromTraining) break;
            }

            std::chrono::duration<double> elapsed_seconds = end - start;
            double seconds = elapsed_seconds.count();
            double nFlops = (double) (settings.testInterval * batchesInEpoch);
            // ... nFlops is further scaled by the per-batch operation count ...

            converged = minimizer.HasConverged(testError);
            start = std::chrono::system_clock::now();

            Log() << std::setw(10) << stepCount << " | "
                  << std::setw(12) << trainingError
                  << std::setw(12) << testError
                  << std::setw(12) << nFlops / seconds
                  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
         }
      }
      // Copy the trained weights of this phase back into the master net fNet.
      for (size_t l = 0; l < net.GetDepth(); l++) {
         auto & layer = fNet.GetLayer(l);
         layer.GetWeights() = (TMatrixT<Double_t>) net.GetLayer(l).GetWeights();
         layer.GetBiases()  = (TMatrixT<Double_t>) net.GetLayer(l).GetBiases();
      }
   }

#else // DNNCPU flag not set.
   Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                      "you have a BLAS implementation and it was successfully "
                      "detected by CMake, and that the imt CMake flag is set." << Endl;
#endif
}
Double_t TMVA::MethodDNN::GetMvaValue(Double_t* /*errLower*/, Double_t* /*errUpper*/)
{
   size_t nVariables = GetEvent()->GetNVariables();
   Matrix_t X(1, nVariables), YHat(1, 1);
   const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];
   }
   fNet.Prediction(YHat, X, fOutputFunction);
   return YHat(0, 0);
}

const std::vector<Float_t> & TMVA::MethodDNN::GetRegressionValues()
{
   const Event *ev = GetEvent();
   size_t nVariables = GetEvent()->GetNVariables();
   Matrix_t X(1, nVariables);
   const std::vector<Float_t>& inputValues = ev->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];
   }

   size_t nTargets = std::max(1u, ev->GetNTargets());
   Matrix_t YHat(1, nTargets);
   std::vector<Float_t> output(nTargets);
   auto net = fNet.CreateClone(1);
   net.Prediction(YHat, X, fOutputFunction);
   for (size_t i = 0; i < nTargets; i++)
      output[i] = YHat(0, i);

   // Undo the target normalization: write the raw outputs into a transient copy of the
   // event, inverse-transform it, and read the targets back.
   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();
   Event * evT = new Event(*ev);
   for (size_t i = 0; i < nTargets; ++i) {
      evT->SetTarget(i, output[i]);
   }
   const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
   for (size_t i = 0; i < nTargets; ++i) {
      fRegressionReturnVal->push_back(evT2->GetTarget(i));
   }
   delete evT;
   return *fRegressionReturnVal;
}

const std::vector<Float_t> & TMVA::MethodDNN::GetMulticlassValues()
{
   size_t nVariables = GetEvent()->GetNVariables();
   Matrix_t X(1, nVariables);
   Matrix_t YHat(1, DataInfo().GetNClasses());
   if (fMulticlassReturnVal == NULL) {
      fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
   }
   const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];
   }
   fNet.Prediction(YHat, X, fOutputFunction);
   for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
      (*fMulticlassReturnVal)[i] = YHat(0, i);
   }
   return *fMulticlassReturnVal;
}
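// Application-side sketch (illustrative): this is the path through which GetMvaValue()
// is invoked in a typical analysis. Variable names and the weight-file path are placeholders.
#include "TMVA/Reader.h"

void ApplyDNN()
{
   TMVA::Reader reader("!Color:!Silent");
   Float_t var1 = 0.f, var2 = 0.f;
   reader.AddVariable("var1", &var1);
   reader.AddVariable("var2", &var2);
   reader.BookMVA("DNN", "dataset/weights/TMVAClassification_DNN.weights.xml");

   var1 = 1.2f; var2 = -0.4f;                    // values of the event to classify
   double mvaValue = reader.EvaluateMVA("DNN");  // internally calls MethodDNN::GetMvaValue()
   (void) mvaValue;
}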
void TMVA::MethodDNN::AddWeightsXMLTo(void* parent) const
{
   void* nn = gTools().xmlengine().NewChild(parent, 0, "Weights");

   Int_t inputWidth = fNet.GetInputWidth();
   Int_t depth      = fNet.GetDepth();
   char  lossFunction = static_cast<char>(fNet.GetLossFunction());
   gTools().xmlengine().NewAttr(nn, 0, "LossFunction", TString(lossFunction));
   gTools().xmlengine().NewAttr(nn, 0, "OutputFunction",
                                TString(static_cast<char>(fOutputFunction)));
   gTools().AddAttr(nn, "InputWidth", gTools().StringFromInt(inputWidth));
   gTools().AddAttr(nn, "Depth",      gTools().StringFromInt(depth));

   for (Int_t i = 0; i < depth; i++) {
      const auto& layer = fNet.GetLayer(i);
      void* layerxml = gTools().xmlengine().NewChild(nn, 0, "Layer");
      int activationFunction = static_cast<int>(layer.GetActivationFunction());
      gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
                                   TString::Itoa(activationFunction, 10));
      WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
      WriteMatrixXML(layerxml, "Biases",  layer.GetBiases());
   }
}
void TMVA::MethodDNN::ReadWeightsFromXML(void* rootXML)
{
   fNet.SetBatchSize(1);

   size_t inputWidth, depth;
   gTools().ReadAttr(rootXML, "InputWidth", inputWidth);
   gTools().ReadAttr(rootXML, "Depth", depth);
   char lossFunctionChar;
   gTools().ReadAttr(rootXML, "LossFunction", lossFunctionChar);
   char outputFunctionChar;
   gTools().ReadAttr(rootXML, "OutputFunction", outputFunctionChar);

   fNet.SetInputWidth(inputWidth);
   fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
   fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);

   size_t previousWidth = inputWidth;
   void* layerXML = gTools().xmlengine().GetChild(rootXML);
   for (size_t i = 0; i < depth; i++) {
      TString fString;
      gTools().ReadAttr(layerXML, "ActivationFunction", fString);
      EActivationFunction f = static_cast<EActivationFunction>(fString.Atoi());

      size_t width = 0;
      // ... (the layer width is recovered from the stored "Weights" matrix attributes) ...
      fNet.AddLayer(width, f);

      TMatrixT<Double_t> weights(width, previousWidth);
      TMatrixT<Double_t> biases(width, 1);
      ReadMatrixXML(layerXML, "Weights", weights);
      ReadMatrixXML(layerXML, "Biases",  biases);
      fNet.GetLayer(i).GetWeights() = weights;
      fNet.GetLayer(i).GetBiases()  = biases;

      previousWidth = width;
      layerXML = gTools().xmlengine().GetNext(layerXML);
   }
}
void TMVA::MethodDNN::GetHelpMessage() const
{
   TString col    = gTools().Color("bold");
   TString colres = gTools().Color("reset");

   Log() << Endl;
   Log() << col << "--- Short description:" << colres << Endl;
   Log() << Endl;
   Log() << "The DNN neural network is a feedforward" << Endl;
   Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
   Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
   Log() << "nodes is determined by the input variables (output classes, i.e." << Endl;
   Log() << "signal and one background, regression or multiclass)." << Endl;
   Log() << Endl;
   Log() << col << "--- Performance optimisation:" << colres << Endl;
   Log() << Endl;

   const char* txt =
      "The DNN supports various options to improve performance in terms of training speed and \n"
      "reduction of overfitting: \n"
      "\n"
      "      - different training settings can be stacked. Such that the initial training \n"
      "        is done with a large learning rate and a large drop out fraction whilst \n"
      "        in a later stage learning rate and drop out can be reduced. \n"
      "      - drop out \n"
      "        [recommended: \n"
      "         initial training stage: 0.0 for the first layer, 0.5 for later layers. \n"
      "         later training stage: 0.1 or 0.0 for all layers \n"
      "         final training stage: 0.0] \n"
      "        Drop out is a technique where at each training cycle a fraction of arbitrary \n"
      "        nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n"
      "      - L1 and L2 regularization are available \n"
      "      - Minibatches \n"
      "        [recommended 10 - 150] \n"
      "        Arbitrary mini-batch sizes can be chosen. \n"
      "      - Multithreading \n"
      "        [recommended: True] \n"
      "        Multithreading can be turned on. The minibatches are distributed to the available \n"
      "        cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n"
      "\n"
      "    Options: \n"
      "    \"Layout\": \n"
      "        - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n"
      "        - meaning: \n"
      "            . two hidden layers (separated by \",\") \n"
      "            . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n"
      "            . the activation function for the output layer is LINEAR \n"
      "            . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n"
      "            . the second hidden layer has N+30 nodes, where N is the number of input neurons \n"
      "            . the number of nodes in the output layer is determined by the number of output nodes \n"
      "              and can therefore not be chosen freely. \n"
      "\n"
      "    \"ErrorStrategy\": \n"
      "        - SUMOFSQUARES \n"
      "          The error of the neural net is determined by a sum-of-squares error function. \n"
      "          For regression, this is the only possible choice. \n"
      "        - CROSSENTROPY \n"
      "          The error of the neural net is determined by a cross entropy function. The \n"
      "          output values are automatically (internally) transformed into probabilities \n"
      "          using a sigmoid function. \n"
      "          For signal/background classification this is the default choice. \n"
      "          For multiclass using cross entropy more than one or no output classes \n"
      "          can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n"
      "          A and C are true, Event 2: C is true, ...) \n"
      "        - MUTUALEXCLUSIVE \n"
      "          In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n"
      "\n"
      "    \"WeightInitialization\" \n"
      "        - XAVIER \n"
      "          \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n"
      "          such that the variance of the values of the nodes is preserved for each layer. \n"
      "        - XAVIERUNIFORM \n"
      "          The same as XAVIER, but with uniformly distributed weights instead of gaussian weights. \n"
      "        - LAYERSIZE \n"
      "          Random values scaled by the layer size. \n"
      "\n"
      "    \"TrainingStrategy\" \n"
      "        - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFraction=0.0,DropRepetitions=5\" \n"
      "        - explanation: two stacked training settings separated by \"|\" \n"
      "            . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n"
      "            . second training setting: \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFractions=0.0,DropRepetitions=5\" \n"
      "            . LearningRate : \n"
      "              - recommended for classification: 0.1 initially, 1e-4 later \n"
      "              - recommended for regression: 1e-4 and less \n"
      "            . Momentum : \n"
      "              preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n"
      "            . Repetitions : \n"
      "              train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n"
      "            . ConvergenceSteps : \n"
      "              Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n"
      "              of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n"
      "              cycle the test samples are evaluated and thus the convergence is checked) \n"
      "            . BatchSize : \n"
      "              Size of the mini-batches. \n"
      "            . TestRepetitions : \n"
      "              Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n"
      "            . WeightDecay : \n"
      "              If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n"
      "            . Renormalize : \n"
      "              NONE, L1 (|w|) or L2 (w^2) \n"
      "            . DropConfig : \n"
      "              Drop a fraction of arbitrary nodes of each of the layers according to the values given \n"
      "              in the DropConfig. \n"
      "              [example: DropConfig=0.0+0.5+0.3 \n"
      "               meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n"
      "               in layer 2 \n"
      "               recommended: leave all the nodes turned on for the input layer (layer 0); \n"
      "               turn off half of the nodes in later layers for the initial training; leave all nodes \n"
      "               turned on (0.0) in later training stages] \n"
      "            . DropRepetitions : \n"
      "              Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n"
      "              [recommended : 1] \n"
      "            . Multithreading : \n"
      "              turn on multithreading [recommended: True] \n";

   Log() << txt << Endl;
}