// MethodDNN constructors
MethodDNN::MethodDNN(const TString &jobName, const TString &methodTitle,
                     DataSetInfo &theData, const TString &theOption)
   : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption),
     fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
     fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
     fTrainingSettings(), fResume(false), fSettings()
{
}

MethodDNN::MethodDNN(DataSetInfo &theData, const TString &theWeightFile)
   : MethodBase(Types::kDNN, theData, theWeightFile),
     fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
     fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
     fTrainingSettings(), fResume(false), fSettings()
{
   fWeightInitialization = DNN::EInitialization::kGauss;
   fOutputFunction       = DNN::EOutputFunction::kSigmoid;
}
130 <<
"MethodDNN is deprecated and it will be removed in future ROOT version. "
131 "Please use MethodDL ( TMVA::kDL)"
// MethodDNN::DeclareOptions
   DeclareOptionRef(fLayoutString = "SOFTSIGN|(N+100)*2,LINEAR",
                    "Layout",
                    "Layout of the network.");
   DeclareOptionRef(fValidationSize = "20%", "ValidationSize",
                    "Part of the training data to use for "
                    "validation. Specify as 0.2 or 20% to use a "
                    "fifth of the data set as validation set. "
                    "Specify as 100 to use exactly 100 events.");
   DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy",
                    "Loss function: Mean squared error (regression)"
                    " or cross entropy (binary classification).");
   AddPreDefVal(TString("CROSSENTROPY"));
   AddPreDefVal(TString("SUMOFSQUARES"));
   AddPreDefVal(TString("MUTUALEXCLUSIVE"));
   DeclareOptionRef(fWeightInitializationString = "XAVIER",
                    "WeightInitialization",
                    "Weight initialization strategy");
   AddPreDefVal(TString("XAVIER"));
   AddPreDefVal(TString("XAVIERUNIFORM"));
   DeclareOptionRef(fArchitectureString = "CPU", "Architecture",
                    "Which architecture to perform the training on.");
   AddPreDefVal(TString("STANDARD"));
   AddPreDefVal(TString("CPU"));
   AddPreDefVal(TString("GPU"));
   AddPreDefVal(TString("OPENCL"));
   DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
                                              "Momentum=0.3,"
                                              "Repetitions=3,"
                                              "ConvergenceSteps=50,"
                                              "BatchSize=30,"
                                              "TestRepetitions=7,"
                                              "WeightDecay=0.0,"
                                              "Renormalize=L2,"
                                              "DropConfig=0.0,"
                                              "DropRepetitions=5|LearningRate=1e-4,"
                                              "Momentum=0.3,"
                                              "Repetitions=3,"
                                              "ConvergenceSteps=50,"
                                              "BatchSize=20,"
                                              "TestRepetitions=7,"
                                              "WeightDecay=0.001,"
                                              "Renormalize=L2,"
                                              "DropConfig=0.0+0.5+0.5,"
                                              "DropRepetitions=5,"
                                              "Multithreading=True",
                                              "TrainingStrategy",
                                              "Defines the training strategies.");
// MethodDNN::ParseLayoutString: convert the layout specification into a list
// of (number of nodes, activation function) pairs.
   const size_t inputSize = GetNvar();
   // ...
   EActivationFunction activationFunction = EActivationFunction::kTanh;
   // ...
   numNodes = fml.Eval(inputSize);   // evaluate the node-count expression with N = inputSize
   // ...
   layout.push_back(std::make_pair(numNodes, activationFunction));
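// For orientation, a minimal self-contained sketch of the Layout-string
// grammar parsed above: layers are comma-separated, each layer is
// "ACTIVATION|NODES", and NODES may be an expression in N (the number of
// input variables) that MethodDNN evaluates via TFormula. This sketch is
// illustrative only (names are invented) and leaves the expression unevaluated.
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, std::string>> ParseLayoutSketch(const std::string &spec)
{
   std::vector<std::pair<std::string, std::string>> layers;
   std::stringstream layerStream(spec);
   std::string layer;
   while (std::getline(layerStream, layer, ',')) {
      auto sep = layer.find('|');
      if (sep == std::string::npos)
         layers.emplace_back(layer, "");   // activation only, e.g. trailing "LINEAR"
      else
         layers.emplace_back(layer.substr(0, sep), layer.substr(sep + 1));
   }
   return layers;
}

int main()
{
   for (const auto &l : ParseLayoutSketch("TANH|(N+30)*2,TANH|(N+30),LINEAR"))
      std::cout << "activation: " << l.first << "  nodes: " << l.second << "\n";
}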
// fetchValue helpers: look up a key in a parsed key-value map and convert the
// stored string to the requested type, returning a default if the key is absent.
   std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
   // ...
   return value.Atoi();   // int overload
   // ...
double fetchValue(const std::map<TString, TString> &keyValueMap, TString key,
                  double defaultValue)
{
   // ...
   return value.Atof();   // double overload
}

bool fetchValue(const std::map<TString, TString> &keyValueMap, TString key,
                bool defaultValue)
{
   // ...
}

std::vector<double> fetchValue(const std::map<TString, TString> &keyValueMap, TString key,
                               std::vector<double> defaultValue)
{
   // ...
   std::vector<double> values;
   std::stringstream sstr;
   // ...
}
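// A hedged, standalone sketch of the same fetchValue pattern. The name
// fetchValueOr and the std::string-based signature are illustrative, not
// TMVA's actual code: look up a key, fall back to a caller-supplied default.
#include <iostream>
#include <map>
#include <sstream>
#include <string>

template <typename T>
T fetchValueOr(const std::map<std::string, std::string> &keyValueMap,
               const std::string &key, T defaultValue)
{
   auto it = keyValueMap.find(key);
   if (it == keyValueMap.end())
      return defaultValue;              // key absent -> caller-supplied default
   std::stringstream sstr(it->second);
   T value = defaultValue;
   sstr >> value;                       // convert the string to the requested type
   return value;
}

int main()
{
   std::map<std::string, std::string> block{{"BatchSize", "30"}, {"LearningRate", "1e-1"}};
   std::cout << fetchValueOr(block, "BatchSize", 0) << " "
             << fetchValueOr(block, "Momentum", 0.3) << "\n";   // prints: 30 0.3
}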
// MethodDNN::ProcessOptions
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kINFO << "Will ignore events with negative weights in training!" << Endl;
   }
   if (fArchitectureString == "STANDARD") {
      Log() << kERROR << "The STANDARD architecture has been deprecated. "
                         "Please use Architecture=CPU or Architecture=GPU. "
                         "See the TMVA Users' Guide for instructions if you "
                         "encounter problems." << Endl;
      Log() << kFATAL << "The STANDARD architecture has been deprecated. "
                         "Please use Architecture=CPU or Architecture=GPU. "
                         "See the TMVA Users' Guide for instructions if you "
                         "encounter problems." << Endl;
   }
   if (fArchitectureString == "OPENCL") {
      Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
                         "Please use Architecture=CPU or Architecture=GPU for the "
                         "time being. See the TMVA Users' Guide for instructions "
                         "if you encounter problems." << Endl;
      Log() << kFATAL << "The OPENCL architecture has not been implemented yet. "
                         "Please use Architecture=CPU or Architecture=GPU for the "
                         "time being. See the TMVA Users' Guide for instructions "
                         "if you encounter problems." << Endl;
   }
   if (fArchitectureString == "GPU") {
      Log() << kERROR << "CUDA backend not enabled. Please make sure "
                         "you have CUDA installed and it was successfully "
                         "detected by CMAKE." << Endl;
      Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                         "you have CUDA installed and it was successfully "
                         "detected by CMAKE." << Endl;
   }
   if (fArchitectureString == "CPU") {
      Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
                         "you have a BLAS implementation and it was successfully "
                         "detected by CMake, and that the imt CMake flag is set." << Endl;
      Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                         "you have a BLAS implementation and it was successfully "
                         "detected by CMake, and that the imt CMake flag is set." << Endl;
   }
   size_t inputSize  = GetNVariables();
   size_t outputSize = 1;
   if (fAnalysisType == Types::kRegression) {
      outputSize = GetNTargets();
   } else if (fAnalysisType == Types::kMulticlass) {
      outputSize = DataInfo().GetNClasses();
   }

   fNet.SetBatchSize(1);
   fNet.SetInputWidth(inputSize);

   auto itLayout = std::begin(fLayout);
   // hidden layers as given by the parsed layout string
   fNet.AddLayer((*itLayout).first, (*itLayout).second);
   // output layer; the output function is applied on top of the identity activation
   fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
   // Loss function and output function.
   fOutputFunction = EOutputFunction::kSigmoid;
   if (fAnalysisType == Types::kClassification) {
      if (fErrorStrategy == "SUMOFSQUARES") {
         fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
      }
      if (fErrorStrategy == "CROSSENTROPY") {
         fNet.SetLossFunction(ELossFunction::kCrossEntropy);
      }
      fOutputFunction = EOutputFunction::kSigmoid;
   } else if (fAnalysisType == Types::kRegression) {
      if (fErrorStrategy != "SUMOFSQUARES") {
         Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
               << "neural net error function. Setting error function to "
               << "SUMOFSQUARES now." << Endl;
      }
      fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
      fOutputFunction = EOutputFunction::kIdentity;
   } else if (fAnalysisType == Types::kMulticlass) {
      if (fErrorStrategy == "SUMOFSQUARES") {
         fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
      }
      if (fErrorStrategy == "CROSSENTROPY") {
         fNet.SetLossFunction(ELossFunction::kCrossEntropy);
      }
      if (fErrorStrategy == "MUTUALEXCLUSIVE") {
         fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
      }
      fOutputFunction = EOutputFunction::kSoftmax;
   }
   if (fWeightInitializationString == "XAVIER") {
      fWeightInitialization = DNN::EInitialization::kGauss;
   }
   else if (fWeightInitializationString == "XAVIERUNIFORM") {
      fWeightInitialization = DNN::EInitialization::kUniform;
   }
   GetNumValidationSamples();   // parse and sanity-check the validation-set size
   // ...
   std::cout << "Parsed Training DNN string " << fTrainingStrategyString << std::endl;
   // For each "|"-separated block, build one TTrainingSettings entry; each key
   // is fetched with a default, e.g. an empty drop configuration:
   settings.dropoutProbabilities = fetchValue(block, "DropConfig",
                                              std::vector<Double_t>());
   // ...
   fTrainingSettings.push_back(settings);
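// Hedged sketch of the TrainingStrategy parsing step above, mirroring what
// ParseKeyValueString returns: one key->value map per "|"-separated block.
// Names (ParseTrainingBlocks, KeyValueVector) are illustrative, not TMVA's.
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using KeyValueVector = std::vector<std::map<std::string, std::string>>;

KeyValueVector ParseTrainingBlocks(const std::string &spec)
{
   KeyValueVector blocks;
   std::stringstream blockStream(spec);
   std::string block;
   while (std::getline(blockStream, block, '|')) {       // block delimiter
      std::map<std::string, std::string> kv;
      std::stringstream tokenStream(block);
      std::string token;
      while (std::getline(tokenStream, token, ',')) {    // token delimiter
         auto eq = token.find('=');
         if (eq != std::string::npos)
            kv[token.substr(0, eq)] = token.substr(eq + 1);
      }
      blocks.push_back(kv);
   }
   return blocks;
}

int main()
{
   auto blocks = ParseTrainingBlocks("LearningRate=1e-1,BatchSize=30|LearningRate=1e-4,BatchSize=20");
   std::cout << blocks.size() << " training phases, first LearningRate = "
             << blocks[0]["LearningRate"] << "\n";       // 2 training phases, 1e-1
}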
UInt_t TMVA::MethodDNN::GetNumValidationSamples()
{
   Int_t nValidationSamples = 0;
   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();

   if (fValidationSize.EndsWith("%")) {
      // Relative specification, e.g. "20%" or "20.0%"
      TString intValStr = TString(fValidationSize.Strip(TString::kTrailing, '%'));
      if (intValStr.IsFloat()) {
         Double_t valSizeAsDouble = intValStr.Atof() / 100.0;
         nValidationSamples = trainingSetSize * valSizeAsDouble;
      } else {
         Log() << kFATAL << "Cannot parse number \"" << fValidationSize
               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
      }
   } else if (fValidationSize.IsFloat()) {
      Double_t valSizeAsDouble = fValidationSize.Atof();
      if (valSizeAsDouble < 1.0) {
         nValidationSamples = trainingSetSize * valSizeAsDouble;   // fraction, e.g. "0.2"
      } else {
         nValidationSamples = valSizeAsDouble;                     // absolute count, e.g. "100"
      }
   } else {
      Log() << kFATAL << "Cannot parse number \"" << fValidationSize
            << "\". Expected string like \"0.2\" or \"100\"." << Endl;
   }

   if (nValidationSamples < 0) {
      Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is negative." << Endl;
   }
   if (nValidationSamples == 0) {
      Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is zero." << Endl;
   }
   if (nValidationSamples >= (Int_t)trainingSetSize) {
      Log() << kFATAL << "Validation size \"" << fValidationSize
            << "\" is larger than or equal in size to training set (size=\""
            << trainingSetSize << "\")." << Endl;
   }

   return nValidationSamples;
}
// MethodDNN::Train
   if (fInteractive && fInteractive->NotInitialized()) {
      std::vector<TString> titles = {"Error on training set", "Error on test set"};
      fInteractive->Init(titles);
   }
      Log() << kFATAL << "Number of samples in the datasets are train: "
            << nTrainingSamples << " test: " << nTestSamples << Endl
            << "One of these is smaller than the batch size of "
            << settings.batchSize << ". Please increase the batch"
            << " size to be at least the same size as the smallest"
            << " of these values." << Endl;
   if (fArchitectureString == "GPU") {
      TrainGpu();
      if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
      ExitFromTraining();
      return;
   } else if (fArchitectureString == "OpenCL") {
      Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
      return;
   } else if (fArchitectureString == "CPU") {
      TrainCpu();
      if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
      ExitFromTraining();
      return;
   }

   Log() << kINFO << "Using Standard Implementation." << Endl;
   // Standard implementation: convert the training events into Pattern objects.
   const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);

   for (auto &event : eventCollectionTraining) {
      const std::vector<Float_t> &values = event->GetValues();
      if (fAnalysisType == Types::kClassification) {
         double outputValue = event->GetClass() == 0 ? 0.9 : 0.1;
         trainPattern.push_back(Pattern(values.begin(), values.end(),
                                        outputValue,
                                        event->GetWeight()));
      } else if (fAnalysisType == Types::kMulticlass) {
         std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
         oneHot[event->GetClass()] = 1.0;   // one-hot encoding of the class label
         trainPattern.push_back(Pattern(values.begin(), values.end(),
                                        oneHot.cbegin(), oneHot.cend(),
                                        event->GetWeight()));
      } else {   // regression
         const std::vector<Float_t> &targets = event->GetTargets();
         trainPattern.push_back(Pattern(values.begin(), values.end(),
                                        targets.begin(), targets.end(),
                                        event->GetWeight()));
      }
   }
   // The test sample is converted in exactly the same way, filling testPattern.
   for (auto &event : eventCollectionTesting) {
      const std::vector<Float_t> &values = event->GetValues();
      if (fAnalysisType == Types::kClassification) {
         double outputValue = event->GetClass() == 0 ? 0.9 : 0.1;
         testPattern.push_back(Pattern(values.begin(), values.end(),
                                       outputValue,
                                       event->GetWeight()));
      } else if (fAnalysisType == Types::kMulticlass) {
         std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
         oneHot[event->GetClass()] = 1.0;
         testPattern.push_back(Pattern(values.begin(), values.end(),
                                       oneHot.cbegin(), oneHot.cend(),
                                       event->GetWeight()));
      } else {
         const std::vector<Float_t> &targets = event->GetTargets();
         testPattern.push_back(Pattern(values.begin(), values.end(),
                                       targets.begin(), targets.end(),
                                       event->GetWeight()));
      }
   }
   std::vector<double> weights;

   net.SetIpythonInteractive(fInteractive, &fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);
   net.setInputSize(fNet.GetInputWidth() + 1);    // +1 for the bias node
   net.setOutputSize(fNet.GetOutputWidth() + 1);

   for (size_t i = 0; i < fNet.GetDepth(); i++) {
      EnumFunction g = EnumFunction::LINEAR;
      switch (fNet.GetLayer(i).GetActivationFunction()) {
         case EActivationFunction::kIdentity: g = EnumFunction::LINEAR;   break;
         case EActivationFunction::kRelu:     g = EnumFunction::RELU;     break;
         case EActivationFunction::kSigmoid:  g = EnumFunction::SIGMOID;  break;
         case EActivationFunction::kTanh:     g = EnumFunction::TANH;     break;
         case EActivationFunction::kFastTanh: g = EnumFunction::TANH;     break;
         case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
         case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
         case EActivationFunction::kGauss:    g = EnumFunction::GAUSS;    break;
      }
      if (i < fNet.GetDepth() - 1) {
         net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
      } else {
         // last layer: attach the output-value mode matching fOutputFunction
         ModeOutputValues h = ModeOutputValues::DIRECT;
         switch (fOutputFunction) {
            case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT;  break;
            case EOutputFunction::kSigmoid:  h = ModeOutputValues::SIGMOID; break;
            case EOutputFunction::kSoftmax:  h = ModeOutputValues::SOFTMAX; break;
         }
         net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
      }
   }
   switch (fNet.GetLossFunction()) {
      case ELossFunction::kMeanSquaredError:
         net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
         break;
      case ELossFunction::kCrossEntropy:
         net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
         break;
      case ELossFunction::kSoftmaxCrossEntropy:
         net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
         break;
   }

   switch (fWeightInitialization) {
      case EInitialization::kGauss:
         net.initializeWeights(WeightInitializationStrategy::XAVIER,
                               std::back_inserter(weights));
         break;
      case EInitialization::kUniform:
         net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
                               std::back_inserter(weights));
         break;
      default:
         net.initializeWeights(WeightInitializationStrategy::XAVIER,
                               std::back_inserter(weights));
         break;
   }
   for (auto s : fTrainingSettings) {
      EnumRegularization r = EnumRegularization::NONE;
      switch (s.regularization) {
         case ERegularization::kNone: r = EnumRegularization::NONE; break;
         case ERegularization::kL1:   r = EnumRegularization::L1;   break;
         case ERegularization::kL2:   r = EnumRegularization::L2;   break;
      }

      Settings *settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
                                        s.testInterval, s.weightDecay, r,
                                        MinimizerType::fSteepest, s.learningRate,
                                        s.momentum, 1, s.multithreading);
      // ...
   }
   // ...
      Log() << kINFO
            << "Training with learning rate = " << ptrSettings->learningRate()
            << ", repetitions = " << ptrSettings->repetitions()
            << Endl;
      Log() << kINFO << "Drop configuration" << Endl
            << "    drop repetitions = " << ptrSettings->dropRepetitions() << Endl;
      // ...
      Log() << kINFO << "    Layer " << idx << " = " << f << Endl;
      // ...
      Log() << kINFO << Endl;
      // ...
      Log() << kINFO << Endl;
   // copy the trained weights back into fNet, layer by layer
   for (size_t l = 0; l < fNet.GetDepth(); l++) {
      // ...
   }
   // ...
   if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
// MethodDNN::TrainGpu
   Log() << kINFO << "Start of neural network training on GPU." << Endl << Endl;
   // ...
   Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;

   fNet.Initialize(fWeightInitialization);
   // ...
   fInteractive->ClearGraphs();

   net.SetWeightDecay(settings.weightDecay);
   net.SetRegularization(settings.regularization);
   // ...
   net.InitializeGradients();

   Log() << kINFO << "Training phase " << trainingPhase << " of "
         << fTrainingSettings.size() << ":" << Endl;
      Log() << kFATAL << "Inconsistent training sample size" << Endl;
   // ...
      Log() << kFATAL << "Inconsistent test sample size" << Endl;

   // data loaders for the training and test sets
   TDataLoader<TMVAInput_t, TCuda<>> trainingData(trainingTuple, nTrainingSamples,
                                                  net.GetBatchSize(), net.GetInputWidth(),
                                                  net.GetOutputWidth(), nThreads);
   TDataLoader<TMVAInput_t, TCuda<>> testData(testTuple, nTestSamples, net.GetBatchSize(),
                                              net.GetInputWidth(), net.GetOutputWidth(),
                                              nThreads);

   std::vector<TNet<TCuda<>>>   nets{};
   std::vector<TBatch<TCuda<>>> batches{};
   for (size_t i = 0; i < nThreads; i++) {
      // ...
      for (size_t j = 0; j < net.GetDepth(); j++) {
         // ... share the master net's weights with the thread-local copies
      }
   }
   std::chrono::time_point<std::chrono::system_clock> start, end;
   start = std::chrono::system_clock::now();

   if (!fInteractive) {
      Log() << std::setw(10) << "Epoch"      << " | "
            << std::setw(12) << "Train Err."
            << std::setw(12) << "Test Err."
            << std::setw(12) << "GFLOP/s"
            << std::setw(12) << "Conv. Steps" << Endl;
      std::string separator(62, '-');
      Log() << separator << Endl;
   }
   // ...
      // training step on the current batch
      auto inputMatrix  = batch.GetInput();
      auto outputMatrix = batch.GetOutput();
   // ...
   end = std::chrono::system_clock::now();
   // ...
      // evaluate the loss on the training data
      auto inputMatrix  = batch.GetInput();
      auto outputMatrix = batch.GetOutput();
   nFlops *= net.GetNFlops() * 1e-9;
   // ...
   start = std::chrono::system_clock::now();
   // ...
   if (fExitFromTraining) break;

   Log() << std::setw(10) << stepCount << " | "
         // ...
         << std::setw(12) << nFlops / seconds
         // ...
   // copy the trained weights back from the device net into fNet
   for (size_t l = 0; l < net.GetDepth(); l++) {
      // ...
   }
// ...
   Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                      "you have CUDA installed and it was successfully "
                      "detected by CMAKE." << Endl;
// MethodDNN::TrainCpu
   Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;
   // ...
   Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;

   fNet.Initialize(fWeightInitialization);
   // ...
   fInteractive->ClearGraphs();

   Log() << kINFO << "Training phase " << trainingPhase << " of "
         << fTrainingSettings.size() << ":" << Endl;

   net.SetWeightDecay(settings.weightDecay);
   net.SetRegularization(settings.regularization);
   // ...
   net.InitializeGradients();
   // ...
   const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
   // ...
      Log() << kFATAL << "Inconsistent training sample size" << Endl;
   // ...
      Log() << kFATAL << "Inconsistent test sample size" << Endl;

   // data loaders for the training and test sets
   TDataLoader<TMVAInput_t, TCpu<>> trainingData(trainingTuple, nTrainingSamples,
                                                 net.GetBatchSize(), net.GetInputWidth(),
                                                 net.GetOutputWidth(), nThreads);
   TDataLoader<TMVAInput_t, TCpu<>> testData(testTuple, nTestSamples, net.GetBatchSize(),
                                             net.GetInputWidth(), net.GetOutputWidth(),
                                             nThreads);

   std::vector<TNet<TCpu<>>>   nets{};
   std::vector<TBatch<TCpu<>>> batches{};
   for (size_t i = 0; i < nThreads; i++) {
      nets.push_back(net);
      for (size_t j = 0; j < net.GetDepth(); j++) {
         // ... share the master net's weights with the thread-local copies
      }
   }
   std::chrono::time_point<std::chrono::system_clock> start, end;
   start = std::chrono::system_clock::now();

   if (!fInteractive) {
      Log() << std::setw(10) << "Epoch"      << " | "
            << std::setw(12) << "Train Err."
            << std::setw(12) << "Test Err."
            << std::setw(12) << "GFLOP/s"
            << std::setw(12) << "Conv. Steps" << Endl;
      std::string separator(62, '-');
      Log() << separator << Endl;
   }
   // ...
      // training step on the current batch
      auto inputMatrix  = batch.GetInput();
      auto outputMatrix = batch.GetOutput();
      auto weightMatrix = batch.GetWeights();
   // ...
   end = std::chrono::system_clock::now();
   // ...
      // evaluate the loss on the training data
      auto inputMatrix  = batch.GetInput();
      auto outputMatrix = batch.GetOutput();
      auto weightMatrix = batch.GetWeights();
      trainingError += net.Loss(inputMatrix, outputMatrix, weightMatrix);
   // ...
   if (fExitFromTraining) break;
   // ...
   nFlops *= net.GetNFlops() * 1e-9;
   start = std::chrono::system_clock::now();
   // ...
   if (fExitFromTraining) break;

   Log() << std::setw(10) << stepCount << " | "
         // ...
         << std::setw(12) << nFlops / seconds
         // ...
   // copy the trained weights back into fNet, layer by layer
   for (size_t l = 0; l < net.GetDepth(); l++) {
      auto &layer = fNet.GetLayer(l);
      // ...
   }
// ...
   Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                      "you have a BLAS implementation and it was successfully "
                      "detected by CMake, and that the imt CMake flag is set." << Endl;
// MethodDNN::GetMvaValue
   size_t nVariables = GetEvent()->GetNVariables();
   // ...
   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
   // ...
   fNet.Prediction(YHat, X, fOutputFunction);
// MethodDNN::GetRegressionValues
   size_t nVariables = GetEvent()->GetNVariables();
   // ...
   size_t nTargets = std::max(1u, ev->GetNTargets());
   // ...
   auto net = fNet.CreateClone(1);
   net.Prediction(YHat, X, fOutputFunction);

   for (size_t i = 0; i < nTargets; i++)
      output[i] = YHat(0, i);

   if (fRegressionReturnVal == nullptr) {
      fRegressionReturnVal = new std::vector<Float_t>();
   }
   fRegressionReturnVal->clear();

   for (size_t i = 0; i < nTargets; ++i) {
      evT->SetTarget(i, output[i]);
   }
   // transform the outputs back to the original target scale
   const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
   for (size_t i = 0; i < nTargets; ++i) {
      fRegressionReturnVal->push_back(evT2->GetTarget(i));
   }
   return *fRegressionReturnVal;
// MethodDNN::GetMulticlassValues
   size_t nVariables = GetEvent()->GetNVariables();
   // ...
   if (fMulticlassReturnVal == nullptr) {
      fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
   }

   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
   // ...
   fNet.Prediction(YHat, X, fOutputFunction);
   for (size_t i = 0; i < (size_t)YHat.GetNcols(); i++) {
      (*fMulticlassReturnVal)[i] = YHat(0, i);
   }
   return *fMulticlassReturnVal;
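// The accessors above are what the TMVA application interface calls
// internally. A minimal application-side sketch using TMVA::Reader; the
// variable names and the weight-file path are placeholders and must match
// the ones used for training.
#include "TMVA/Reader.h"

void ApplyDNN()
{
   Float_t var1 = 0.f, var2 = 0.f;
   TMVA::Reader reader("!Color:!Silent");
   reader.AddVariable("var1", &var1);           // same order/names as at training time
   reader.AddVariable("var2", &var2);
   reader.BookMVA("DNN", "dataset/weights/TMVAClassification_DNN.weights.xml");

   var1 = 1.2f; var2 = -0.4f;                   // fill with the event's values
   Double_t mvaValue = reader.EvaluateMVA("DNN");   // calls GetMvaValue()
   (void)mvaValue;
}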
// MethodDNN::AddWeightsXMLTo
   Int_t inputWidth = fNet.GetInputWidth();
   // ...
   char lossFunction = static_cast<char>(fNet.GetLossFunction());
   gTools().xmlengine().NewAttr(nn, nullptr, "InputWidth",
                                gTools().StringFromInt(inputWidth));
   // ...
   gTools().xmlengine().NewAttr(nn, nullptr, "OutputFunction",
                                TString(static_cast<char>(fOutputFunction)));
   // ...
   const auto &layer = fNet.GetLayer(i);
   // ...
   int activationFunction = static_cast<int>(layer.GetActivationFunction());
// MethodDNN::ReadWeightsFromXML
   fNet.SetBatchSize(1);

   size_t inputWidth, depth;
   // ...
   fNet.SetInputWidth(inputWidth);
   // ...
   for (size_t i = 0; i < depth; i++) {
      // ...
      ReadMatrixXML(layerXML, "Weights", weights);
      ReadMatrixXML(layerXML, "Biases",  biases);
      fNet.GetLayer(i).GetWeights() = weights;
      fNet.GetLayer(i).GetBiases()  = biases;
   }
// MethodDNN::CreateRanking
   fRanking = new Ranking(GetName(), "Importance");
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
      fRanking->AddRank(Rank(GetInputLabel(ivar), 1.0));
   }
// MethodDNN::GetHelpMessage
   Log() << col << "--- Short description:" << colres << Endl;
   Log() << "The DNN neural network is a feedforward" << Endl;
   Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
   Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
   Log() << "nodes is determined by the input variables (output classes, i.e." << Endl;
   Log() << "signal and background for classification, targets for regression," << Endl;
   Log() << "or the classes for multiclass)." << Endl;
   Log() << col << "--- Performance optimisation:" << colres << Endl;
   const char* txt =
      "The DNN supports various options to improve performance in terms of training speed and\n"
      "reduction of overfitting:\n"
      "\n"
      "   - different training settings can be stacked, such that the initial training\n"
      "     is done with a large learning rate and a large drop-out fraction whilst\n"
      "     in a later stage learning rate and drop-out can be reduced.\n"
      "   - drop-out\n"
      "     [recommended: initial training stage: 0.0 for the first layer, 0.5 for later layers;\n"
      "      later training stage: 0.1 or 0.0 for all layers;\n"
      "      final training stage: 0.0]\n"
      "     Drop-out is a technique where at each training cycle a fraction of arbitrary\n"
      "     nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting.\n"
      "   - L1 and L2 regularization are available\n"
      "   - Minibatches\n"
      "     [recommended: 10 - 150]\n"
      "     Arbitrary mini-batch sizes can be chosen.\n"
      "   - Multithreading\n"
      "     [recommended: True]\n"
      "     Multithreading can be turned on. The minibatches are distributed to the available\n"
      "     cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle.\n"
      "\n"
      " Options:\n"
      " \"Layout\":\n"
      "    - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\"\n"
      "    - meaning:\n"
      "       . two hidden layers (separated by \",\")\n"
      "       . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR)\n"
      "       . the activation function for the output layer is LINEAR\n"
      "       . the first hidden layer has (N+30)*2 nodes, where N is the number of input neurons\n"
      "       . the second hidden layer has N+30 nodes, where N is the number of input neurons\n"
      "       . the number of nodes in the output layer is determined by the number of output nodes\n"
      "         and can therefore not be chosen freely\n"
      "\n"
      " \"ErrorStrategy\":\n"
      "    - SUMOFSQUARES\n"
      "      The error of the neural net is determined by a sum-of-squares error function.\n"
      "      For regression, this is the only possible choice.\n"
      "    - CROSSENTROPY\n"
      "      The error of the neural net is determined by a cross-entropy function. The\n"
      "      output values are automatically (internally) transformed into probabilities\n"
      "      using a sigmoid function. For signal/background classification this is the\n"
      "      default choice. For multiclass, using cross entropy more than one or no output\n"
      "      classes can be equally true or false (e.g. Event 0: A and B are true, Event 1:\n"
      "      A and C are true, Event 2: C is true, ...)\n"
      "    - MUTUALEXCLUSIVE\n"
      "      In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C)\n"
      "\n"
      " \"WeightInitialization\":\n"
      "    - XAVIER\n"
      "      \"Xavier Glorot & Yoshua Bengio\"-style initialization of the weights. The weights are chosen randomly\n"
      "      such that the variance of the values of the nodes is preserved for each layer.\n"
      "    - XAVIERUNIFORM\n"
      "      The same as XAVIER, but with uniformly distributed weights instead of Gaussian weights.\n"
      "    - LAYERSIZE\n"
      "      Random values scaled by the layer size.\n"
      "\n"
      " \"TrainingStrategy\":\n"
      "    - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\"\n"
      "    - explanation: two stacked training settings separated by \"|\"\n"
      "       . first training setting:  \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\"\n"
      "       . second training setting: \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\"\n"
      "       . LearningRate:\n"
      "         - recommended for classification: 0.1 initially, 1e-4 later\n"
      "         - recommended for regression: 1e-4 and less\n"
      "       . Momentum:\n"
      "         preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0]\n"
      "       . Repetitions:\n"
      "         train \"Repetitions\" repetitions with the same minibatch before switching to the next one\n"
      "       . ConvergenceSteps:\n"
      "         Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement\n"
      "         of the error on the test samples has been found. (Mind that the test samples are only\n"
      "         evaluated, and hence convergence only checked, every \"TestRepetitions\" cycles.)\n"
      "       . BatchSize:\n"
      "         Size of the mini-batches.\n"
      "       . TestRepetitions:\n"
      "         Test the neural net on the test samples every \"TestRepetitions\" cycles.\n"
      "       . WeightDecay:\n"
      "         If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor.\n"
      "       . Renormalize:\n"
      "         NONE, L1 (|w|) or L2 (w^2)\n"
      "       . DropConfig:\n"
      "         Drop a fraction of arbitrary nodes of each of the layers according to the values given\n"
      "         in the DropConfig.\n"
      "         [example: DropConfig=0.0+0.5+0.3\n"
      "          meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and\n"
      "          30% of the nodes in layer 2;\n"
      "          recommended: leave all the nodes turned on for the input layer (layer 0),\n"
      "          turn off half of the nodes in later layers for the initial training; leave all nodes\n"
      "          turned on (0.0) in later training stages]\n"
      "       . DropRepetitions:\n"
      "         Every \"DropRepetitions\" cycles the configuration of which nodes are dropped is changed.\n"
      "         [recommended: 1]\n"
      "       . Multithreading:\n"
      "         turn on multithreading [recommended: True]\n";
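// The options documented in the help text above are passed as a single option
// string when booking the method. A hedged booking sketch built from the
// examples in this help text; the factory/dataloader setup is schematic and
// file/dataset names are placeholders. Note that MethodDNN is deprecated, so
// TMVA::Types::kDL (MethodDL) is the recommended replacement.
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

void BookDNN(TMVA::Factory &factory, TMVA::DataLoader *loader)
{
   TString options =
      "!H:!V:ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:"
      "Architecture=CPU:ValidationSize=20%:"
      "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR:"
      "TrainingStrategy=LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,"
      "BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,"
      "DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,"
      "ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,"
      "Renormalize=L2,DropConfig=0.0,DropRepetitions=5";
   factory.BookMethod(loader, TMVA::Types::kDNN, "DNN", options);
}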