MethodDNN.cxx
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDNN *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A neural network implementation *
12  * *
13  * Authors (alphabetical): *
14  * Simon Pfreundschuh <s.pfreundschuh@gmail.com> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@gmx.ch> - CERN, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 /*! \class TMVA::MethodDNN
29 \ingroup TMVA
30 Deep Neural Network Implementation.
31 */
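//
// A minimal booking sketch (illustrative only; file names, variable names and
// option values below are placeholders, not taken from this source file):
//
//   TFile* outputFile = TFile::Open("TMVA_DNN.root", "RECREATE");
//   TMVA::Factory factory("TMVAClassification", outputFile,
//                         "!V:!Silent:AnalysisType=Classification");
//   TMVA::DataLoader loader("dataset");
//   // ... loader.AddVariable(...), loader.AddSignalTree(...), etc. ...
//   factory.BookMethod(&loader, TMVA::Types::kDNN, "DNN",
//                      "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR:"
//                      "ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:"
//                      "Architecture=CPU");
//   factory.TrainAllMethods();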
32 
33 #include "TMVA/MethodDNN.h"
34 
35 #include "TString.h"
36 #include "TFormula.h"
37 #include "TObjString.h"
38 
39 #include "TMVA/ClassifierFactory.h"
40 #include "TMVA/Configurable.h"
41 #include "TMVA/IMethod.h"
42 #include "TMVA/MsgLogger.h"
43 #include "TMVA/MethodBase.h"
44 #include "TMVA/Timer.h"
45 #include "TMVA/Types.h"
46 #include "TMVA/Tools.h"
47 #include "TMVA/Config.h"
48 #include "TMVA/Ranking.h"
49 
50 #include "TMVA/DNN/Net.h"
52 
53 #include "TMVA/NeuralNet.h"
54 #include "TMVA/Monitoring.h"
55 
56 #include <algorithm>
57 #include <iostream>
58 #include <string>
59 #include <iomanip>
60 
61 REGISTER_METHOD(DNN)
62 
63 ClassImp(TMVA::MethodDNN);
64 
65 namespace TMVA
66 {
67  using namespace DNN;
68 
69  ////////////////////////////////////////////////////////////////////////////////
70  /// standard constructor
71 
72  TMVA::MethodDNN::MethodDNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData,
73  const TString &theOption)
74  : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption), fWeightInitialization(), fOutputFunction(),
75  fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
76  fArchitectureString(), fTrainingSettings(), fResume(false), fSettings()
77  {
78 }
79 
80 ////////////////////////////////////////////////////////////////////////////////
81 /// constructor from a weight file
82 
83 TMVA::MethodDNN::MethodDNN(DataSetInfo& theData,
84  const TString& theWeightFile)
85  : MethodBase( Types::kDNN, theData, theWeightFile),
86  fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
87  fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
88  fTrainingSettings(), fResume(false), fSettings()
89 {
92 }
93 
94 ////////////////////////////////////////////////////////////////////////////////
95 /// destructor
96 
97 TMVA::MethodDNN::~MethodDNN()
98 {
99  fWeightInitialization = DNN::EInitialization::kGauss;
100  fOutputFunction = DNN::EOutputFunction::kSigmoid;
101 }
102 
103 ////////////////////////////////////////////////////////////////////////////////
104 /// The DNN can handle classification with two classes, multiclass
105 /// classification, and regression.
106 
107 Bool_t TMVA::MethodDNN::HasAnalysisType(Types::EAnalysisType type,
108  UInt_t numberClasses,
109  UInt_t /*numberTargets*/ )
110 {
111  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
112  if (type == Types::kMulticlass ) return kTRUE;
113  if (type == Types::kRegression ) return kTRUE;
114 
115  return kFALSE;
116 }
117 
118 ////////////////////////////////////////////////////////////////////////////////
119 /// default initializations
120 
121 void TMVA::MethodDNN::Init() {
122  Log() << kWARNING
123  << "MethodDNN is deprecated and will be removed in a future ROOT version. "
124  "Please use MethodDL (TMVA::kDL) instead."
125  << Endl;
126 
127 }
128 
129 ////////////////////////////////////////////////////////////////////////////////
130 /// Options to be set in the option string:
131 ///
132 /// - LearningRate <float> DNN learning rate parameter.
133 /// - DecayRate <float> Decay rate for learning parameter.
134 /// - TestRate <int> Period of validation set error computation.
135 /// - BatchSize <int> Number of event per batch.
136 ///
137 /// - ValidationSize <string> How many events to use for validation. "0.2"
138 /// or "20%" indicates that a fifth of the
139 /// training data should be used. "100"
140 /// indicates that 100 events should be used.
141 
142 void TMVA::MethodDNN::DeclareOptions()
143 {
144 
145  DeclareOptionRef(fLayoutString="SOFTSIGN|(N+100)*2,LINEAR",
146  "Layout",
147  "Layout of the network.");
148 
149  DeclareOptionRef(fValidationSize = "20%", "ValidationSize",
150  "Part of the training data to use for "
151  "validation. Specify as 0.2 or 20% to use a "
152  "fifth of the data set as validation set. "
153  "Specify as 100 to use exactly 100 events. "
154  "(Default: 20%)");
155 
156  DeclareOptionRef(fErrorStrategy="CROSSENTROPY",
157  "ErrorStrategy",
158  "Loss function: Mean squared error (regression)"
159  " or cross entropy (binary classification).");
160  AddPreDefVal(TString("CROSSENTROPY"));
161  AddPreDefVal(TString("SUMOFSQUARES"));
162  AddPreDefVal(TString("MUTUALEXCLUSIVE"));
163 
164  DeclareOptionRef(fWeightInitializationString="XAVIER",
165  "WeightInitialization",
166  "Weight initialization strategy");
167  AddPreDefVal(TString("XAVIER"));
168  AddPreDefVal(TString("XAVIERUNIFORM"));
169 
170  DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
171  AddPreDefVal(TString("STANDARD"));
172  AddPreDefVal(TString("CPU"));
173  AddPreDefVal(TString("GPU"));
174  AddPreDefVal(TString("OPENCL"));
175 
176  DeclareOptionRef(
177  fTrainingStrategyString = "LearningRate=1e-1,"
178  "Momentum=0.3,"
179  "Repetitions=3,"
180  "ConvergenceSteps=50,"
181  "BatchSize=30,"
182  "TestRepetitions=7,"
183  "WeightDecay=0.0,"
184  "Renormalize=L2,"
185  "DropConfig=0.0,"
186  "DropRepetitions=5|LearningRate=1e-4,"
187  "Momentum=0.3,"
188  "Repetitions=3,"
189  "ConvergenceSteps=50,"
190  "BatchSize=20,"
191  "TestRepetitions=7,"
192  "WeightDecay=0.001,"
193  "Renormalize=L2,"
194  "DropConfig=0.0+0.5+0.5,"
195  "DropRepetitions=5,"
196  "Multithreading=True",
197  "TrainingStrategy",
198  "Defines the training strategies.");
199 }
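//
// Sketch of the TrainingStrategy format consumed by ProcessOptions() below
// (values are illustrative): training phases are separated by '|', key=value
// pairs within a phase by ','. Note that ProcessOptions() looks up the key
// "Regularization"; the "Renormalize" key appearing in the default string
// above is not read by the parsing code shown in this file.
//
//   TrainingStrategy=LearningRate=1e-2,Momentum=0.5,BatchSize=64,ConvergenceSteps=20,
//                    TestRepetitions=5,WeightDecay=1e-4,Regularization=L2,DropConfig=0.0+0.5
//                    |LearningRate=1e-3,Momentum=0.0,BatchSize=64,ConvergenceSteps=20,
//                    TestRepetitions=5,WeightDecay=1e-4,Regularization=L2,DropConfig=0.0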
200 
201 ////////////////////////////////////////////////////////////////////////////////
202 /// parse layout specification string and return a vector, each entry
203 /// containing the number of neurons to go in each successive layer
204 
205 auto TMVA::MethodDNN::ParseLayoutString(TString layoutString)
206  -> LayoutVector_t
207 {
208  LayoutVector_t layout;
209  const TString layerDelimiter(",");
210  const TString subDelimiter("|");
211 
212  const size_t inputSize = GetNvar();
213 
214  TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
215  TIter nextLayer (layerStrings);
216  TObjString* layerString = (TObjString*)nextLayer ();
217 
218  for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
219  int numNodes = 0;
220  EActivationFunction activationFunction = EActivationFunction::kTanh;
221 
222  TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
223  TIter nextToken (subStrings);
224  TObjString* token = (TObjString *) nextToken();
225  int idxToken = 0;
226  for (; token != nullptr; token = (TObjString *) nextToken()) {
227  switch (idxToken)
228  {
229  case 0:
230  {
231  TString strActFnc (token->GetString ());
232  if (strActFnc == "RELU") {
233  activationFunction = DNN::EActivationFunction::kRelu;
234  } else if (strActFnc == "TANH") {
235  activationFunction = DNN::EActivationFunction::kTanh;
236  } else if (strActFnc == "SYMMRELU") {
237  activationFunction = DNN::EActivationFunction::kSymmRelu;
238  } else if (strActFnc == "SOFTSIGN") {
239  activationFunction = DNN::EActivationFunction::kSoftSign;
240  } else if (strActFnc == "SIGMOID") {
241  activationFunction = DNN::EActivationFunction::kSigmoid;
242  } else if (strActFnc == "LINEAR") {
243  activationFunction = DNN::EActivationFunction::kIdentity;
244  } else if (strActFnc == "GAUSS") {
245  activationFunction = DNN::EActivationFunction::kGauss;
246  }
247  }
248  break;
249  case 1: // number of nodes
250  {
251  TString strNumNodes (token->GetString ());
252  TString strN ("x");
253  strNumNodes.ReplaceAll ("N", strN);
254  strNumNodes.ReplaceAll ("n", strN);
255  TFormula fml ("tmp",strNumNodes);
256  numNodes = fml.Eval (inputSize);
257  }
258  break;
259  }
260  ++idxToken;
261  }
262  layout.push_back(std::make_pair(numNodes, activationFunction));
263  }
264  return layout;
265 }
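//
// Illustrative example of what ParseLayoutString() produces: with GetNvar() == 10,
// the string "TANH|(N+10)*2,TANH|N,LINEAR" yields three entries,
//   { {40, kTanh}, {10, kTanh}, {0, kIdentity} },
// since "N"/"n" is substituted by the formula variable and evaluated with
// TFormula at x = 10. The last (output) entry carries no node count here; the
// output layer is added with the proper size in ProcessOptions().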
266 
267 ////////////////////////////////////////////////////////////////////////////////
268 /// parse key value pairs in blocks -> return vector of blocks with map of key value pairs
269 
270 auto TMVA::MethodDNN::ParseKeyValueString(TString parseString,
271  TString blockDelim,
272  TString tokenDelim)
273  -> KeyValueVector_t
274 {
275  KeyValueVector_t blockKeyValues;
276  const TString keyValueDelim ("=");
277 
278  TObjArray* blockStrings = parseString.Tokenize (blockDelim);
279  TIter nextBlock (blockStrings);
280  TObjString* blockString = (TObjString *) nextBlock();
281 
282  for (; blockString != nullptr; blockString = (TObjString *) nextBlock())
283  {
284  blockKeyValues.push_back (std::map<TString,TString>());
285  std::map<TString,TString>& currentBlock = blockKeyValues.back ();
286 
287  TObjArray* subStrings = blockString->GetString ().Tokenize (tokenDelim);
288  TIter nextToken (subStrings);
289  TObjString* token = (TObjString*)nextToken ();
290 
291  for (; token != nullptr; token = (TObjString *)nextToken())
292  {
293  TString strKeyValue (token->GetString ());
294  int delimPos = strKeyValue.First (keyValueDelim.Data ());
295  if (delimPos <= 0)
296  continue;
297 
298  TString strKey = TString (strKeyValue (0, delimPos));
299  strKey.ToUpper();
300  TString strValue = TString (strKeyValue (delimPos+1, strKeyValue.Length ()));
301 
302  strKey.Strip (TString::kBoth, ' ');
303  strValue.Strip (TString::kBoth, ' ');
304 
305  currentBlock.insert (std::make_pair (strKey, strValue));
306  }
307  }
308  return blockKeyValues;
309 }
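//
// Illustrative example of ParseKeyValueString(): the call
//   ParseKeyValueString("LearningRate=1e-1,BatchSize=30|LearningRate=1e-4", "|", ",")
// returns a vector of two maps,
//   { {"LEARNINGRATE" -> "1e-1", "BATCHSIZE" -> "30"}, {"LEARNINGRATE" -> "1e-4"} };
// keys are upper-cased before insertion, values are stored as given.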
310 
311 ////////////////////////////////////////////////////////////////////////////////
312 
313 TString fetchValue (const std::map<TString, TString>& keyValueMap, TString key)
314 {
315  key.ToUpper ();
316  std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
317  if (it == keyValueMap.end()) {
318  return TString ("");
319  }
320  return it->second;
321 }
322 
323 ////////////////////////////////////////////////////////////////////////////////
324 
325 template <typename T>
326 T fetchValue(const std::map<TString,TString>& keyValueMap,
327  TString key,
328  T defaultValue);
329 
330 ////////////////////////////////////////////////////////////////////////////////
331 
332 template <>
333 int fetchValue(const std::map<TString,TString>& keyValueMap,
334  TString key,
335  int defaultValue)
336 {
337  TString value (fetchValue (keyValueMap, key));
338  if (value == "") {
339  return defaultValue;
340  }
341  return value.Atoi ();
342 }
343 
344 ////////////////////////////////////////////////////////////////////////////////
345 
346 template <>
347 double fetchValue (const std::map<TString,TString>& keyValueMap,
348  TString key, double defaultValue)
349 {
350  TString value (fetchValue (keyValueMap, key));
351  if (value == "") {
352  return defaultValue;
353  }
354  return value.Atof ();
355 }
356 
357 ////////////////////////////////////////////////////////////////////////////////
358 
359 template <>
360 TString fetchValue (const std::map<TString,TString>& keyValueMap,
361  TString key, TString defaultValue)
362 {
363  TString value (fetchValue (keyValueMap, key));
364  if (value == "") {
365  return defaultValue;
366  }
367  return value;
368 }
369 
370 ////////////////////////////////////////////////////////////////////////////////
371 
372 template <>
373 bool fetchValue (const std::map<TString,TString>& keyValueMap,
374  TString key, bool defaultValue)
375 {
376  TString value (fetchValue (keyValueMap, key));
377  if (value == "") {
378  return defaultValue;
379  }
380  value.ToUpper ();
381  if (value == "TRUE" || value == "T" || value == "1") {
382  return true;
383  }
384  return false;
385 }
386 
387 ////////////////////////////////////////////////////////////////////////////////
388 
389 template <>
390 std::vector<double> fetchValue(const std::map<TString, TString> & keyValueMap,
391  TString key,
392  std::vector<double> defaultValue)
393 {
394  TString parseString (fetchValue (keyValueMap, key));
395  if (parseString == "") {
396  return defaultValue;
397  }
398  parseString.ToUpper ();
399  std::vector<double> values;
400 
401  const TString tokenDelim ("+");
402  TObjArray* tokenStrings = parseString.Tokenize (tokenDelim);
403  TIter nextToken (tokenStrings);
404  TObjString* tokenString = (TObjString*)nextToken ();
405  for (; tokenString != NULL; tokenString = (TObjString*)nextToken ()) {
406  std::stringstream sstr;
407  double currentValue;
408  sstr << tokenString->GetString ().Data ();
409  sstr >> currentValue;
410  values.push_back (currentValue);
411  }
412  return values;
413 }
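//
// Illustrative use of the fetchValue() helpers above (the map contents are
// made up for this example):
//
//   std::map<TString, TString> block{{"BATCHSIZE", "64"}, {"DROPCONFIG", "0.0+0.5"}};
//   int bs = fetchValue(block, "BatchSize", 30);                  // -> 64
//   double lr = fetchValue(block, "LearningRate", 1e-5);          // -> 1e-5 (key missing)
//   std::vector<double> drop = fetchValue(block, "DropConfig",
//                                         std::vector<double>()); // -> {0.0, 0.5}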
414 
415 ////////////////////////////////////////////////////////////////////////////////
416 
417 void TMVA::MethodDNN::ProcessOptions()
418 {
419  if (IgnoreEventsWithNegWeightsInTraining()) {
420  Log() << kINFO
421  << "Will ignore events with negative weights in training!"
422  << Endl;
423  }
424 
425  if (fArchitectureString == "STANDARD") {
426  Log() << kERROR << "The STANDARD architecture has been deprecated. "
427  "Please use Architecture=CPU or Architecture=GPU. "
428  "See the TMVA Users' Guide for instructions if you "
429  "encounter problems."
430  << Endl;
431  Log() << kFATAL << "The STANDARD architecture has been deprecated. "
432  "Please use Architecture=CPU or Architecture=GPU. "
433  "See the TMVA Users' Guide for instructions if you "
434  "encounter problems."
435  << Endl;
436  }
437 
438  if (fArchitectureString == "OPENCL") {
439  Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
440  "Please use Architecture=CPU or Architecture=GPU for the "
441  "time being. See the TMVA Users' Guide for instructions "
442  "if you encounter problems."
443  << Endl;
444  Log() << kFATAL << "The OPENCL architecture has not been implemented yet. "
445  "Please use Architecture=CPU or Architecture=GPU for the "
446  "time being. See the TMVA Users' Guide for instructions "
447  "if you encounter problems."
448  << Endl;
449  }
450 
451  if (fArchitectureString == "GPU") {
452 #ifndef DNNCUDA // Included only if DNNCUDA flag is _not_ set.
453  Log() << kERROR << "CUDA backend not enabled. Please make sure "
454  "you have CUDA installed and it was successfully "
455  "detected by CMAKE."
456  << Endl;
457  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
458  "you have CUDA installed and it was successfully "
459  "detected by CMAKE."
460  << Endl;
461 #endif // DNNCUDA
462  }
463 
464  if (fArchitectureString == "CPU") {
465 #ifndef DNNCPU // Included only if DNNCPU flag is _not_ set.
466  Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
467  "you have a BLAS implementation and it was successfully "
468  "detected by CMake, and that the imt CMake flag is set."
469  << Endl;
470  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
471  "you have a BLAS implementation and it was successfully "
472  "detected by CMake, and that the imt CMake flag is set."
473  << Endl;
474 #endif // DNNCPU
475  }
476 
477  //
478  // Set network structure.
479  //
480 
481  fLayout = TMVA::MethodDNN::ParseLayoutString (fLayoutString);
482  size_t inputSize = GetNVariables ();
483  size_t outputSize = 1;
484  if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
485  outputSize = GetNTargets();
486  } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
487  outputSize = DataInfo().GetNClasses();
488  }
489 
490  fNet.SetBatchSize(1);
491  fNet.SetInputWidth(inputSize);
492 
493  auto itLayout = std::begin (fLayout);
494  auto itLayoutEnd = std::end (fLayout)-1;
495  for ( ; itLayout != itLayoutEnd; ++itLayout) {
496  fNet.AddLayer((*itLayout).first, (*itLayout).second);
497  }
498  fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
499 
500  //
501  // Loss function and output.
502  //
503 
504  fOutputFunction = EOutputFunction::kSigmoid;
505  if (fAnalysisType == Types::kClassification)
506  {
507  if (fErrorStrategy == "SUMOFSQUARES") {
508  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
509  }
510  if (fErrorStrategy == "CROSSENTROPY") {
511  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
512  }
513  fOutputFunction = EOutputFunction::kSigmoid;
514  } else if (fAnalysisType == Types::kRegression) {
515  if (fErrorStrategy != "SUMOFSQUARES") {
516  Log () << kWARNING << "For regression only SUMOFSQUARES is a valid "
517  << " neural net error function. Setting error function to "
518  << " SUMOFSQUARES now." << Endl;
519  }
520  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
521  fOutputFunction = EOutputFunction::kIdentity;
522  } else if (fAnalysisType == Types::kMulticlass) {
523  if (fErrorStrategy == "SUMOFSQUARES") {
524  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
525  }
526  if (fErrorStrategy == "CROSSENTROPY") {
527  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
528  }
529  if (fErrorStrategy == "MUTUALEXCLUSIVE") {
530  fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
531  }
532  fOutputFunction = EOutputFunction::kSoftmax;
533  }
534 
535  //
536  // Initialization
537  //
538 
539  if (fWeightInitializationString == "XAVIER") {
540  fWeightInitialization = DNN::EInitialization::kGauss;
541  }
542  else if (fWeightInitializationString == "XAVIERUNIFORM") {
543  fWeightInitialization = DNN::EInitialization::kUniform;
544  }
545  else {
546  fWeightInitialization = DNN::EInitialization::kGauss;
547  }
548 
549  //
550  // Training settings.
551  //
552 
553  // Force validation of the ValidationSize option
554  GetNumValidationSamples();
555 
556  KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
557  TString ("|"),
558  TString (","));
559 
560  std::cout << "Parsed Training DNN string " << fTrainingStrategyString << std::endl;
561  std::cout << "String has size " << strategyKeyValues.size() << std::endl;
562  for (auto& block : strategyKeyValues) {
563  TTrainingSettings settings;
564 
565  settings.convergenceSteps = fetchValue(block, "ConvergenceSteps", 100);
566  settings.batchSize = fetchValue(block, "BatchSize", 30);
567  settings.testInterval = fetchValue(block, "TestRepetitions", 7);
568  settings.weightDecay = fetchValue(block, "WeightDecay", 0.0);
569  settings.learningRate = fetchValue(block, "LearningRate", 1e-5);
570  settings.momentum = fetchValue(block, "Momentum", 0.3);
571  settings.dropoutProbabilities = fetchValue(block, "DropConfig",
572  std::vector<Double_t>());
573 
574  TString regularization = fetchValue(block, "Regularization",
575  TString ("NONE"));
576  if (regularization == "L1") {
577  settings.regularization = DNN::ERegularization::kL1;
578  } else if (regularization == "L2") {
579  settings.regularization = DNN::ERegularization::kL2;
580  } else {
581  settings.regularization = DNN::ERegularization::kNone;
582  }
583 
584  TString strMultithreading = fetchValue(block, "Multithreading",
585  TString ("True"));
586  if (strMultithreading.BeginsWith ("T")) {
587  settings.multithreading = true;
588  } else {
589  settings.multithreading = false;
590  }
591 
592  fTrainingSettings.push_back(settings);
593  }
594 }
595 
596 ////////////////////////////////////////////////////////////////////////////////
597 /// Parses and validates the ValidationSize option. Allowed formats are, for
598 /// example, "20%", "0.2" and "100".
599 /// - 20% and 0.2 selects 20% of the training set as validation data.
600 /// - 100 selects 100 events as the validation data.
601 ///
602 /// @return number of samples in validation set
603 ///
604 
605 UInt_t TMVA::MethodDNN::GetNumValidationSamples()
606 {
607  Int_t nValidationSamples = 0;
608  UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
609 
610  // Parsing + Validation
611  // --------------------
612  if (fValidationSize.EndsWith("%")) {
613  // Relative spec. format 20%
614  TString intValStr = TString(fValidationSize.Strip(TString::kTrailing, '%'));
615 
616  if (intValStr.IsFloat()) {
617  Double_t valSizeAsDouble = fValidationSize.Atof() / 100.0;
618  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
619  } else {
620  Log() << kFATAL << "Cannot parse number \"" << fValidationSize
621  << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
622  }
623  } else if (fValidationSize.IsFloat()) {
624  Double_t valSizeAsDouble = fValidationSize.Atof();
625 
626  if (valSizeAsDouble < 1.0) {
627  // Relative spec. format 0.2
628  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
629  } else {
630  // Absolute spec format 100 or 100.0
631  nValidationSamples = valSizeAsDouble;
632  }
633  } else {
634  Log() << kFATAL << "Cannot parse number \"" << fValidationSize << "\". Expected string like \"0.2\" or \"100\"."
635  << Endl;
636  }
637 
638  // Value validation
639  // ----------------
640  if (nValidationSamples < 0) {
641  Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is negative." << Endl;
642  }
643 
644  if (nValidationSamples == 0) {
645  Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is zero." << Endl;
646  }
647 
648  if (nValidationSamples >= (Int_t)trainingSetSize) {
649  Log() << kFATAL << "Validation size \"" << fValidationSize
650  << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
651  }
652 
653  return nValidationSamples;
654 }
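//
// Worked example (numbers are illustrative): with 1000 training events,
//   ValidationSize=20%  -> 1000 * 0.20 = 200 validation events
//   ValidationSize=0.2  -> 1000 * 0.2  = 200 validation events
//   ValidationSize=100  -> 100 validation events (absolute count)
// A value that parses to zero, to a negative number, or to at least the full
// training-set size is rejected with a fatal error above.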
655 
656 ////////////////////////////////////////////////////////////////////////////////
657 
658 void TMVA::MethodDNN::Train()
659 {
660  if (fInteractive && fInteractive->NotInitialized()){
661  std::vector<TString> titles = {"Error on training set", "Error on test set"};
662  fInteractive->Init(titles);
663  // JsMVA progress bar maximum (100%)
664  fIPyMaxIter = 100;
665  }
666 
667  for (TTrainingSettings & settings : fTrainingSettings) {
668  size_t nValidationSamples = GetNumValidationSamples();
669  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
670  size_t nTestSamples = nValidationSamples;
671 
672  if (nTrainingSamples < settings.batchSize ||
673  nValidationSamples < settings.batchSize ||
674  nTestSamples < settings.batchSize) {
675  Log() << kFATAL << "Number of samples in the datasets are train: "
676  << nTrainingSamples << " valid: " << nValidationSamples
677  << " test: " << nTestSamples << ". "
678  << "One of these is smaller than the batch size of "
679  << settings.batchSize << ". Please decrease the batch"
680  << " size so that it is no larger than the smallest"
681  << " of these values." << Endl;
682  }
683  }
684 
685  if (fArchitectureString == "GPU") {
686  TrainGpu();
687  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
688  ExitFromTraining();
689  return;
690  } else if (fArchitectureString == "OPENCL") {
691  Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
692  return;
693  } else if (fArchitectureString == "CPU") {
694  TrainCpu();
695  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
696  ExitFromTraining();
697  return;
698  }
699 
700  Log() << kINFO << "Using Standard Implementation." << Endl;
701 
702  std::vector<Pattern> trainPattern;
703  std::vector<Pattern> testPattern;
704 
705  size_t nValidationSamples = GetNumValidationSamples();
706  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
707 
708  const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
709  const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
710  const std::vector<TMVA::Event *> eventCollectionTesting{allData.begin() + nTrainingSamples, allData.end()};
711 
712  for (auto &event : eventCollectionTraining) {
713  const std::vector<Float_t>& values = event->GetValues();
714  if (fAnalysisType == Types::kClassification) {
715  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
716  trainPattern.push_back(Pattern (values.begin(),
717  values.end(),
718  outputValue,
719  event->GetWeight()));
720  trainPattern.back().addInput(1.0);
721  } else if (fAnalysisType == Types::kMulticlass) {
722  std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
723  oneHot[event->GetClass()] = 1.0;
724  trainPattern.push_back(Pattern (values.begin(), values.end(),
725  oneHot.cbegin(), oneHot.cend(),
726  event->GetWeight()));
727  trainPattern.back().addInput(1.0);
728  } else {
729  const std::vector<Float_t>& targets = event->GetTargets ();
730  trainPattern.push_back(Pattern(values.begin(),
731  values.end(),
732  targets.begin(),
733  targets.end(),
734  event->GetWeight ()));
735  trainPattern.back ().addInput (1.0); // bias node
736  }
737  }
738 
739  for (auto &event : eventCollectionTesting) {
740  const std::vector<Float_t>& values = event->GetValues();
741  if (fAnalysisType == Types::kClassification) {
742  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
743  testPattern.push_back(Pattern (values.begin(),
744  values.end(),
745  outputValue,
746  event->GetWeight()));
747  testPattern.back().addInput(1.0);
748  } else if (fAnalysisType == Types::kMulticlass) {
749  std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
750  oneHot[event->GetClass()] = 1.0;
751  testPattern.push_back(Pattern (values.begin(), values.end(),
752  oneHot.cbegin(), oneHot.cend(),
753  event->GetWeight()));
754  testPattern.back().addInput(1.0);
755  } else {
756  const std::vector<Float_t>& targets = event->GetTargets ();
757  testPattern.push_back(Pattern(values.begin(),
758  values.end(),
759  targets.begin(),
760  targets.end(),
761  event->GetWeight ()));
762  testPattern.back ().addInput (1.0); // bias node
763  }
764  }
765 
766  TMVA::DNN::Net net;
767  std::vector<double> weights;
768 
769  net.SetIpythonInteractive(fInteractive, &fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);
770 
771  net.setInputSize(fNet.GetInputWidth() + 1);
772  net.setOutputSize(fNet.GetOutputWidth() + 1);
773 
774  for (size_t i = 0; i < fNet.GetDepth(); i++) {
775  EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();
776  EnumFunction g = EnumFunction::LINEAR;
777  switch(f) {
778  case EActivationFunction::kIdentity: g = EnumFunction::LINEAR; break;
779  case EActivationFunction::kRelu: g = EnumFunction::RELU; break;
780  case EActivationFunction::kSigmoid: g = EnumFunction::SIGMOID; break;
781  case EActivationFunction::kTanh: g = EnumFunction::TANH; break;
782  case EActivationFunction::kFastTanh: g = EnumFunction::TANH; break;
783  case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
784  case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
785  case EActivationFunction::kGauss: g = EnumFunction::GAUSS; break;
786  }
787  if (i < fNet.GetDepth() - 1) {
788  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
789  } else {
790  ModeOutputValues h = ModeOutputValues::DIRECT;
791  switch(fOutputFunction) {
792  case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT; break;
793  case EOutputFunction::kSigmoid: h = ModeOutputValues::SIGMOID; break;
794  case EOutputFunction::kSoftmax: h = ModeOutputValues::SOFTMAX; break;
795  }
796  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
797  }
798  }
799 
800  switch(fNet.GetLossFunction()) {
801  case ELossFunction::kMeanSquaredError:
802  net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
803  break;
804  case ELossFunction::kCrossEntropy:
805  net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
806  break;
807  case ELossFunction::kSoftmaxCrossEntropy:
808  net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
809  break;
810  }
811 
812  switch(fWeightInitialization) {
813  case DNN::EInitialization::kGauss:
814  net.initializeWeights(WeightInitializationStrategy::XAVIER,
815  std::back_inserter(weights));
816  break;
817  case DNN::EInitialization::kUniform:
818  net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
819  std::back_inserter(weights));
820  break;
821  default:
822  net.initializeWeights(WeightInitializationStrategy::XAVIER,
823  std::back_inserter(weights));
824  break;
825  }
826 
827  int idxSetting = 0;
828  for (auto s : fTrainingSettings) {
829 
830  EnumRegularization r = EnumRegularization::NONE;
831  switch(s.regularization) {
832  case ERegularization::kNone: r = EnumRegularization::NONE; break;
833  case ERegularization::kL1: r = EnumRegularization::L1; break;
834  case ERegularization::kL2: r = EnumRegularization::L2; break;
835  }
836 
837  Settings * settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
838  s.testInterval, s.weightDecay, r,
839  MinimizerType::fSteepest, s.learningRate,
840  s.momentum, 1, s.multithreading);
841  std::shared_ptr<Settings> ptrSettings(settings);
842  ptrSettings->setMonitoring (0);
843  Log() << kINFO
844  << "Training with learning rate = " << ptrSettings->learningRate ()
845  << ", momentum = " << ptrSettings->momentum ()
846  << ", repetitions = " << ptrSettings->repetitions ()
847  << Endl;
848 
849  ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
850  (idxSetting+1)*100.0/(fSettings.size ()));
851 
852  const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
853  if (!dropConfig.empty ()) {
854  Log () << kINFO << "Drop configuration" << Endl
855  << " drop repetitions = " << ptrSettings->dropRepetitions()
856  << Endl;
857  }
858 
859  int idx = 0;
860  for (auto f : dropConfig) {
861  Log () << kINFO << " Layer " << idx << " = " << f << Endl;
862  ++idx;
863  }
864  Log () << kINFO << Endl;
865 
866  DNN::Steepest minimizer(ptrSettings->learningRate(),
867  ptrSettings->momentum(),
868  ptrSettings->repetitions());
869  net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
870  ptrSettings.reset();
871  Log () << kINFO << Endl;
872  idxSetting++;
873  }
874  size_t weightIndex = 0;
875  for (size_t l = 0; l < fNet.GetDepth(); l++) {
876  auto & layerWeights = fNet.GetLayer(l).GetWeights();
877  for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
878  for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
879  layerWeights(i,j) = weights[weightIndex];
880  weightIndex++;
881  }
882  }
883  auto & layerBiases = fNet.GetLayer(l).GetBiases();
884  if (l == 0) {
885  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
886  layerBiases(i,0) = weights[weightIndex];
887  weightIndex++;
888  }
889  } else {
890  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
891  layerBiases(i,0) = 0.0;
892  }
893  }
894  }
895  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
896  ExitFromTraining();
897 }
898 
899 ////////////////////////////////////////////////////////////////////////////////
900 
901 void TMVA::MethodDNN::TrainGpu()
902 {
903 
904 #ifdef DNNCUDA // Included only if DNNCUDA flag is set.
905  Log() << kINFO << "Start of neural network training on GPU." << Endl << Endl;
906 
907  size_t nValidationSamples = GetNumValidationSamples();
908  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
909  size_t nTestSamples = nValidationSamples;
910 
911  Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
912  Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;
913 
914  size_t trainingPhase = 1;
915  fNet.Initialize(fWeightInitialization);
916  for (TTrainingSettings & settings : fTrainingSettings) {
917 
918  if (fInteractive){
919  fInteractive->ClearGraphs();
920  }
921 
922  TNet<TCuda<>> net(settings.batchSize, fNet);
923  net.SetWeightDecay(settings.weightDecay);
924  net.SetRegularization(settings.regularization);
925 
926  // Need to convert dropoutprobabilities to conventions used
927  // by backend implementation.
928  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
929  for (auto & p : dropoutVector) {
930  p = 1.0 - p;
931  }
932  net.SetDropoutProbabilities(dropoutVector);
933 
934  net.InitializeGradients();
935  auto testNet = net.CreateClone(settings.batchSize);
936 
937  Log() << kINFO << "Training phase " << trainingPhase << " of "
938  << fTrainingSettings.size() << ":" << Endl;
939  trainingPhase++;
940 
941  using DataLoader_t = TDataLoader<TMVAInput_t, TCuda<>>;
942 
943  // Split training data into training and validation set
944  const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
945  const std::vector<Event *> trainingInputData =
946  std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
947  const std::vector<Event *> testInputData =
948  std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
949 
950  if (trainingInputData.size() != nTrainingSamples) {
951  Log() << kFATAL << "Inconsistent training sample size" << Endl;
952  }
953  if (testInputData.size() != nTestSamples) {
954  Log() << kFATAL << "Inconsistent test sample size" << Endl;
955  }
956 
957  size_t nThreads = 1;
958  TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
959  TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
960  DataLoader_t trainingData(trainingTuple, nTrainingSamples,
961  net.GetBatchSize(), net.GetInputWidth(),
962  net.GetOutputWidth(), nThreads);
963  DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
964  net.GetInputWidth(), net.GetOutputWidth(),
965  nThreads);
966  DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
967  settings.convergenceSteps,
968  settings.testInterval);
969 
970  std::vector<TNet<TCuda<>>> nets{};
971  std::vector<TBatch<TCuda<>>> batches{};
972  nets.reserve(nThreads);
973  for (size_t i = 0; i < nThreads; i++) {
974  nets.push_back(net);
975  for (size_t j = 0; j < net.GetDepth(); j++)
976  {
977  auto &masterLayer = net.GetLayer(j);
978  auto &layer = nets.back().GetLayer(j);
979  TCuda<>::Copy(layer.GetWeights(),
980  masterLayer.GetWeights());
981  TCuda<>::Copy(layer.GetBiases(),
982  masterLayer.GetBiases());
983  }
984  }
985 
986  bool converged = false;
987  size_t stepCount = 0;
988  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
989 
990  std::chrono::time_point<std::chrono::system_clock> start, end;
991  start = std::chrono::system_clock::now();
992 
993  if (!fInteractive) {
994  Log() << std::setw(10) << "Epoch" << " | "
995  << std::setw(12) << "Train Err."
996  << std::setw(12) << "Test Err."
997  << std::setw(12) << "GFLOP/s"
998  << std::setw(12) << "Conv. Steps" << Endl;
999  std::string separator(62, '-');
1000  Log() << separator << Endl;
1001  }
1002 
1003  while (!converged)
1004  {
1005  stepCount++;
1006 
1007  // Perform minimization steps for a full epoch.
1008  trainingData.Shuffle();
1009  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1010  batches.clear();
1011  for (size_t j = 0; j < nThreads; j++) {
1012  batches.reserve(nThreads);
1013  batches.push_back(trainingData.GetBatch());
1014  }
1015  if (settings.momentum > 0.0) {
1016  minimizer.StepMomentum(net, nets, batches, settings.momentum);
1017  } else {
1018  minimizer.Step(net, nets, batches);
1019  }
1020  }
1021 
1022  if ((stepCount % minimizer.GetTestInterval()) == 0) {
1023 
1024  // Compute test error.
1025  Double_t testError = 0.0;
1026  for (auto batch : testData) {
1027  auto inputMatrix = batch.GetInput();
1028  auto outputMatrix = batch.GetOutput();
1029  testError += testNet.Loss(inputMatrix, outputMatrix);
1030  }
1031  testError /= (Double_t) (nTestSamples / settings.batchSize);
1032 
1033  //Log the loss value
1034  fTrainHistory.AddValue("testError",stepCount,testError);
1035 
1036  end = std::chrono::system_clock::now();
1037 
1038  // Compute training error.
1039  Double_t trainingError = 0.0;
1040  for (auto batch : trainingData) {
1041  auto inputMatrix = batch.GetInput();
1042  auto outputMatrix = batch.GetOutput();
1043  trainingError += net.Loss(inputMatrix, outputMatrix);
1044  }
1045  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1046  //Log the loss value
1047  fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1048 
1049  // Compute numerical throughput.
1050  std::chrono::duration<double> elapsed_seconds = end - start;
1051  double seconds = elapsed_seconds.count();
1052  double nFlops = (double) (settings.testInterval * batchesInEpoch);
1053  nFlops *= net.GetNFlops() * 1e-9;
1054 
1055  converged = minimizer.HasConverged(testError);
1056  start = std::chrono::system_clock::now();
1057 
1058  if (fInteractive) {
1059  fInteractive->AddPoint(stepCount, trainingError, testError);
1060  fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1061  / minimizer.GetConvergenceSteps ();
1062  if (fExitFromTraining) break;
1063  } else {
1064  Log() << std::setw(10) << stepCount << " | "
1065  << std::setw(12) << trainingError
1066  << std::setw(12) << testError
1067  << std::setw(12) << nFlops / seconds
1068  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1069  if (converged) {
1070  Log() << Endl;
1071  }
1072  }
1073  }
1074  }
1075  for (size_t l = 0; l < net.GetDepth(); l++) {
1076  fNet.GetLayer(l).GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1077  fNet.GetLayer(l).GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1078  }
1079  }
1080 
1081 #else // DNNCUDA flag not set.
1082 
1083  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1084  "you have CUDA installed and it was successfully "
1085  "detected by CMAKE." << Endl;
1086 #endif // DNNCUDA
1087 }
1088 
1089 ////////////////////////////////////////////////////////////////////////////////
1090 
1091 void TMVA::MethodDNN::TrainCpu()
1092 {
1093 
1094 #ifdef DNNCPU // Included only if DNNCPU flag is set.
1095  Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;
1096 
1097  size_t nValidationSamples = GetNumValidationSamples();
1098  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1099  size_t nTestSamples = nValidationSamples;
1100 
1101  Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
1102  Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;
1103 
1104  fNet.Initialize(fWeightInitialization);
1105 
1106  size_t trainingPhase = 1;
1107  for (TTrainingSettings & settings : fTrainingSettings) {
1108 
1109  if (fInteractive){
1110  fInteractive->ClearGraphs();
1111  }
1112 
1113  Log() << "Training phase " << trainingPhase << " of "
1114  << fTrainingSettings.size() << ":" << Endl;
1115  trainingPhase++;
1116 
1117  TNet<TCpu<>> net(settings.batchSize, fNet);
1118  net.SetWeightDecay(settings.weightDecay);
1119  net.SetRegularization(settings.regularization);
1120  // Need to convert dropoutprobabilities to conventions used
1121  // by backend implementation.
1122  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1123  for (auto & p : dropoutVector) {
1124  p = 1.0 - p;
1125  }
1126  net.SetDropoutProbabilities(dropoutVector);
1127  net.InitializeGradients();
1128  auto testNet = net.CreateClone(settings.batchSize);
1129 
1130  using DataLoader_t = TDataLoader<TMVAInput_t, TCpu<>>;
1131 
1132  // Split training data into training and validation set
1133  const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
1134  const std::vector<Event *> trainingInputData =
1135  std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
1136  const std::vector<Event *> testInputData =
1137  std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
1138 
1139  if (trainingInputData.size() != nTrainingSamples) {
1140  Log() << kFATAL << "Inconsistent training sample size" << Endl;
1141  }
1142  if (testInputData.size() != nTestSamples) {
1143  Log() << kFATAL << "Inconsistent test sample size" << Endl;
1144  }
1145 
1146  size_t nThreads = 1;
1147  TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
1148  TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
1149  DataLoader_t trainingData(trainingTuple, nTrainingSamples,
1150  net.GetBatchSize(), net.GetInputWidth(),
1151  net.GetOutputWidth(), nThreads);
1152  DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
1153  net.GetInputWidth(), net.GetOutputWidth(),
1154  nThreads);
1155  DNN::TGradientDescent<TCpu<>> minimizer(settings.learningRate,
1156  settings.convergenceSteps,
1157  settings.testInterval);
1158 
1159  std::vector<TNet<TCpu<>>> nets{};
1160  std::vector<TBatch<TCpu<>>> batches{};
1161  nets.reserve(nThreads);
1162  for (size_t i = 0; i < nThreads; i++) {
1163  nets.push_back(net);
1164  for (size_t j = 0; j < net.GetDepth(); j++)
1165  {
1166  auto &masterLayer = net.GetLayer(j);
1167  auto &layer = nets.back().GetLayer(j);
1168  TCpu<>::Copy(layer.GetWeights(),
1169  masterLayer.GetWeights());
1170  TCpu<>::Copy(layer.GetBiases(),
1171  masterLayer.GetBiases());
1172  }
1173  }
1174 
1175  bool converged = false;
1176  size_t stepCount = 0;
1177  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
1178 
1179  std::chrono::time_point<std::chrono::system_clock> start, end;
1180  start = std::chrono::system_clock::now();
1181 
1182  if (!fInteractive) {
1183  Log() << std::setw(10) << "Epoch" << " | "
1184  << std::setw(12) << "Train Err."
1185  << std::setw(12) << "Test Err."
1186  << std::setw(12) << "GFLOP/s"
1187  << std::setw(12) << "Conv. Steps" << Endl;
1188  std::string separator(62, '-');
1189  Log() << separator << Endl;
1190  }
1191 
1192  while (!converged)
1193  {
1194  stepCount++;
1195  // Perform minimization steps for a full epoch.
1196  trainingData.Shuffle();
1197  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1198  batches.clear();
1199  for (size_t j = 0; j < nThreads; j++) {
1200  batches.reserve(nThreads);
1201  batches.push_back(trainingData.GetBatch());
1202  }
1203  if (settings.momentum > 0.0) {
1204  minimizer.StepMomentum(net, nets, batches, settings.momentum);
1205  } else {
1206  minimizer.Step(net, nets, batches);
1207  }
1208  }
1209 
1210  if ((stepCount % minimizer.GetTestInterval()) == 0) {
1211 
1212  // Compute test error.
1213  Double_t testError = 0.0;
1214  for (auto batch : testData) {
1215  auto inputMatrix = batch.GetInput();
1216  auto outputMatrix = batch.GetOutput();
1217  auto weightMatrix = batch.GetWeights();
1218  testError += testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
1219  }
1220  testError /= (Double_t) (nTestSamples / settings.batchSize);
1221 
1222  //Log the loss value
1223  fTrainHistory.AddValue("testError",stepCount,testError);
1224 
1225  end = std::chrono::system_clock::now();
1226 
1227  // Compute training error.
1228  Double_t trainingError = 0.0;
1229  for (auto batch : trainingData) {
1230  auto inputMatrix = batch.GetInput();
1231  auto outputMatrix = batch.GetOutput();
1232  auto weightMatrix = batch.GetWeights();
1233  trainingError += net.Loss(inputMatrix, outputMatrix, weightMatrix);
1234  }
1235  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1236 
1237  //Log the loss value
1238  fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1239 
1240  if (fInteractive){
1241  fInteractive->AddPoint(stepCount, trainingError, testError);
1242  fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
1243  if (fExitFromTraining) break;
1244  }
1245 
1246  // Compute numerical throughput.
1247  std::chrono::duration<double> elapsed_seconds = end - start;
1248  double seconds = elapsed_seconds.count();
1249  double nFlops = (double) (settings.testInterval * batchesInEpoch);
1250  nFlops *= net.GetNFlops() * 1e-9;
1251 
1252  converged = minimizer.HasConverged(testError);
1253  start = std::chrono::system_clock::now();
1254 
1255  if (fInteractive) {
1256  fInteractive->AddPoint(stepCount, trainingError, testError);
1257  fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1258  / minimizer.GetConvergenceSteps ();
1259  if (fExitFromTraining) break;
1260  } else {
1261  Log() << std::setw(10) << stepCount << " | "
1262  << std::setw(12) << trainingError
1263  << std::setw(12) << testError
1264  << std::setw(12) << nFlops / seconds
1265  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1266  if (converged) {
1267  Log() << Endl;
1268  }
1269  }
1270  }
1271  }
1272 
1273 
1274  for (size_t l = 0; l < net.GetDepth(); l++) {
1275  auto & layer = fNet.GetLayer(l);
1276  layer.GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1277  layer.GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1278  }
1279  }
1280 
1281 #else // DNNCPU flag not set.
1282  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1283  "you have a BLAS implementation and it was successfully "
1284  "detected by CMake, and that the imt CMake flag is set." << Endl;
1285 #endif // DNNCPU
1286 }
1287 
1288 ////////////////////////////////////////////////////////////////////////////////
1289 
1290 Double_t TMVA::MethodDNN::GetMvaValue(Double_t* /*errLower*/, Double_t* /*errUpper*/)
1291 {
1292  size_t nVariables = GetEvent()->GetNVariables();
1293  Matrix_t X(1, nVariables);
1294  Matrix_t YHat(1, 1);
1295 
1296  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1297  for (size_t i = 0; i < nVariables; i++) {
1298  X(0,i) = inputValues[i];
1299  }
1300 
1301  fNet.Prediction(YHat, X, fOutputFunction);
1302  return YHat(0,0);
1303 }
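//
// Application-time sketch (illustrative; the weight-file path and variable
// names are placeholders): a trained method is typically evaluated through the
// TMVA::Reader, which ends up calling GetMvaValue() above.
//
//   TMVA::Reader reader("!Color:!Silent");
//   Float_t var1 = 0, var2 = 0;
//   reader.AddVariable("var1", &var1);
//   reader.AddVariable("var2", &var2);
//   reader.BookMVA("DNN", "dataset/weights/TMVAClassification_DNN.weights.xml");
//   var1 = 0.5; var2 = -1.2;
//   Double_t mvaValue = reader.EvaluateMVA("DNN");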
1304 
1305 ////////////////////////////////////////////////////////////////////////////////
1306 
1307 const std::vector<Float_t> & TMVA::MethodDNN::GetRegressionValues()
1308 {
1309  size_t nVariables = GetEvent()->GetNVariables();
1310  Matrix_t X(1, nVariables);
1311 
1312  const Event *ev = GetEvent();
1313  const std::vector<Float_t>& inputValues = ev->GetValues();
1314  for (size_t i = 0; i < nVariables; i++) {
1315  X(0,i) = inputValues[i];
1316  }
1317 
1318  size_t nTargets = std::max(1u, ev->GetNTargets());
1319  Matrix_t YHat(1, nTargets);
1320  std::vector<Float_t> output(nTargets);
1321  auto net = fNet.CreateClone(1);
1322  net.Prediction(YHat, X, fOutputFunction);
1323 
1324  for (size_t i = 0; i < nTargets; i++)
1325  output[i] = YHat(0, i);
1326 
1327  if (fRegressionReturnVal == NULL) {
1328  fRegressionReturnVal = new std::vector<Float_t>();
1329  }
1330  fRegressionReturnVal->clear();
1331 
1332  Event * evT = new Event(*ev);
1333  for (size_t i = 0; i < nTargets; ++i) {
1334  evT->SetTarget(i, output[i]);
1335  }
1336 
1337  const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1338  for (size_t i = 0; i < nTargets; ++i) {
1339  fRegressionReturnVal->push_back(evT2->GetTarget(i));
1340  }
1341  delete evT;
1342  return *fRegressionReturnVal;
1343 }
1344 
1345 const std::vector<Float_t> & TMVA::MethodDNN::GetMulticlassValues()
1346 {
1347  size_t nVariables = GetEvent()->GetNVariables();
1348  Matrix_t X(1, nVariables);
1349  Matrix_t YHat(1, DataInfo().GetNClasses());
1350  if (fMulticlassReturnVal == NULL) {
1351  fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1352  }
1353 
1354  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1355  for (size_t i = 0; i < nVariables; i++) {
1356  X(0,i) = inputValues[i];
1357  }
1358 
1359  fNet.Prediction(YHat, X, fOutputFunction);
1360  for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1361  (*fMulticlassReturnVal)[i] = YHat(0, i);
1362  }
1363  return *fMulticlassReturnVal;
1364 }
1365 
1366 ////////////////////////////////////////////////////////////////////////////////
1367 
1368 void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const
1369 {
1370  void* nn = gTools().xmlengine().NewChild(parent, 0, "Weights");
1371  Int_t inputWidth = fNet.GetInputWidth();
1372  Int_t depth = fNet.GetDepth();
1373  char lossFunction = static_cast<char>(fNet.GetLossFunction());
1374  gTools().xmlengine().NewAttr(nn, 0, "InputWidth",
1375  gTools().StringFromInt(inputWidth));
1376  gTools().xmlengine().NewAttr(nn, 0, "Depth", gTools().StringFromInt(depth));
1377  gTools().xmlengine().NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1378  gTools().xmlengine().NewAttr(nn, 0, "OutputFunction",
1379  TString(static_cast<char>(fOutputFunction)));
1380 
1381  for (Int_t i = 0; i < depth; i++) {
1382  const auto& layer = fNet.GetLayer(i);
1383  auto layerxml = gTools().xmlengine().NewChild(nn, 0, "Layer");
1384  int activationFunction = static_cast<int>(layer.GetActivationFunction());
1385  gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
1386  TString::Itoa(activationFunction, 10));
1387  WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
1388  WriteMatrixXML(layerxml, "Biases", layer.GetBiases());
1389  }
1390 }
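//
// Sketch of the XML layout written above (attribute names and values are
// inferred from the reader code below and are illustrative; the exact matrix
// encoding is delegated to WriteMatrixXML()):
//
//   <Weights InputWidth="4" Depth="3" LossFunction="C" OutputFunction="S">
//     <Layer ActivationFunction="2">
//       <Weights rows="..." cols="..."> ... </Weights>
//       <Biases rows="..." cols="..."> ... </Biases>
//     </Layer>
//     ...
//   </Weights>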
1391 
1392 ////////////////////////////////////////////////////////////////////////////////
1393 
1394 void TMVA::MethodDNN::ReadWeightsFromXML(void* rootXML)
1395 {
1396  auto netXML = gTools().GetChild(rootXML, "Weights");
1397  if (!netXML){
1398  netXML = rootXML;
1399  }
1400 
1401  fNet.Clear();
1402  fNet.SetBatchSize(1);
1403 
1404  size_t inputWidth, depth;
1405  gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1406  gTools().ReadAttr(netXML, "Depth", depth);
1407  char lossFunctionChar;
1408  gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1409  char outputFunctionChar;
1410  gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1411 
1412  fNet.SetInputWidth(inputWidth);
1413  fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
1414  fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1415 
1416  size_t previousWidth = inputWidth;
1417  auto layerXML = gTools().xmlengine().GetChild(netXML, "Layer");
1418  for (size_t i = 0; i < depth; i++) {
1419  TString fString;
1420  EActivationFunction f;
1421 
1422  // Read activation function.
1423  gTools().ReadAttr(layerXML, "ActivationFunction", fString);
1424  f = static_cast<EActivationFunction>(fString.Atoi());
1425 
1426  // Read number of neurons.
1427  size_t width;
1428  auto matrixXML = gTools().GetChild(layerXML, "Weights");
1429  gTools().ReadAttr(matrixXML, "rows", width);
1430 
1431  fNet.AddLayer(width, f);
1432  TMatrixT<Double_t> weights(width, previousWidth);
1433  TMatrixT<Double_t> biases(width, 1);
1434  ReadMatrixXML(layerXML, "Weights", weights);
1435  ReadMatrixXML(layerXML, "Biases", biases);
1436  fNet.GetLayer(i).GetWeights() = weights;
1437  fNet.GetLayer(i).GetBiases() = biases;
1438 
1439  layerXML = gTools().GetNextChild(layerXML);
1440  previousWidth = width;
1441  }
1442 }
1443 
1444 ////////////////////////////////////////////////////////////////////////////////
1445 
1446 void TMVA::MethodDNN::ReadWeightsFromStream( std::istream & /*istr*/)
1447 {
1448 }
1449 
1450 ////////////////////////////////////////////////////////////////////////////////
1451 
1452 const TMVA::Ranking* TMVA::MethodDNN::CreateRanking()
1453 {
1454  fRanking = new Ranking( GetName(), "Importance" );
1455  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1456  fRanking->AddRank( Rank( GetInputLabel(ivar), 1.0));
1457  }
1458  return fRanking;
1459 }
1460 
1461 ////////////////////////////////////////////////////////////////////////////////
1462 
1463 void TMVA::MethodDNN::MakeClassSpecific( std::ostream& /*fout*/,
1464  const TString& /*className*/ ) const
1465 {
1466 }
1467 
1468 ////////////////////////////////////////////////////////////////////////////////
1469 
1470 void TMVA::MethodDNN::GetHelpMessage() const
1471 {
1472  // get help message text
1473  //
1474  // typical length of text line:
1475  // "|--------------------------------------------------------------|"
1476  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1477  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1478 
1479  Log() << Endl;
1480  Log() << col << "--- Short description:" << colres << Endl;
1481  Log() << Endl;
1482  Log() << "The DNN is a feed-forward multilayer perceptron" << Endl;
1483  Log() << "implementation with a user-defined hidden-layer architecture." << Endl;
1484  Log() << "The number of input (output) nodes is determined by the input" << Endl;
1485  Log() << "variables (by the output classes, i.e. signal and background," << Endl;
1486  Log() << "regression targets, or multiclass classes, respectively). " << Endl;
1487  Log() << Endl;
1488  Log() << col << "--- Performance optimisation:" << colres << Endl;
1489  Log() << Endl;
1490 
1491  const char* txt = "The DNN supports various options to improve performance in terms of training speed and \n \
1492 reduction of overfitting: \n \
1493 \n \
1494  - different training settings can be stacked, such that the initial training \n\
1495  is done with a large learning rate and a large drop out fraction whilst \n \
1496  in a later stage learning rate and drop out can be reduced. \n \
1497  - drop out \n \
1498  [recommended: \n \
1499  initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
1500  later training stage: 0.1 or 0.0 for all layers \n \
1501  final training stage: 0.0] \n \
1502  Drop out is a technique where at each training cycle a fraction of arbitrary \n \
1503  nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
1504  - L1 and L2 regularization are available \n \
1505  - Minibatches \n \
1506  [recommended 10 - 150] \n \
1507  Arbitrary mini-batch sizes can be chosen. \n \
1508  - Multithreading \n \
1509  [recommended: True] \n \
1510  Multithreading can be turned on. The minibatches are distributed to the available \n \
1511  cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
1512  \n \
1513  Options: \n \
1514  \"Layout\": \n \
1515  - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
1516  - meaning: \n \
1517  . two hidden layers (separated by \",\") \n \
1518  . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
1519  . the activation function for the output layer is LINEAR \n \
1520  . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
1521  . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
1522  . the number of nodes in the output layer is determined by the number of output nodes \n \
1523  and can therefore not be chosen freely. \n \
1524  \n \
1525  \"ErrorStrategy\": \n \
1526  - SUMOFSQUARES \n \
1527  The error of the neural net is determined by a sum-of-squares error function \n \
1528  For regression, this is the only possible choice. \n \
1529  - CROSSENTROPY \n \
1530  The error of the neural net is determined by a cross entropy function. The \n \
1531  output values are automatically (internally) transformed into probabilities \n \
1532  using a sigmoid function. \n \
1533  For signal/background classification this is the default choice. \n \
1534  For multiclass using cross entropy more than one or no output classes \n \
1535  can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n \
1536  A and C is true, Event 2: C is true, ...) \n \
1537  - MUTUALEXCLUSIVE \n \
1538  In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
1539  \n \
1540  \"WeightInitialization\" \n \
1541  - XAVIER \n \
1542  [recommended] \n \
1543  \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
1544  such that the variance of the values of the nodes is preserved for each layer. \n \
1545  - XAVIERUNIFORM \n \
1546  The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
1547  - LAYERSIZE \n \
1548  Random values scaled by the layer size \n \
1549  \n \
1550  \"TrainingStrategy\" \n \
1551  - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1552  - explanation: two stacked training settings separated by \"|\" \n \
1553  . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1554  . second training setting: \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1555  . LearningRate : \n \
1556  - recommended for classification: 0.1 initially, 1e-4 later \n \
1557  - recommended for regression: 1e-4 and less \n \
1558  . Momentum : \n \
1559  preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
1560  . Repetitions : \n \
1561  train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
1562  . ConvergenceSteps : \n \
1563  Convergence is assumed once \"ConvergenceSteps\" cycles have passed without any improvement \n \
1564  of the error on the test samples. (Note that the test samples are only evaluated every \n \
1565  \"TestRepetitions\" cycles, so convergence can only be checked then.) \n \
1566  . BatchSize \n \
1567  Size of the mini-batches. \n \
1568  . TestRepetitions \n \
1569  Evaluate the neural net on the test samples every \"TestRepetitions\" cycles \n \
1570  . WeightDecay \n \
1571  If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
1572  . Renormalize \n \
1573  NONE, L1 (|w|) or L2 (w^2) \n \
1574  . DropConfig \n \
1575  Drop a randomly chosen fraction of the nodes in each layer, according to the values given \n \
1576  in the DropConfig. \n \
1577  [example: DropConfig=0.0+0.5+0.3 \n \
1578  meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
1579  in layer 2 \n \
1580  recommended: leave all the nodes turned on for the input layer (layer 0); \n \
1581  turn off half of the nodes in later layers for the initial training; leave all nodes \n \
1582  turned on (0.0) in later training stages] \n \
1583  . DropRepetitions \n \
1584  Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n \
1585  [recommended : 1] \n \
1586  . Multithreading \n \
1587  turn on multithreading [recommended: True] \n \
1588  \n";
1589  Log () << txt << Endl;
1590 }
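////////////////////////////////////////////////////////////////////////////////
/// Illustrative note (added for this guide, not part of the original source):
/// for the CROSSENTROPY error strategy documented in GetHelpMessage() above,
/// the network output f(x) is passed through a sigmoid and scored with the
/// standard cross-entropy loss. A generic form of that loss is
///
/// \f[
///    E = - \sum_i w_i \left[ y_i \ln p_i + (1 - y_i) \ln (1 - p_i) \right],
///    \qquad p_i = \frac{1}{1 + e^{-f(x_i)}},
/// \f]
///
/// where \f$ y_i \in \{0,1\} \f$ is the class label and \f$ w_i \f$ the event
/// weight. See ELossFunction::kCrossEntropy in TMVA/DNN/Functions.h for the
/// exact expression used by the implementation.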
1591 
1592 } // namespace TMVA
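
The options described in the help message above are passed as a single configuration string when the method is booked. The following sketch is illustrative only: it assumes the usual TMVA Factory/DataLoader workflow (ROOT versions where Factory::BookMethod takes a DataLoader pointer) and simply reuses the example Layout and TrainingStrategy values from the help text; the function name BookExampleDNN is a placeholder.

#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"
#include "TString.h"

// Book a MethodDNN instance using the option keys documented in GetHelpMessage().
void BookExampleDNN(TMVA::Factory *factory, TMVA::DataLoader *dataloader)
{
   // Two hidden TANH layers and a LINEAR output layer (see "Layout" above).
   TString layout("Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR");

   // Two stacked training phases separated by '|' (see "TrainingStrategy" above).
   TString training("TrainingStrategy="
                    "LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,"
                    "TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5"
                    "|"
                    "LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,"
                    "TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5");

   // Error function and weight initialization as documented above; further
   // generic booking flags (verbosity etc.) are omitted here.
   TString options("ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER");
   options += ":" + layout;
   options += ":" + training;

   factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN", options);
}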