ROOT 6.08/07 Reference Guide
MethodDNN.cxx
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDNN *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A neural network implementation *
12  * *
13  * Authors (alphabetical): *
14  * Simon Pfreundschuh <s.pfreundschuh@gmail.com> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@gmx.ch> - CERN, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 //______________________________________________________________________________
29 //
30 // Deep Neural Network Implementation
31 //______________________________________________________________________________
32 
33 #include "TString.h"
34 #include "TTree.h"
35 #include "TFile.h"
36 #include "TFormula.h"
37 
38 #include "TMVA/ClassifierFactory.h"
39 #include "TMVA/Configurable.h"
40 #include "TMVA/IMethod.h"
41 #include "TMVA/MsgLogger.h"
42 #include "TMVA/MethodBase.h"
43 #include "TMVA/MethodDNN.h"
44 #include "TMVA/Timer.h"
45 #include "TMVA/Types.h"
46 #include "TMVA/Tools.h"
47 #include "TMVA/Config.h"
48 #include "TMVA/Ranking.h"
49 
50 #include "TMVA/DNN/Net.h"
52 
53 #include "TMVA/NeuralNet.h"
54 #include "TMVA/Monitoring.h"
55 
56 #include <algorithm>
57 #include <iostream>
58 #include <string>
59 #include <iomanip>
60 
61 REGISTER_METHOD(DNN)
62 
63 ClassImp(TMVA::MethodDNN)
64 
65 namespace TMVA
66 {
67  using namespace DNN;
68 
69 //______________________________________________________________________________
70 TMVA::MethodDNN::MethodDNN(const TString& jobName,
71  const TString& methodTitle,
72  DataSetInfo& theData,
73  const TString& theOption)
74  : MethodBase( jobName, Types::kDNN, methodTitle, theData, theOption),
75  fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
76  fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
77  fTrainingSettings(), fResume(false), fSettings()
78 {
79  // standard constructor
80 }
81 
82 //______________________________________________________________________________
83 TMVA::MethodDNN::MethodDNN(DataSetInfo& theData,
84  const TString& theWeightFile)
85  : MethodBase( Types::kDNN, theData, theWeightFile),
86  fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
87  fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
88  fTrainingSettings(), fResume(false), fSettings()
89 {
90  // constructor from a weight file
91 }
92 
93 //______________________________________________________________________________
94 TMVA::MethodDNN::~MethodDNN()
95 {
96  // destructor
97  // nothing to be done
98 }
99 
100 //______________________________________________________________________________
101 Bool_t TMVA::MethodDNN::HasAnalysisType(Types::EAnalysisType type,
102  UInt_t numberClasses,
103  UInt_t /*numberTargets*/ )
104 {
105  // The DNN can handle two-class classification, multiclass classification,
106  // and regression with one regression target.
107  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
108  if (type == Types::kMulticlass ) return kTRUE;
109  if (type == Types::kRegression ) return kTRUE;
110 
111  return kFALSE;
112 }
113 
114 //______________________________________________________________________________
115 void TMVA::MethodDNN::Init()
116 {
117  // default initializations
118 }
119 
120 //______________________________________________________________________________
121 void TMVA::MethodDNN::DeclareOptions()
122 {
123  // Options to be set in the option string:
124  //
125  // LearningRate <float> DNN learning rate parameter.
126  // DecayRate <float> Decay rate for learning parameter.
127  // TestRate <int> Period of validation set error computation.
128  // BatchSize <int> Number of events per batch.
129 
130  DeclareOptionRef(fLayoutString="SOFTSIGN|(N+100)*2,LINEAR",
131  "Layout",
132  "Layout of the network.");
133 
134  DeclareOptionRef(fErrorStrategy="CROSSENTROPY",
135  "ErrorStrategy",
136  "Loss function: Mean squared error (regression)"
137  " or cross entropy (binary classification).");
138  AddPreDefVal(TString("CROSSENTROPY"));
139  AddPreDefVal(TString("SUMOFSQUARES"));
140 
141  DeclareOptionRef(fWeightInitializationString="XAVIER",
142  "WeightInitialization",
143  "Weight initialization strategy");
144  AddPreDefVal(TString("XAVIER"));
145  AddPreDefVal(TString("XAVIERUNIFORM"));
146 
148  "Architecture",
149  "Which architecture to perform the training on.");
150  AddPreDefVal(TString("STANDARD"));
151  AddPreDefVal(TString("CPU"));
152  AddPreDefVal(TString("GPU"));
153  AddPreDefVal(TString("OPENCL"));
154 
155  DeclareOptionRef(
156  fTrainingStrategyString = "LearningRate=1e-1,"
157  "Momentum=0.3,"
158  "Repetitions=3,"
159  "ConvergenceSteps=50,"
160  "BatchSize=30,"
161  "TestRepetitions=7,"
162  "WeightDecay=0.0,"
163  "Renormalize=L2,"
164  "DropConfig=0.0,"
165  "DropRepetitions=5|LearningRate=1e-4,"
166  "Momentum=0.3,"
167  "Repetitions=3,"
168  "ConvergenceSteps=50,"
169  "BatchSize=20,"
170  "TestRepetitions=7,"
171  "WeightDecay=0.001,"
172  "Renormalize=L2,"
173  "DropConfig=0.0+0.5+0.5,"
174  "DropRepetitions=5,"
175  "Multithreading=True",
176  "TrainingStrategy",
177  "Defines the training strategies.");
178 }
179 
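// Illustrative usage (not part of the original file): the options declared above
// are passed as a single ":"-separated option string when the method is booked;
// "factory" and "dataloader" are assumed to be a TMVA::Factory and a
// TMVA::DataLoader set up elsewhere.
//
//    factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN",
//       "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR:ErrorStrategy=CROSSENTROPY:"
//       "WeightInitialization=XAVIER:Architecture=STANDARD:"
//       "TrainingStrategy=LearningRate=1e-1,Momentum=0.3,BatchSize=30|"
//       "LearningRate=1e-4,Momentum=0.3,BatchSize=30");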
180 //______________________________________________________________________________
181 auto TMVA::MethodDNN::ParseLayoutString(TString layoutString)
182  -> LayoutVector_t
183 {
184  // parse layout specification string and return a vector, each entry
185  // containing the number of neurons to go in each successive layer
186  LayoutVector_t layout;
187  const TString layerDelimiter(",");
188  const TString subDelimiter("|");
189 
190  const size_t inputSize = GetNvar();
191 
192  TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
193  TIter nextLayer (layerStrings);
194  TObjString* layerString = (TObjString*)nextLayer ();
195 
196  for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
197  int numNodes = 0;
198  EActivationFunction activationFunction = EActivationFunction::kTanh;
199 
200  TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
201  TIter nextToken (subStrings);
202  TObjString* token = (TObjString *) nextToken();
203  int idxToken = 0;
204  for (; token != nullptr; token = (TObjString *) nextToken()) {
205  switch (idxToken)
206  {
207  case 0:
208  {
209  TString strActFnc (token->GetString ());
210  if (strActFnc == "RELU") {
211  activationFunction = DNN::EActivationFunction::kRelu;
212  } else if (strActFnc == "TANH") {
213  activationFunction = DNN::EActivationFunction::kTanh;
214  } else if (strActFnc == "SYMMRELU") {
215  activationFunction = DNN::EActivationFunction::kSymmRelu;
216  } else if (strActFnc == "SOFTSIGN") {
217  activationFunction = DNN::EActivationFunction::kSoftSign;
218  } else if (strActFnc == "SIGMOID") {
219  activationFunction = DNN::EActivationFunction::kSigmoid;
220  } else if (strActFnc == "LINEAR") {
221  activationFunction = DNN::EActivationFunction::kIdentity;
222  } else if (strActFnc == "GAUSS") {
223  activationFunction = DNN::EActivationFunction::kGauss;
224  }
225  }
226  break;
227  case 1: // number of nodes
228  {
229  TString strNumNodes (token->GetString ());
230  TString strN ("x");
231  strNumNodes.ReplaceAll ("N", strN);
232  strNumNodes.ReplaceAll ("n", strN);
233  TFormula fml ("tmp",strNumNodes);
234  numNodes = fml.Eval (inputSize);
235  }
236  break;
237  }
238  ++idxToken;
239  }
240  layout.push_back(std::make_pair(numNodes, activationFunction));
241  }
242  return layout;
243 }
244 
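// Example (illustrative): with GetNvar() == 10 input variables, the layout string
// "TANH|(N+30)*2,TANH|(N+30),LINEAR" parses into the entries (80, kTanh),
// (40, kTanh), (0, kIdentity); the node count of the last entry is irrelevant,
// since ProcessOptions() replaces the final layer by the output layer.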
245 // parse key value pairs in blocks -> return vector of blocks with map of key value pairs
246 //______________________________________________________________________________
247 auto TMVA::MethodDNN::ParseKeyValueString(TString parseString,
248  TString blockDelim,
249  TString tokenDelim)
250  -> KeyValueVector_t
251 {
252  KeyValueVector_t blockKeyValues;
253  const TString keyValueDelim ("=");
254 
255  TObjArray* blockStrings = parseString.Tokenize (blockDelim);
256  TIter nextBlock (blockStrings);
257  TObjString* blockString = (TObjString *) nextBlock();
258 
259  for (; blockString != nullptr; blockString = (TObjString *) nextBlock())
260  {
261  blockKeyValues.push_back (std::map<TString,TString>());
262  std::map<TString,TString>& currentBlock = blockKeyValues.back ();
263 
264  TObjArray* subStrings = blockString->GetString ().Tokenize (tokenDelim);
265  TIter nextToken (subStrings);
266  TObjString* token = (TObjString*)nextToken ();
267 
268  for (; token != nullptr; token = (TObjString *)nextToken())
269  {
270  TString strKeyValue (token->GetString ());
271  int delimPos = strKeyValue.First (keyValueDelim.Data ());
272  if (delimPos <= 0)
273  continue;
274 
275  TString strKey = TString (strKeyValue (0, delimPos));
276  strKey.ToUpper();
277  TString strValue = TString (strKeyValue (delimPos+1, strKeyValue.Length ()));
278 
279  strKey.Strip (TString::kBoth, ' ');
280  strValue.Strip (TString::kBoth, ' ');
281 
282  currentBlock.insert (std::make_pair (strKey, strValue));
283  }
284  }
285  return blockKeyValues;
286 }
287 
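// Example (illustrative): ParseKeyValueString("LearningRate=1e-1,BatchSize=30|LearningRate=1e-4", "|", ",")
// returns two blocks, {{"LEARNINGRATE","1e-1"},{"BATCHSIZE","30"}} and
// {{"LEARNINGRATE","1e-4"}}; keys are upper-cased, values keep their original case.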
288 //______________________________________________________________________________
289 TString fetchValue (const std::map<TString, TString>& keyValueMap, TString key)
290 {
291  key.ToUpper ();
292  std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
293  if (it == keyValueMap.end()) {
294  return TString ("");
295  }
296  return it->second;
297 }
298 
299 //______________________________________________________________________________
300 template <typename T>
301 T fetchValue(const std::map<TString,TString>& keyValueMap,
302  TString key,
303  T defaultValue);
304 
305 //______________________________________________________________________________
306 template <>
307 int fetchValue(const std::map<TString,TString>& keyValueMap,
308  TString key,
309  int defaultValue)
310 {
311  TString value (fetchValue (keyValueMap, key));
312  if (value == "") {
313  return defaultValue;
314  }
315  return value.Atoi ();
316 }
317 
318 //______________________________________________________________________________
319 template <>
320 double fetchValue (const std::map<TString,TString>& keyValueMap,
321  TString key, double defaultValue)
322 {
323  TString value (fetchValue (keyValueMap, key));
324  if (value == "") {
325  return defaultValue;
326  }
327  return value.Atof ();
328 }
329 
330 //______________________________________________________________________________
331 template <>
332 TString fetchValue (const std::map<TString,TString>& keyValueMap,
333  TString key, TString defaultValue)
334 {
335  TString value (fetchValue (keyValueMap, key));
336  if (value == "") {
337  return defaultValue;
338  }
339  return value;
340 }
341 
342 //______________________________________________________________________________
343 template <>
344 bool fetchValue (const std::map<TString,TString>& keyValueMap,
345  TString key, bool defaultValue)
346 {
347  TString value (fetchValue (keyValueMap, key));
348  if (value == "") {
349  return defaultValue;
350  }
351  value.ToUpper ();
352  if (value == "TRUE" || value == "T" || value == "1") {
353  return true;
354  }
355  return false;
356 }
357 
358 //______________________________________________________________________________
359 template <>
360 std::vector<double> fetchValue(const std::map<TString, TString> & keyValueMap,
361  TString key,
362  std::vector<double> defaultValue)
363 {
364  TString parseString (fetchValue (keyValueMap, key));
365  if (parseString == "") {
366  return defaultValue;
367  }
368  parseString.ToUpper ();
369  std::vector<double> values;
370 
371  const TString tokenDelim ("+");
372  TObjArray* tokenStrings = parseString.Tokenize (tokenDelim);
373  TIter nextToken (tokenStrings);
374  TObjString* tokenString = (TObjString*)nextToken ();
375  for (; tokenString != NULL; tokenString = (TObjString*)nextToken ()) {
376  std::stringstream sstr;
377  double currentValue;
378  sstr << tokenString->GetString ().Data ();
379  sstr >> currentValue;
380  values.push_back (currentValue);
381  }
382  return values;
383 }
384 
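// Example (illustrative): for a training-strategy block containing
// DropConfig=0.0+0.5+0.5, fetchValue(block, "DropConfig", std::vector<double>())
// returns {0.0, 0.5, 0.5}, the per-layer dropout fractions used in ProcessOptions().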
385 //______________________________________________________________________________
386 void TMVA::MethodDNN::ProcessOptions()
387 {
388  if (IgnoreEventsWithNegWeightsInTraining()) {
389  Log() << kINFO
390  << "Will ignore negative events in training!"
391  << Endl;
392  }
393 
394  //
395  // Set network structure.
396  //
397 
398  fLayout = TMVA::MethodDNN::ParseLayoutString (fLayoutString);
399  size_t inputSize = GetNVariables ();
400  size_t outputSize = 1;
401  if (GetNTargets() != 0) {
402  outputSize = GetNTargets();
403  } else if (DataInfo().GetNClasses() > 2) {
404  outputSize = DataInfo().GetNClasses();
405  }
406 
407  fNet.SetBatchSize(1);
408  fNet.SetInputWidth(inputSize);
409 
410  auto itLayout = std::begin (fLayout);
411  auto itLayoutEnd = std::end (fLayout)-1;
412  for ( ; itLayout != itLayoutEnd; ++itLayout) {
413  fNet.AddLayer((*itLayout).first, (*itLayout).second);
414  }
415  fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
416 
417  //
418  // Loss function and output.
419  //
420 
421  fOutputFunction = EOutputFunction::kSigmoid;
422  if (fAnalysisType == Types::kClassification)
423  {
424  if (fErrorStrategy == "SUMOFSQUARES") {
425  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
426  }
427  if (fErrorStrategy == "CROSSENTROPY") {
428  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
429  }
430  fOutputFunction = EOutputFunction::kSigmoid;
431  } else if (fAnalysisType == Types::kRegression) {
432  if (fErrorStrategy != "SUMOFSQUARES") {
433  Log () << kWARNING << "For regression only SUMOFSQUARES is a valid "
434  << " neural net error function. Setting error function to "
435  << " SUMOFSQUARES now." << Endl;
436  }
437  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
438  fOutputFunction = EOutputFunction::kIdentity;
439  } else if (fAnalysisType == Types::kMulticlass) {
440  if (fErrorStrategy == "SUMOFSQUARES") {
441  fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
442  }
443  if (fErrorStrategy == "CROSSENTROPY") {
444  fNet.SetLossFunction(ELossFunction::kCrossEntropy);
445  }
446  if (fErrorStrategy == "MUTUALEXCLUSIVE") {
447  fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
448  }
449  fOutputFunction = EOutputFunction::kSoftmax;
450  }
451 
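// For reference: with sigmoid-transformed output yhat in (0,1) and target y,
// the cross entropy selected above is J = -[ y*log(yhat) + (1-y)*log(1-yhat) ],
// summed over outputs and (weighted) events; SUMOFSQUARES corresponds to
// J = (yhat - y)^2 and is the only valid choice for regression.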
452  //
453  // Initialization
454  //
455 
456  if (fWeightInitializationString == "XAVIER") {
457  fWeightInitialization = DNN::EInitialization::kGauss;
458  }
459  else if (fWeightInitializationString == "XAVIERUNIFORM") {
460  fWeightInitialization = DNN::EInitialization::kUniform;
461  }
462  else {
463  fWeightInitialization = DNN::EInitialization::kGauss;
464  }
465 
466  //
467  // Training settings.
468  //
469 
470  KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
471  TString ("|"),
472  TString (","));
473  for (auto& block : strategyKeyValues) {
474  TTrainingSettings settings;
475 
476  settings.convergenceSteps = fetchValue(block, "ConvergenceSteps", 100);
477  settings.batchSize = fetchValue(block, "BatchSize", 30);
478  settings.testInterval = fetchValue(block, "TestRepetitions", 7);
479  settings.weightDecay = fetchValue(block, "WeightDecay", 0.0);
480  settings.learningRate = fetchValue(block, "LearningRate", 1e-5);
481  settings.momentum = fetchValue(block, "Momentum", 0.3);
482  settings.dropoutProbabilities = fetchValue(block, "DropConfig",
483  std::vector<Double_t>());
484 
485  TString regularization = fetchValue(block, "Regularization",
486  TString ("NONE"));
487  if (regularization == "L1") {
488  settings.regularization = DNN::ERegularization::kL1;
489  } else if (regularization == "L2") {
490  settings.regularization = DNN::ERegularization::kL2;
491  }
492 
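// For reference: with weight-decay factor lambda (the "WeightDecay" option),
// L1 regularization adds lambda * sum_i |w_i| to the loss and L2 adds
// lambda * sum_i w_i^2; with NONE the loss is left unchanged.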
493  TString strMultithreading = fetchValue(block, "Multithreading",
494  TString ("True"));
495  if (strMultithreading.BeginsWith ("T")) {
496  settings.multithreading = true;
497  } else {
498  settings.multithreading = false;
499  }
500 
501  fTrainingSettings.push_back(settings);
502  }
503 }
504 
505 //______________________________________________________________________________
506 void TMVA::MethodDNN::Train()
507 {
508  if (fInteractive && fInteractive->NotInitialized()){
509  std::vector<TString> titles = {"Error on training set", "Error on test set"};
510  fInteractive->Init(titles);
511  // JsMVA progress bar maximum (100%)
512  fIPyMaxIter = 100;
513  }
514 
515  if (fArchitectureString == "GPU") {
516  TrainGpu();
517  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
518  ExitFromTraining();
519  return;
520  } else if (fArchitectureString == "OpenCL") {
521  Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
522  return;
523  } else if (fArchitectureString == "CPU") {
524  TrainCpu();
525  if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
526  ExitFromTraining();
527  return;
528  }
529 
530  Log() << kINFO << "Using Standard Implementation.";
531 
532  std::vector<Pattern> trainPattern;
533  std::vector<Pattern> testPattern;
534 
535  const std::vector<TMVA::Event*>& eventCollectionTraining = GetEventCollection (Types::kTraining);
536  const std::vector<TMVA::Event*>& eventCollectionTesting = GetEventCollection (Types::kTesting);
537 
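// Note: for two-class training the target is encoded as 0.9 for class 0 and 0.1
// otherwise (rather than 1/0); multiclass events get a one-hot target vector and
// regression events use the event targets directly. A constant 1.0 input is
// appended to every pattern to serve as the bias node of the legacy network.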
538  for (auto &event : eventCollectionTraining) {
539  const std::vector<Float_t>& values = event->GetValues();
540  if (fAnalysisType == Types::kClassification) {
541  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
542  trainPattern.push_back(Pattern (values.begin(),
543  values.end(),
544  outputValue,
545  event->GetWeight()));
546  trainPattern.back().addInput(1.0);
547  } else if (fAnalysisType == Types::kMulticlass) {
548  std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
549  oneHot[event->GetClass()] = 1.0;
550  trainPattern.push_back(Pattern (values.begin(), values.end(),
551  oneHot.cbegin(), oneHot.cend(),
552  event->GetWeight()));
553  trainPattern.back().addInput(1.0);
554  } else {
555  const std::vector<Float_t>& targets = event->GetTargets ();
556  trainPattern.push_back(Pattern(values.begin(),
557  values.end(),
558  targets.begin(),
559  targets.end(),
560  event->GetWeight ()));
561  trainPattern.back ().addInput (1.0); // bias node
562  }
563  }
564 
565  for (auto &event : eventCollectionTesting) {
566  const std::vector<Float_t>& values = event->GetValues();
567  if (fAnalysisType == Types::kClassification) {
568  double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
569  testPattern.push_back(Pattern (values.begin(),
570  values.end(),
571  outputValue,
572  event->GetWeight()));
573  testPattern.back().addInput(1.0);
574  } else if (fAnalysisType == Types::kMulticlass) {
575  std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
576  oneHot[event->GetClass()] = 1.0;
577  testPattern.push_back(Pattern (values.begin(), values.end(),
578  oneHot.cbegin(), oneHot.cend(),
579  event->GetWeight()));
580  testPattern.back().addInput(1.0);
581  } else {
582  const std::vector<Float_t>& targets = event->GetTargets ();
583  testPattern.push_back(Pattern(values.begin(),
584  values.end(),
585  targets.begin(),
586  targets.end(),
587  event->GetWeight ()));
588  testPattern.back ().addInput (1.0); // bias node
589  }
590  }
591 
592  TMVA::DNN::Net net;
593  std::vector<double> weights;
594 
595  net.SetIpythonInteractive(fInteractive, &fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);
596 
597  net.setInputSize(fNet.GetInputWidth() + 1);
598  net.setOutputSize(fNet.GetOutputWidth() + 1);
599 
600  for (size_t i = 0; i < fNet.GetDepth(); i++) {
601  EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();
602  EnumFunction g = EnumFunction::LINEAR;
603  switch(f) {
604  case EActivationFunction::kIdentity: g = EnumFunction::LINEAR; break;
605  case EActivationFunction::kRelu: g = EnumFunction::RELU; break;
606  case EActivationFunction::kSigmoid: g = EnumFunction::SIGMOID; break;
607  case EActivationFunction::kTanh: g = EnumFunction::TANH; break;
608  case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
609  case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
610  case EActivationFunction::kGauss: g = EnumFunction::GAUSS; break;
611  }
612  if (i < fNet.GetDepth() - 1) {
613  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
614  } else {
615  ModeOutputValues h = ModeOutputValues::DIRECT;
616  switch(fOutputFunction) {
617  case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT; break;
618  case EOutputFunction::kSigmoid: h = ModeOutputValues::SIGMOID; break;
619  case EOutputFunction::kSoftmax: h = ModeOutputValues::SOFTMAX; break;
620  }
621  net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
622  }
623  }
624 
625  switch(fNet.GetLossFunction()) {
626  case ELossFunction::kMeanSquaredError:
627  net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
628  break;
629  case ELossFunction::kCrossEntropy:
630  net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
631  break;
632  case ELossFunction::kSoftmaxCrossEntropy:
633  net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
634  break;
635  }
636 
637  switch(fWeightInitialization) {
638  case EInitialization::kGauss:
639  net.initializeWeights(WeightInitializationStrategy::XAVIER,
640  std::back_inserter(weights));
641  break;
642  case EInitialization::kUniform:
643  net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
644  std::back_inserter(weights));
645  break;
646  default:
647  net.initializeWeights(WeightInitializationStrategy::XAVIER,
648  std::back_inserter(weights));
649  break;
650  }
651 
652  int idxSetting = 0;
653  for (auto s : fTrainingSettings) {
654 
655  EnumRegularization r = EnumRegularization::NONE;
656  switch(s.regularization) {
657  case ERegularization::kNone: r = EnumRegularization::NONE; break;
658  case ERegularization::kL1: r = EnumRegularization::L1; break;
659  case ERegularization::kL2: r = EnumRegularization::L2; break;
660  }
661 
662  Settings * settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
663  s.testInterval, s.weightDecay, r,
664  MinimizerType::fSteepest, s.learningRate,
665  s.momentum, 1, s.multithreading);
666  std::shared_ptr<Settings> ptrSettings(settings);
667  ptrSettings->setMonitoring (0);
668  Log() << kINFO
669  << "Training with learning rate = " << ptrSettings->learningRate ()
670  << ", momentum = " << ptrSettings->momentum ()
671  << ", repetitions = " << ptrSettings->repetitions ()
672  << Endl;
673 
674  ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
675  (idxSetting+1)*100.0/(fSettings.size ()));
676 
677  const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
678  if (!dropConfig.empty ()) {
679  Log () << kINFO << "Drop configuration" << Endl
680  << " drop repetitions = " << ptrSettings->dropRepetitions()
681  << Endl;
682  }
683 
684  int idx = 0;
685  for (auto f : dropConfig) {
686  Log () << kINFO << " Layer " << idx << " = " << f << Endl;
687  ++idx;
688  }
689  Log () << kINFO << Endl;
690 
691  DNN::Steepest minimizer(ptrSettings->learningRate(),
692  ptrSettings->momentum(),
693  ptrSettings->repetitions());
694  net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
695  ptrSettings.reset();
696  Log () << kINFO << Endl;
697  idxSetting++;
698  }
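// Copy the flat weight vector of the legacy network back into fNet: each layer's
// weight matrix is filled column by column; bias values are restored only for the
// first layer (the legacy net carries the bias as an extra input node), deeper
// layers keep zero biases.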
699  size_t weightIndex = 0;
700  for (size_t l = 0; l < fNet.GetDepth(); l++) {
701  auto & layerWeights = fNet.GetLayer(l).GetWeights();
702  for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
703  for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
704  layerWeights(i,j) = weights[weightIndex];
705  weightIndex++;
706  }
707  }
708  auto & layerBiases = fNet.GetLayer(l).GetBiases();
709  if (l == 0) {
710  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
711  layerBiases(i,0) = weights[weightIndex];
712  weightIndex++;
713  }
714  } else {
715  for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
716  layerBiases(i,0) = 0.0;
717  }
718  }
719  }
722 }
723 
724 //______________________________________________________________________________
725 void TMVA::MethodDNN::TrainGpu()
726 {
727 
728 #ifdef DNNCUDA // Included only if DNNCUDA flag is set.
729 
730  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size();
731  size_t nTestSamples = GetEventCollection(Types::kTesting).size();
732 
733  Log() << kINFO << "Start of neural network training on GPU." << Endl;
734 
735  size_t trainingPhase = 1;
737  for (TTrainingSettings & settings : fTrainingSettings) {
738 
739  if (fInteractive){
740  fInteractive->ClearGraphs();
741  }
742 
743  TNet<TCuda<>> net(settings.batchSize, fNet);
744  net.SetWeightDecay(settings.weightDecay);
745  net.SetRegularization(settings.regularization);
746 
747  // Need to convert dropout probabilities to the convention used
748  // by the backend implementation.
749  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
750  for (auto & p : dropoutVector) {
751  p = 1.0 - p;
752  }
753  net.SetDropoutProbabilities(dropoutVector);
754 
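// Example (illustrative): DropConfig=0.0+0.5+0.5 arrives here as the drop
// fractions {0.0, 0.5, 0.5} and is converted to the keep probabilities
// {1.0, 0.5, 0.5} expected by the backend.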
755  net.InitializeGradients();
756  auto testNet = net.CreateClone(settings.batchSize);
757 
758  Log() << kINFO << "Training phase " << trainingPhase << " of "
759  << fTrainingSettings.size() << ":" << Endl;
760  trainingPhase++;
761 
762  using DataLoader_t = TDataLoader<TMVAInput_t, TCuda<>>;
763 
764  size_t nThreads = 1;
765  DataLoader_t trainingData(GetEventCollection(Types::kTraining),
766  nTrainingSamples,
767  net.GetBatchSize(),
768  net.GetInputWidth(),
769  net.GetOutputWidth(), nThreads);
770  DataLoader_t testData(GetEventCollection(Types::kTesting),
771  nTestSamples,
772  testNet.GetBatchSize(),
773  net.GetInputWidth(),
774  net.GetOutputWidth(), nThreads);
775  DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
776  settings.convergenceSteps,
777  settings.testInterval);
778 
779  std::vector<TNet<TCuda<>>> nets{};
780  std::vector<TBatch<TCuda<>>> batches{};
781  nets.reserve(nThreads);
782  for (size_t i = 0; i < nThreads; i++) {
783  nets.push_back(net);
784  for (size_t j = 0; j < net.GetDepth(); j++)
785  {
786  auto &masterLayer = net.GetLayer(j);
787  auto &layer = nets.back().GetLayer(j);
788  TCuda<>::Copy(layer.GetWeights(),
789  masterLayer.GetWeights());
790  TCuda<>::Copy(layer.GetBiases(),
791  masterLayer.GetBiases());
792  }
793  }
794 
795  bool converged = false;
796  size_t stepCount = 0;
797  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
798 
799  std::chrono::time_point<std::chrono::system_clock> start, end;
800  start = std::chrono::system_clock::now();
801 
802  if (!fInteractive) {
803  Log() << std::setw(10) << "Epoch" << " | "
804  << std::setw(12) << "Train Err."
805  << std::setw(12) << "Test Err."
806  << std::setw(12) << "GFLOP/s"
807  << std::setw(12) << "Conv. Steps" << Endl;
808  std::string separator(62, '-');
809  Log() << separator << Endl;
810  }
811 
812  while (!converged)
813  {
814  stepCount++;
815 
816  // Perform minimization steps for a full epoch.
817  trainingData.Shuffle();
818  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
819  batches.clear();
820  for (size_t j = 0; j < nThreads; j++) {
821  batches.reserve(nThreads);
822  batches.push_back(trainingData.GetBatch());
823  }
824  if (settings.momentum > 0.0) {
825  minimizer.StepMomentum(net, nets, batches, settings.momentum);
826  } else {
827  minimizer.Step(net, nets, batches);
828  }
829  }
830 
831  if ((stepCount % minimizer.GetTestInterval()) == 0) {
832 
833  // Compute test error.
834  Double_t testError = 0.0;
835  for (auto batch : testData) {
836  auto inputMatrix = batch.GetInput();
837  auto outputMatrix = batch.GetOutput();
838  testError += testNet.Loss(inputMatrix, outputMatrix);
839  }
840  testError /= (Double_t) (nTestSamples / settings.batchSize);
841 
842  end = std::chrono::system_clock::now();
843 
844  // Compute training error.
845  Double_t trainingError = 0.0;
846  for (auto batch : trainingData) {
847  auto inputMatrix = batch.GetInput();
848  auto outputMatrix = batch.GetOutput();
849  trainingError += net.Loss(inputMatrix, outputMatrix);
850  }
851  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
852 
853  // Compute numerical throughput.
854  std::chrono::duration<double> elapsed_seconds = end - start;
855  double seconds = elapsed_seconds.count();
856  double nFlops = (double) (settings.testInterval * batchesInEpoch);
857  nFlops *= net.GetNFlops() * 1e-9;
858 
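// Roughly: the GFLOP/s column printed below is (steps since the last error
// evaluation) x (per-batch FLOP estimate from net.GetNFlops()) x 1e-9, divided
// by the elapsed wall-clock time.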
859  converged = minimizer.HasConverged(testError);
860  start = std::chrono::system_clock::now();
861 
862  if (fInteractive) {
863  fInteractive->AddPoint(stepCount, trainingError, testError);
864  fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
865  / minimizer.GetConvergenceSteps ();
866  if (fExitFromTraining) break;
867  } else {
868  Log() << std::setw(10) << stepCount << " | "
869  << std::setw(12) << trainingError
870  << std::setw(12) << testError
871  << std::setw(12) << nFlops / seconds
872  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
873  if (converged) {
874  Log() << Endl;
875  }
876  }
877  }
878  }
879  for (size_t l = 0; l < net.GetDepth(); l++) {
880  fNet.GetLayer(l).GetWeights() = (TMatrixT<Double_t>) net.GetLayer(l).GetWeights();
881  fNet.GetLayer(l).GetBiases() = (TMatrixT<Double_t>) net.GetLayer(l).GetBiases();
882  }
883  }
884 
885 #else // DNNCUDA flag not set.
886 
887  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
888  "you have CUDA installed and it was successfully "
889  "detected by CMAKE." << Endl;
890 #endif // DNNCUDA
891 }
892 
893 //______________________________________________________________________________
894 void TMVA::MethodDNN::TrainCpu()
895 {
896 
897 #ifdef DNNCPU // Included only if DNNCPU flag is set.
898 
899  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size();
900  size_t nTestSamples = GetEventCollection(Types::kTesting).size();
901 
902  Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;
903 
905 
906  size_t trainingPhase = 1;
907  for (TTrainingSettings & settings : fTrainingSettings) {
908 
909  if (fInteractive){
910  fInteractive->ClearGraphs();
911  }
912 
913  Log() << "Training phase " << trainingPhase << " of "
914  << fTrainingSettings.size() << ":" << Endl;
915  trainingPhase++;
916 
917  TNet<TCpu<>> net(settings.batchSize, fNet);
918  net.SetWeightDecay(settings.weightDecay);
919  net.SetRegularization(settings.regularization);
920  // Need to convert dropout probabilities to the convention used
921  // by the backend implementation.
922  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
923  for (auto & p : dropoutVector) {
924  p = 1.0 - p;
925  }
926  net.SetDropoutProbabilities(dropoutVector);
927  //net.SetDropoutProbabilities(settings.dropoutProbabilities);
928  net.InitializeGradients();
929  auto testNet = net.CreateClone(settings.batchSize);
930 
931  using DataLoader_t = TDataLoader<TMVAInput_t, TCpu<>>;
932 
933  size_t nThreads = 1;
934  DataLoader_t trainingData(GetEventCollection(Types::kTraining),
935  nTrainingSamples,
936  net.GetBatchSize(),
937  net.GetInputWidth(),
938  net.GetOutputWidth(), nThreads);
939  DataLoader_t testData(GetEventCollection(Types::kTesting),
940  nTestSamples,
941  testNet.GetBatchSize(),
942  net.GetInputWidth(),
943  net.GetOutputWidth(), nThreads);
944  DNN::TGradientDescent<TCpu<>> minimizer(settings.learningRate,
945  settings.convergenceSteps,
946  settings.testInterval);
947 
948  std::vector<TNet<TCpu<>>> nets{};
949  std::vector<TBatch<TCpu<>>> batches{};
950  nets.reserve(nThreads);
951  for (size_t i = 0; i < nThreads; i++) {
952  nets.push_back(net);
953  for (size_t j = 0; j < net.GetDepth(); j++)
954  {
955  auto &masterLayer = net.GetLayer(j);
956  auto &layer = nets.back().GetLayer(j);
957  TCpu<>::Copy(layer.GetWeights(),
958  masterLayer.GetWeights());
959  TCpu<>::Copy(layer.GetBiases(),
960  masterLayer.GetBiases());
961  }
962  }
963 
964  bool converged = false;
965  size_t stepCount = 0;
966  size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
967 
968  std::chrono::time_point<std::chrono::system_clock> start, end;
969  start = std::chrono::system_clock::now();
970 
971  if (fInteractive) {
972  Log() << std::setw(10) << "Epoch" << " | "
973  << std::setw(12) << "Train Err."
974  << std::setw(12) << "Test Err."
975  << std::setw(12) << "GFLOP/s"
976  << std::setw(12) << "Conv. Steps" << Endl;
977  std::string separator(62, '-');
978  Log() << separator << Endl;
979  }
980 
981  while (!converged)
982  {
983  stepCount++;
984  // Perform minimization steps for a full epoch.
985  trainingData.Shuffle();
986  for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
987  batches.clear();
988  for (size_t j = 0; j < nThreads; j++) {
989  batches.reserve(nThreads);
990  batches.push_back(trainingData.GetBatch());
991  }
992  if (settings.momentum > 0.0) {
993  minimizer.StepMomentum(net, nets, batches, settings.momentum);
994  } else {
995  minimizer.Step(net, nets, batches);
996  }
997  }
998 
999  if ((stepCount % minimizer.GetTestInterval()) == 0) {
1000 
1001  // Compute test error.
1002  Double_t testError = 0.0;
1003  for (auto batch : testData) {
1004  auto inputMatrix = batch.GetInput();
1005  auto outputMatrix = batch.GetOutput();
1006  testError += testNet.Loss(inputMatrix, outputMatrix);
1007  }
1008  testError /= (Double_t) (nTestSamples / settings.batchSize);
1009 
1010  end = std::chrono::system_clock::now();
1011 
1012  // Compute training error.
1013  Double_t trainingError = 0.0;
1014  for (auto batch : trainingData) {
1015  auto inputMatrix = batch.GetInput();
1016  auto outputMatrix = batch.GetOutput();
1017  trainingError += net.Loss(inputMatrix, outputMatrix);
1018  }
1019  trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1020 
1021  if (fInteractive){
1022  fInteractive->AddPoint(stepCount, trainingError, testError);
1023  fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
1024  if (fExitFromTraining) break;
1025  }
1026 
1027  // Compute numerical throughput.
1028  std::chrono::duration<double> elapsed_seconds = end - start;
1029  double seconds = elapsed_seconds.count();
1030  double nFlops = (double) (settings.testInterval * batchesInEpoch);
1031  nFlops *= net.GetNFlops() * 1e-9;
1032 
1033  converged = minimizer.HasConverged(testError);
1034  start = std::chrono::system_clock::now();
1035 
1036  if (fInteractive) {
1037  fInteractive->AddPoint(stepCount, trainingError, testError);
1038  fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1039  / minimizer.GetConvergenceSteps ();
1040  if (fExitFromTraining) break;
1041  } else {
1042  Log() << std::setw(10) << stepCount << " | "
1043  << std::setw(12) << trainingError
1044  << std::setw(12) << testError
1045  << std::setw(12) << nFlops / seconds
1046  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1047  if (converged) {
1048  Log() << Endl;
1049  }
1050  }
1051  }
1052  }
1053 
1054 
1055  for (size_t l = 0; l < net.GetDepth(); l++) {
1056  auto & layer = fNet.GetLayer(l);
1057  layer.GetWeights() = (TMatrixT<Double_t>) net.GetLayer(l).GetWeights();
1058  layer.GetBiases() = (TMatrixT<Double_t>) net.GetLayer(l).GetBiases();
1059  }
1060  }
1061 
1062 #else // DNNCPU flag not set.
1063  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1064  "you have a BLAS implementation and it was successfully "
1065  "detected by CMake, and that the imt CMake flag is set." << Endl;
1066 #endif // DNNCPU
1067 }
1068 
1069 //______________________________________________________________________________
1070 Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUpper*/ )
1071 {
1072  size_t nVariables = GetEvent()->GetNVariables();
1073  Matrix_t X(1, nVariables);
1074  Matrix_t YHat(1, 1);
1075 
1076  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1077  for (size_t i = 0; i < nVariables; i++) {
1078  X(0,i) = inputValues[i];
1079  }
1080 
1081  fNet.Prediction(YHat, X, fOutputFunction);
1082  return YHat(0,0);
1083 }
1084 
1085 //______________________________________________________________________________
1086 const std::vector<Float_t> & TMVA::MethodDNN::GetRegressionValues()
1087 {
1088  size_t nVariables = GetEvent()->GetNVariables();
1089  Matrix_t X(1, nVariables);
1090 
1091  const Event *ev = GetEvent();
1092  const std::vector<Float_t>& inputValues = ev->GetValues();
1093  for (size_t i = 0; i < nVariables; i++) {
1094  X(0,i) = inputValues[i];
1095  }
1096 
1097  size_t nTargets = std::max(1u, ev->GetNTargets());
1098  Matrix_t YHat(1, nTargets);
1099  std::vector<Float_t> output(nTargets);
1100  auto net = fNet.CreateClone(1);
1101  net.Prediction(YHat, X, fOutputFunction);
1102 
1103  for (size_t i = 0; i < nTargets; i++)
1104  output[i] = YHat(0, i);
1105 
1106  if (fRegressionReturnVal == NULL) {
1107  fRegressionReturnVal = new std::vector<Float_t>();
1108  }
1109  fRegressionReturnVal->clear();
1110 
1111  Event * evT = new Event(*ev);
1112  for (size_t i = 0; i < nTargets; ++i) {
1113  evT->SetTarget(i, output[i]);
1114  }
1115 
1116  const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1117  for (size_t i = 0; i < nTargets; ++i) {
1118  fRegressionReturnVal->push_back(evT2->GetTarget(i));
1119  }
1120  delete evT;
1121  return *fRegressionReturnVal;
1122 }
1123 
1124 const std::vector<Float_t> & TMVA::MethodDNN::GetMulticlassValues()
1125 {
1126  size_t nVariables = GetEvent()->GetNVariables();
1127  Matrix_t X(1, nVariables);
1128  Matrix_t YHat(1, DataInfo().GetNClasses());
1129  if (fMulticlassReturnVal == NULL) {
1130  fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1131  }
1132 
1133  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1134  for (size_t i = 0; i < nVariables; i++) {
1135  X(0,i) = inputValues[i];
1136  }
1137 
1138  fNet.Prediction(YHat, X, fOutputFunction);
1139  for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1140  (*fMulticlassReturnVal)[i] = YHat(0, i);
1141  }
1142  return *fMulticlassReturnVal;
1143 }
1144 
1145 //______________________________________________________________________________
1146 void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const
1147 {
1148  void* nn = gTools().xmlengine().NewChild(parent, 0, "Weights");
1149  Int_t inputWidth = fNet.GetInputWidth();
1150  Int_t depth = fNet.GetDepth();
1151  char lossFunction = static_cast<char>(fNet.GetLossFunction());
1152  gTools().xmlengine().NewAttr(nn, 0, "InputWidth",
1153  gTools().StringFromInt(inputWidth));
1154  gTools().xmlengine().NewAttr(nn, 0, "Depth", gTools().StringFromInt(depth));
1155  gTools().xmlengine().NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1156  gTools().xmlengine().NewAttr(nn, 0, "OutputFunction",
1157  TString(static_cast<char>(fOutputFunction)));
1158 
1159  for (Int_t i = 0; i < depth; i++) {
1160  const auto& layer = fNet.GetLayer(i);
1161  auto layerxml = gTools().xmlengine().NewChild(nn, 0, "Layer");
1162  int activationFunction = static_cast<int>(layer.GetActivationFunction());
1163  gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
1164  TString::Itoa(activationFunction, 10));
1165  WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
1166  WriteMatrixXML(layerxml, "Biases", layer.GetBiases());
1167  }
1168 }
1169 
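// Schematically, the XML written above has the form (matrix attribute details
// are in WriteMatrixXML, MethodDNN.h):
//   <Weights InputWidth="..." Depth="..." LossFunction="..." OutputFunction="...">
//     <Layer ActivationFunction="...">
//       <Weights rows="..." ...> ... </Weights>
//       <Biases rows="..." ...> ... </Biases>
//     </Layer>
//     ...
//   </Weights>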
1170 //______________________________________________________________________________
1171 void TMVA::MethodDNN::ReadWeightsFromXML(void* rootXML)
1172 {
1173  auto netXML = gTools().GetChild(rootXML, "Weights");
1174  if (!netXML){
1175  netXML = rootXML;
1176  }
1177 
1178  fNet.Clear();
1179  fNet.SetBatchSize(1);
1180 
1181  size_t inputWidth, depth;
1182  gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1183  gTools().ReadAttr(netXML, "Depth", depth);
1184  char lossFunctionChar;
1185  gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1186  char outputFunctionChar;
1187  gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1188 
1189  fNet.SetInputWidth(inputWidth);
1190  fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
1191  fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1192 
1193  size_t previousWidth = inputWidth;
1194  auto layerXML = gTools().xmlengine().GetChild(netXML, "Layer");
1195  for (size_t i = 0; i < depth; i++) {
1196  TString fString;
1197  EActivationFunction f;
1198 
1199  // Read activation function.
1200  gTools().ReadAttr(layerXML, "ActivationFunction", fString);
1201  f = static_cast<EActivationFunction>(fString.Atoi());
1202 
1203  // Read number of neurons.
1204  size_t width;
1205  auto matrixXML = gTools().GetChild(layerXML, "Weights");
1206  gTools().ReadAttr(matrixXML, "rows", width);
1207 
1208  fNet.AddLayer(width, f);
1209  TMatrixT<Double_t> weights(width, previousWidth);
1210  TMatrixT<Double_t> biases(width, 1);
1211  ReadMatrixXML(layerXML, "Weights", weights);
1212  ReadMatrixXML(layerXML, "Biases", biases);
1213  fNet.GetLayer(i).GetWeights() = weights;
1214  fNet.GetLayer(i).GetBiases() = biases;
1215 
1216  layerXML = gTools().GetNextChild(layerXML);
1217  previousWidth = width;
1218  }
1219 }
1220 
1221 //______________________________________________________________________________
1222 void TMVA::MethodDNN::ReadWeightsFromStream( std::istream & /*istr*/)
1223 {
1224 }
1225 
1226 //______________________________________________________________________________
1227 const TMVA::Ranking* TMVA::MethodDNN::CreateRanking()
1228 {
1229  fRanking = new Ranking( GetName(), "Importance" );
1230  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1231  fRanking->AddRank( Rank( GetInputLabel(ivar), 1.0));
1232  }
1233  return fRanking;
1234 }
1235 
1236 //______________________________________________________________________________
1237 void TMVA::MethodDNN::MakeClassSpecific( std::ostream& /*fout*/,
1238  const TString& /*className*/ ) const
1239 {
1240 }
1241 
1242 //______________________________________________________________________________
1243 void TMVA::MethodDNN::GetHelpMessage() const
1244 {
1245  // get help message text
1246  //
1247  // typical length of text line:
1248  // "|--------------------------------------------------------------|"
1249  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1250  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1251 
1252  Log() << Endl;
1253  Log() << col << "--- Short description:" << colres << Endl;
1254  Log() << Endl;
1255  Log() << "The DNN neural network is a feedforward" << Endl;
1256  Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
1257  Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
1258  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1259  Log() << "signal and one background, regression or multiclass). " << Endl;
1260  Log() << Endl;
1261  Log() << col << "--- Performance optimisation:" << colres << Endl;
1262  Log() << Endl;
1263 
1264  const char* txt = "The DNN supports various options to improve performance in terms of training speed and \n \
1265 reduction of overfitting: \n \
1266 \n \
1267  - different training settings can be stacked, such that the initial training \n\
1268  is done with a large learning rate and a large drop out fraction whilst \n \
1269  in a later stage learning rate and drop out can be reduced. \n \
1270  - drop out \n \
1271  [recommended: \n \
1272  initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
1273  later training stage: 0.1 or 0.0 for all layers \n \
1274  final training stage: 0.0] \n \
1275  Drop out is a technique where at each training cycle a fraction of arbitrary \n \
1276  nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
1277  - L1 and L2 regularization are available \n \
1278  - Minibatches \n \
1279  [recommended 10 - 150] \n \
1280  Arbitrary mini-batch sizes can be chosen. \n \
1281  - Multithreading \n \
1282  [recommended: True] \n \
1283  Multithreading can be turned on. The minibatches are distributed to the available \n \
1284  cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
1285  \n \
1286  Options: \n \
1287  \"Layout\": \n \
1288  - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
1289  - meaning: \n \
1290  . two hidden layers (separated by \",\") \n \
1291  . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
1292  . the activation function for the output layer is LINEAR \n \
1293  . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
1294  . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
1295  . the number of nodes in the output layer is determined by the number of output nodes \n \
1296  and can therefore not be chosen freely. \n \
1297  \n \
1298  \"ErrorStrategy\": \n \
1299  - SUMOFSQUARES \n \
1300  The error of the neural net is determined by a sum-of-squares error function \n \
1301  For regression, this is the only possible choice. \n \
1302  - CROSSENTROPY \n \
1303  The error of the neural net is determined by a cross entropy function. The \n \
1304  output values are automatically (internally) transformed into probabilities \n \
1305  using a sigmoid function. \n \
1306  For signal/background classification this is the default choice. \n \
1307  For multiclass using cross entropy more than one or no output classes \n \
1308  can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n \
1309  A and C is true, Event 2: C is true, ...) \n \
1310  - MUTUALEXCLUSIVE \n \
1311  In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
1312  \n \
1313  \"WeightInitialization\" \n \
1314  - XAVIER \n \
1315  [recommended] \n \
1316  \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
1317  such that the variance of the values of the nodes is preserved for each layer. \n \
1318  - XAVIERUNIFORM \n \
1319  The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
1320  - LAYERSIZE \n \
1321  Random values scaled by the layer size \n \
1322  \n \
1323  \"TrainingStrategy\" \n \
1324  - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1325  - explanation: two stacked training settings separated by \"|\" \n \
1326  . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1327  . second training setting : \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1328  . LearningRate : \n \
1329  - recommended for classification: 0.1 initially, 1e-4 later \n \
1330  - recommended for regression: 1e-4 and less \n \
1331  . Momentum : \n \
1332  preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
1333  . Repetitions : \n \
1334  train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
1335  . ConvergenceSteps : \n \
1336  Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n \
1337  of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n \
1338  cycle the test samples are evaluated and thus the convergence is checked) \n \
1339  . BatchSize \n \
1340  Size of the mini-batches. \n \
1341  . TestRepetitions \n \
1342  Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n \
1343  . WeightDecay \n \
1344  If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
1345  . Renormalize \n \
1346  NONE, L1 (|w|) or L2 (w^2) \n \
1347  . DropConfig \n \
1348  Drop a fraction of arbitrary nodes of each of the layers according to the values given \n \
1349  in the DropConfig. \n \
1350  [example: DropConfig=0.0+0.5+0.3 \n \
1351  meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
1352  in layer 2 \n \
1353  recommended: leave all the nodes turned on for the input layer (layer 0) \n \
1354  turn off half of the nodes in later layers for the initial training; leave all nodes \n \
1355  turned on (0.0) in later training stages] \n \
1356  . DropRepetitions \n \
1357  Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n \
1358  [recommended : 1] \n \
1359  . Multithreading \n \
1360  turn on multithreading [recommended: True] \n \
1361  \n";
1362  Log () << txt << Endl;
1363 }
1364 
1365 } // namespace TMVA
Definition: TString.h:349