Logo ROOT  
Reference Guide
MethodDL.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/cnn:$Id$Ndl
2 // Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3 /**********************************************************************************
4  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5  * Package: TMVA *
6  * Class : MethodDL *
7  * Web : http://tmva.sourceforge.net *
8  * *
9  * Description: *
10  * Deep Neural Network Method *
11  * *
12  * Authors (alphabetical): *
13  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14  * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15  * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 #include "TFormula.h"
29 #include "TString.h"
30 #include "TMath.h"
31 #include "TObjString.h"
32 
33 #include "TMVA/Tools.h"
34 #include "TMVA/Configurable.h"
35 #include "TMVA/IMethod.h"
36 #include "TMVA/ClassifierFactory.h"
37 #include "TMVA/MethodDL.h"
38 #include "TMVA/Types.h"
40 #include "TMVA/DNN/Functions.h"
41 #include "TMVA/DNN/DLMinimizers.h"
42 #include "TMVA/DNN/SGD.h"
43 #include "TMVA/DNN/Adam.h"
44 #include "TMVA/DNN/Adagrad.h"
45 #include "TMVA/DNN/RMSProp.h"
46 #include "TMVA/DNN/Adadelta.h"
47 #include "TMVA/Timer.h"
48 
49 #include <chrono>
50 
53 
// Pull the CNN and core DNN symbols into scope for this translation unit
// (file-scope using-directives are acceptable in a .cxx implementation file).
using namespace TMVA::DNN::CNN;
using namespace TMVA::DNN;
56 
62 
63 
64 namespace TMVA {
65 
66 
67 ////////////////////////////////////////////////////////////////////////////////
68 TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
69 {
70  key.ToUpper();
71  std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
72  if (it == keyValueMap.end()) {
73  return TString("");
74  }
75  return it->second;
76 }
77 
////////////////////////////////////////////////////////////////////////////////
/// Typed option-fetch helper: the explicit specializations below convert the
/// raw option string to T, returning `defaultValue` when the key is absent.
template <typename T>
T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
81 
82 ////////////////////////////////////////////////////////////////////////////////
83 template <>
84 int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
85 {
86  TString value(fetchValueTmp(keyValueMap, key));
87  if (value == "") {
88  return defaultValue;
89  }
90  return value.Atoi();
91 }
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 template <>
95 double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
96 {
97  TString value(fetchValueTmp(keyValueMap, key));
98  if (value == "") {
99  return defaultValue;
100  }
101  return value.Atof();
102 }
103 
104 ////////////////////////////////////////////////////////////////////////////////
105 template <>
106 TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
107 {
108  TString value(fetchValueTmp(keyValueMap, key));
109  if (value == "") {
110  return defaultValue;
111  }
112  return value;
113 }
114 
115 ////////////////////////////////////////////////////////////////////////////////
116 template <>
117 bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
118 {
119  TString value(fetchValueTmp(keyValueMap, key));
120  if (value == "") {
121  return defaultValue;
122  }
123 
124  value.ToUpper();
125  if (value == "TRUE" || value == "T" || value == "1") {
126  return true;
127  }
128 
129  return false;
130 }
131 
132 ////////////////////////////////////////////////////////////////////////////////
133 template <>
134 std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
135  std::vector<double> defaultValue)
136 {
137  TString parseString(fetchValueTmp(keyValueMap, key));
138  if (parseString == "") {
139  return defaultValue;
140  }
141 
142  parseString.ToUpper();
143  std::vector<double> values;
144 
145  const TString tokenDelim("+");
146  TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
147  TIter nextToken(tokenStrings);
148  TObjString *tokenString = (TObjString *)nextToken();
149  for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
150  std::stringstream sstr;
151  double currentValue;
152  sstr << tokenString->GetString().Data();
153  sstr >> currentValue;
154  values.push_back(currentValue);
155  }
156  return values;
157 }
158 
////////////////////////////////////////////////////////////////////////////////
/// Register all user-configurable options with their defaults: input/batch
/// layout strings, network layout, loss function, weight initialization,
/// random seed, validation-set size, architecture and the training strategy.
// NOTE(review): the definition's signature line was lost in extraction here —
// presumably `void MethodDL::DeclareOptions()`; confirm against the upstream
// file before building.
{
   // Set default values for all option strings

   // Input/batch shapes default to all-zero; ProcessOptions later replaces
   // zeros with values deduced from the data and the batch size.
   DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");

   DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");

   DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");

   DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
                                                                      " or cross entropy (binary classification).");
   AddPreDefVal(TString("CROSSENTROPY"));
   AddPreDefVal(TString("SUMOFSQUARES"));
   AddPreDefVal(TString("MUTUALEXCLUSIVE"));

   DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
   AddPreDefVal(TString("XAVIER"));
   AddPreDefVal(TString("XAVIERUNIFORM"));
   AddPreDefVal(TString("GAUSS"));
   AddPreDefVal(TString("UNIFORM"));
   AddPreDefVal(TString("IDENTITY"));
   AddPreDefVal(TString("ZERO"));

   DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");

   DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
                    "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
                    "Specify as 100 to use exactly 100 events. (Default: 20%)");

   DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
   AddPreDefVal(TString("STANDARD")); // deprecated and not supported anymore
   AddPreDefVal(TString("CPU"));
   AddPreDefVal(TString("GPU"));
   AddPreDefVal(TString("OPENCL")); // not yet implemented
   AddPreDefVal(TString("CUDNN")); // not needed (by default GPU is now CUDNN if available)

   // define training strategy separated by a separator "|"
   DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-3,"
                                              "Momentum=0.0,"
                                              "ConvergenceSteps=100,"
                                              "MaxEpochs=2000,"
                                              "Optimizer=ADAM,"
                                              "BatchSize=30,"
                                              "TestRepetitions=1,"
                                              "WeightDecay=0.0,"
                                              "Regularization=None,"
                                              "DropConfig=0.0",
                    "TrainingStrategy", "Defines the training strategies.");
}
210 
////////////////////////////////////////////////////////////////////////////////
/// Validate and translate the parsed option strings into the internal
/// configuration: architecture selection (with fallbacks when GPU/CPU support
/// is not compiled in), loss and output functions per analysis type, weight
/// initialization, and one TTrainingSettings per '|'-separated strategy block.
// NOTE(review): the definition's signature line was lost in extraction here —
// presumably `void MethodDL::ProcessOptions()`; confirm against upstream.
{

   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kINFO << "Will ignore negative events in training!" << Endl;
   }

   if (fArchitectureString == "STANDARD") {
      // NOTE(review): this log message reads "Architecture=CPU or
      // Architecture=CPU" — the second one is presumably meant to be GPU;
      // a fix would be a code (string) change, flagged here only.
      Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
               "Please use Architecture=CPU or Architecture=CPU."
               "See the TMVA Users' Guide for instructions if you "
               "encounter problems."
            << Endl;
      Log() << kINFO << "We will use instead the CPU architecture" << Endl;
      fArchitectureString = "CPU";
   }
   if (fArchitectureString == "OPENCL") {
      // NOTE(review): same duplicated "CPU or CPU" wording as above.
      Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
               "Please use Architecture=CPU or Architecture=CPU for the "
               "time being. See the TMVA Users' Guide for instructions "
               "if you encounter problems."
            << Endl;
      // use instead GPU
      Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
      fArchitectureString = "GPU";
   }

   // the architecture can now be set at runtime as an option


   if (fArchitectureString == "GPU" || fArchitectureString == "CUDNN") {
#ifdef R__HAS_TMVAGPU
      Log() << kINFO << "Will now use the GPU architecture !" << Endl;
#else // case TMVA does not support GPU
      Log() << kERROR << "CUDA backend not enabled. Please make sure "
               "you have CUDA installed and it was successfully "
               "detected by CMAKE by using -Dtmva-gpu=On "
            << Endl;
      fArchitectureString = "CPU";
      Log() << kINFO << "Will now use instead the CPU architecture !" << Endl;
#endif
   }

   if (fArchitectureString == "CPU") {
#ifdef R__HAS_TMVACPU // TMVA has CPU BLAS and IMT support
      Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support !" << Endl;
#else // TMVA has no CPU BLAS or IMT support
      Log() << kINFO << "Multi-core CPU backend not enabled. For better performances, make sure "
               "you have a BLAS implementation and it was successfully "
               "detected by CMake as well that the imt CMake flag is set."
            << Endl;
      Log() << kINFO << "Will use anyway the CPU architecture but with slower performance" << Endl;
#endif
   }

   // Input Layout
   ParseInputLayout();
   ParseBatchLayout();

   // Loss function and output.
   fOutputFunction = EOutputFunction::kSigmoid;
   if (fAnalysisType == Types::kClassification) {
      if (fErrorStrategy == "SUMOFSQUARES") {
         fLossFunction = ELossFunction::kMeanSquaredError;
      }
      if (fErrorStrategy == "CROSSENTROPY") {
         fLossFunction = ELossFunction::kCrossEntropy;
      }
      fOutputFunction = EOutputFunction::kSigmoid;
   } else if (fAnalysisType == Types::kRegression) {
      if (fErrorStrategy != "SUMOFSQUARES") {
         Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
               << " neural net error function. Setting error function to "
               << " SUMOFSQUARES now." << Endl;
      }

      fLossFunction = ELossFunction::kMeanSquaredError;
      fOutputFunction = EOutputFunction::kIdentity;
   } else if (fAnalysisType == Types::kMulticlass) {
      if (fErrorStrategy == "SUMOFSQUARES") {
         fLossFunction = ELossFunction::kMeanSquaredError;
      }
      if (fErrorStrategy == "CROSSENTROPY") {
         fLossFunction = ELossFunction::kCrossEntropy;
      }
      if (fErrorStrategy == "MUTUALEXCLUSIVE") {
         fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
      }
      fOutputFunction = EOutputFunction::kSoftmax;
   }

   // Initialization
   // the biases will be always initialized to zero
   if (fWeightInitializationString == "XAVIER") {
      fWeightInitialization = DNN::EInitialization::kGlorotNormal;
   } else if (fWeightInitializationString == "XAVIERUNIFORM") {
      fWeightInitialization = DNN::EInitialization::kGlorotUniform;
   } else if (fWeightInitializationString == "GAUSS") {
      fWeightInitialization = DNN::EInitialization::kGauss;
   } else if (fWeightInitializationString == "UNIFORM") {
      fWeightInitialization = DNN::EInitialization::kUniform;
   } else if (fWeightInitializationString == "ZERO") {
      fWeightInitialization = DNN::EInitialization::kZero;
   } else if (fWeightInitializationString == "IDENTITY") {
      fWeightInitialization = DNN::EInitialization::kIdentity;
   } else {
      fWeightInitialization = DNN::EInitialization::kGlorotUniform;
   }

   // Training settings.

   KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString, TString("|"), TString(","));
   for (auto &block : strategyKeyValues) {
      TTrainingSettings settings;

      settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
      settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
      settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
      settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
      settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
      settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
      settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
      settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());

      TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
      if (regularization == "L1") {
         // NOTE(review): the assignment body of this branch (presumably
         // `settings.regularization = DNN::ERegularization::kL1;`) is missing
         // from this extraction — restore from upstream.
      } else if (regularization == "L2") {
         // NOTE(review): branch body missing in this extraction (presumably
         // kL2 assignment) — restore from upstream.
      } else {
         // NOTE(review): branch body missing in this extraction (presumably
         // kNone assignment) — restore from upstream.
      }

      TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
      settings.optimizerName = optimizer;
      if (optimizer == "SGD") {
         settings.optimizer = DNN::EOptimizer::kSGD;
      } else if (optimizer == "ADAM") {
         // NOTE(review): `settings.optimizer` assignment missing here in this
         // extraction (presumably kAdam) — restore from upstream.
      } else if (optimizer == "ADAGRAD") {
         // NOTE(review): assignment missing (presumably kAdagrad).
      } else if (optimizer == "RMSPROP") {
         // NOTE(review): assignment missing (presumably kRMSProp).
      } else if (optimizer == "ADADELTA") {
         // NOTE(review): assignment missing (presumably kAdadelta).
      } else {
         // Make Adam as default choice if the input string is
         // incorrect.
         settings.optimizerName = "ADAM";
      }

      fTrainingSettings.push_back(settings);
   }

   // this set fInputShape[0] = batchSize
   this->SetBatchSize(fTrainingSettings.front().batchSize);

   // case inputlayout and batch layout was not given. Use default then
   // (1, batchsize, nvariables)
   // fInputShape[0] -> BatchSize
   // fInputShape[1] -> InputDepth
   // fInputShape[2] -> InputHeight
   // fInputShape[3] -> InputWidth
   if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
      fInputShape[1] = 1;
      fInputShape[2] = 1;
      fInputShape[3] = GetNVariables();
   }
   // case when batch layout is not provided (all zero)
   // batch layout can be determined by the input layout + batch size
   // case DNN : { 1, B, W }
   // case CNN : { B, C, H*W}
   // case RNN : { B, T, H*W }

   if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
      // case first layer is DENSE
      if (fInputShape[2] == 1 && fInputShape[1] == 1) {
         // case of (1, batchsize, input features)
         fBatchDepth = 1;
         fBatchHeight = fTrainingSettings.front().batchSize;
         fBatchWidth = fInputShape[3];
      }
      else { // more general cases (e.g. for CNN)
         // case CONV or RNN
         fBatchDepth = fTrainingSettings.front().batchSize;
         fBatchHeight = fInputShape[1];
         fBatchWidth = fInputShape[3]*fInputShape[2];
      }
   }
}
403 
////////////////////////////////////////////////////////////////////////////////
/// default initializations
// NOTE(review): the definition's signature line was lost in extraction here —
// presumably `void MethodDL::Init()`; confirm against upstream.
{
   // Nothing to do here
}
410 
////////////////////////////////////////////////////////////////////////////////
/// Parse the input layout
/// Splits the '|'-separated InputLayout option into a shape vector whose
/// first entry (batch size) is left as 0 to be filled in later, padding
/// missing dimensions with 1 so the shape always has at least 4 entries.
// NOTE(review): the definition's signature line was lost in extraction here —
// presumably `void MethodDL::ParseInputLayout()`; confirm against upstream.
{
   // Define the delimiter
   const TString delim("|");

   // Get the input layout string
   TString inputLayoutString = this->GetInputLayoutString();

   // Split the input layout string
   // NOTE(review): the TObjArray returned by Tokenize is owned by the caller
   // and is never deleted here — small per-call leak; confirm upstream.
   TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
   TIter nextInputDim(inputDimStrings);
   TObjString *inputDimString = (TObjString *)nextInputDim();

   // Go through every token and save its absolute value in the shape array
   // The first token is the batch size for easy compatibility with cudnn
   int subDim = 1;
   std::vector<size_t> inputShape;
   inputShape.reserve(inputLayoutString.Length()/2 + 2);
   inputShape.push_back(0); // Will be set later by Trainingsettings, use 0 value now
   for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
      // size_t is unsigned, so negative user input is folded via abs()
      subDim = (size_t) abs(inputDimString->GetString().Atoi());
      // Size among unused dimensions should be set to 1 for cudnn
      //if (subDim == 0) subDim = 1;
      inputShape.push_back(subDim);
   }
   // it is expected that empty Shape has at least 4 dimensions. We pad the missing one's with 1
   // for example in case of dense layer input layouts
   // when we will support 3D convolutions we would need to add extra 1's
   if (inputShape.size() == 2) {
      // case of dense layer where only width is specified
      inputShape.insert(inputShape.begin() + 1, {1,1});
   }
   else if (inputShape.size() == 3) {
      //e.g. case of RNN T,W -> T,1,W
      inputShape.insert(inputShape.begin() + 2, 1);
   }

   this->SetInputShape(inputShape);
}
453 
////////////////////////////////////////////////////////////////////////////////
/// Parse the batch layout: splits the '|'-separated BatchLayout option into
/// batch depth, height and width and stores them on the method.
// NOTE(review): the definition's signature line was lost in extraction here —
// presumably `void MethodDL::ParseBatchLayout()`; confirm against upstream.
{
   // Define the delimiter
   const TString delim("|");

   // Get the batch layout string
   TString batchLayoutString = this->GetBatchLayoutString();

   // Missing tokens leave the corresponding dimension at 0; ProcessOptions
   // later deduces an all-zero batch layout from the input layout.
   size_t batchDepth = 0;
   size_t batchHeight = 0;
   size_t batchWidth = 0;

   // Split the batch layout string
   TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
   TIter nextBatchDim(batchDimStrings);
   TObjString *batchDimString = (TObjString *)nextBatchDim();
   int idxToken = 0;

   for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
      switch (idxToken) {
      case 0: // input depth
      {
         TString strDepth(batchDimString->GetString());
         batchDepth = (size_t)strDepth.Atoi();
      } break;
      case 1: // input height
      {
         TString strHeight(batchDimString->GetString());
         batchHeight = (size_t)strHeight.Atoi();
      } break;
      case 2: // input width
      {
         TString strWidth(batchDimString->GetString());
         batchWidth = (size_t)strWidth.Atoi();
      } break;
      }
      ++idxToken;
   }

   this->SetBatchDepth(batchDepth);
   this->SetBatchHeight(batchHeight);
   this->SetBatchWidth(batchWidth);
}
499 
////////////////////////////////////////////////////////////////////////////////
/// Create a deep net based on the layout string: splits the ','-separated
/// layer list and dispatches each entry to the matching ParseXxxLayer helper
/// based on its first '|'-separated token.
template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of this signature was lost in extraction —
// presumably `void MethodDL::CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`;
// confirm against upstream.
                             std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets)
{
   // Layer specification, layer details
   const TString layerDelimiter(",");
   const TString subDelimiter("|");

   TString layoutString = this->GetLayoutString();

   //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;

   // Split layers
   TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
   TIter nextLayer(layerStrings);
   TObjString *layerString = (TObjString *)nextLayer();


   for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {

      // Split layer details
      TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
      TIter nextToken(subStrings);
      TObjString *token = (TObjString *)nextToken();

      // Determine the type of the layer
      // NOTE(review): `token` is dereferenced without a null check — an empty
      // layer entry (e.g. ",,") would crash here; confirm inputs are validated
      // upstream.
      TString strLayerType = token->GetString();


      if (strLayerType == "DENSE") {
         ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "CONV") {
         ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "MAXPOOL") {
         ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "RESHAPE") {
         ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "BNORM") {
         ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "RNN") {
         ParseRecurrentLayer(kLayerRNN, deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "LSTM") {
         ParseRecurrentLayer(kLayerLSTM, deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "GRU") {
         ParseRecurrentLayer(kLayerGRU, deepNet, nets, layerString->GetString(), subDelimiter);
      } else {
         // no type of layer specified - assume is dense layer as in old DNN interface
         ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      }
   }
}
553 
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate dense layer
template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of this signature was lost in extraction —
// presumably `void MethodDL::ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`;
// confirm against upstream.
                              std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                              TString delim)
{
   int width = 0;
   EActivationFunction activationFunction = EActivationFunction::kTanh;

   // this return number of input variables for the method
   // it can be used to deduce width of dense layer if specified as N+10
   // where N is the number of input variables
   const size_t inputSize = GetNvar();

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0; // NOTE(review): incremented below but never read

   // loop on the tokens
   // order of specifying width and activation function is not relevant
   // both 100|TANH and TANH|100 are valid cases
   for (; token != nullptr; token = (TObjString *)nextToken()) {
      idxToken++;
      // try a match with the activation function
      TString strActFnc(token->GetString());
      // if first token defines the layer type- skip it
      if (strActFnc =="DENSE") continue;

      if (strActFnc == "RELU") {
         activationFunction = DNN::EActivationFunction::kRelu;
      } else if (strActFnc == "TANH") {
         activationFunction = DNN::EActivationFunction::kTanh;
      } else if (strActFnc == "FTANH") {
         activationFunction = DNN::EActivationFunction::kFastTanh;
      } else if (strActFnc == "SYMMRELU") {
         activationFunction = DNN::EActivationFunction::kSymmRelu;
      } else if (strActFnc == "SOFTSIGN") {
         activationFunction = DNN::EActivationFunction::kSoftSign;
      } else if (strActFnc == "SIGMOID") {
         activationFunction = DNN::EActivationFunction::kSigmoid;
      } else if (strActFnc == "LINEAR") {
         activationFunction = DNN::EActivationFunction::kIdentity;
      } else if (strActFnc == "GAUSS") {
         activationFunction = DNN::EActivationFunction::kGauss;
      } else if (width == 0) {
         // no match found try to parse as text showing the width
         // support for input a formula where the variable 'x' is 'N' in the string
         // use TFormula for the evaluation
         TString strNumNodes = strActFnc;
         // number of nodes
         TString strN("x");
         strNumNodes.ReplaceAll("N", strN);
         strNumNodes.ReplaceAll("n", strN);
         TFormula fml("tmp", strNumNodes);
         width = fml.Eval(inputSize);
      }
   }
   // avoid zero width. assume is last layer and give width = output width
   // Determine the number of outputs
   size_t outputSize = 1;
   if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
      outputSize = GetNTargets();
   } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
      outputSize = DataInfo().GetNClasses();
   }
   if (width == 0) width = outputSize;

   // Add the dense layer, initialize the weights and biases and copy
   TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
   denseLayer->Initialize();

   // add same layer to fNet
   if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);

   //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);

   // add the copy to all slave nets
   //for (size_t i = 0; i < nets.size(); i++) {
   // nets[i].AddDenseLayer(copyDenseLayer);
   //}

   // check compatibility of added layer
   // for a dense layer input should be 1 x 1 x DxHxW
}
641 
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate convolutional layer
template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of this signature was lost in extraction —
// presumably `void MethodDL::ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`;
// confirm against upstream.
                             std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                             TString delim)
{
   // Positional tokens: CONV|depth|fltH|fltW|strideR|strideC|padH|padW|activation
   int depth = 0;
   int fltHeight = 0;
   int fltWidth = 0;
   int strideRows = 0;
   int strideCols = 0;
   int zeroPadHeight = 0;
   int zeroPadWidth = 0;
   EActivationFunction activationFunction = EActivationFunction::kTanh;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0; // token 0 is the "CONV" type tag and is skipped

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: // depth
      {
         TString strDepth(token->GetString());
         depth = strDepth.Atoi();
      } break;
      case 2: // filter height
      {
         TString strFltHeight(token->GetString());
         fltHeight = strFltHeight.Atoi();
      } break;
      case 3: // filter width
      {
         TString strFltWidth(token->GetString());
         fltWidth = strFltWidth.Atoi();
      } break;
      case 4: // stride in rows
      {
         TString strStrideRows(token->GetString());
         strideRows = strStrideRows.Atoi();
      } break;
      case 5: // stride in cols
      {
         TString strStrideCols(token->GetString());
         strideCols = strStrideCols.Atoi();
      } break;
      case 6: // zero padding height
      {
         TString strZeroPadHeight(token->GetString());
         zeroPadHeight = strZeroPadHeight.Atoi();
      } break;
      case 7: // zero padding width
      {
         TString strZeroPadWidth(token->GetString());
         zeroPadWidth = strZeroPadWidth.Atoi();
      } break;
      case 8: // activation function
      {
         TString strActFnc(token->GetString());
         if (strActFnc == "RELU") {
            activationFunction = DNN::EActivationFunction::kRelu;
         } else if (strActFnc == "TANH") {
            activationFunction = DNN::EActivationFunction::kTanh;
         } else if (strActFnc == "SYMMRELU") {
            activationFunction = DNN::EActivationFunction::kSymmRelu;
         } else if (strActFnc == "SOFTSIGN") {
            activationFunction = DNN::EActivationFunction::kSoftSign;
         } else if (strActFnc == "SIGMOID") {
            activationFunction = DNN::EActivationFunction::kSigmoid;
         } else if (strActFnc == "LINEAR") {
            activationFunction = DNN::EActivationFunction::kIdentity;
         } else if (strActFnc == "GAUSS") {
            activationFunction = DNN::EActivationFunction::kGauss;
         }
      } break;
      }
      ++idxToken;
   }

   // Add the convolutional layer, initialize the weights and biases and copy
   TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                                                                zeroPadHeight, zeroPadWidth, activationFunction);
   convLayer->Initialize();

   // Add same layer to fNet
   if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                                     zeroPadHeight, zeroPadWidth, activationFunction);

   //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);

   //// add the copy to all slave nets
   //for (size_t i = 0; i < nets.size(); i++) {
   // nets[i].AddConvLayer(copyConvLayer);
   //}
}
740 
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate max pool layer
template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of this signature was lost in extraction —
// presumably `void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`;
// confirm against upstream.
                                std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                                TString delim)
{

   // Positional tokens: MAXPOOL|fltH|fltW|strideR|strideC
   int filterHeight = 0;
   int filterWidth = 0;
   int strideRows = 0;
   int strideCols = 0;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0; // token 0 is the "MAXPOOL" type tag and is skipped

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: // filter height
      {
         TString strFrmHeight(token->GetString());
         filterHeight = strFrmHeight.Atoi();
      } break;
      case 2: // filter width
      {
         TString strFrmWidth(token->GetString());
         filterWidth = strFrmWidth.Atoi();
      } break;
      case 3: // stride in rows
      {
         TString strStrideRows(token->GetString());
         strideRows = strStrideRows.Atoi();
      } break;
      case 4: // stride in cols
      {
         TString strStrideCols(token->GetString());
         strideCols = strStrideCols.Atoi();
      } break;
      }
      ++idxToken;
   }

   // Add the Max pooling layer
   // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
   deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);

   // Add the same layer to fNet
   if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);


   //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);

   //// add the copy to all slave nets
   //for (size_t i = 0; i < nets.size(); i++) {
   // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
   //}
}
801 
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate reshape layer
template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of this signature was lost in extraction —
// presumably `void MethodDL::ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`;
// confirm against upstream.
                                std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                                TString delim)
{
   // Positional tokens: RESHAPE|depth|height|width or RESHAPE|FLAT
   int depth = 0;
   int height = 0;
   int width = 0;
   bool flattening = false;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      // "FLAT" may appear in any position; jump straight to the flatten case
      if (token->GetString() == "FLAT") idxToken=4;
      switch (idxToken) {
      case 1: {
         TString strDepth(token->GetString());
         depth = strDepth.Atoi();
      } break;
      case 2: // height
      {
         TString strHeight(token->GetString());
         height = strHeight.Atoi();
      } break;
      case 3: // width
      {
         TString strWidth(token->GetString());
         width = strWidth.Atoi();
      } break;
      case 4: // flattening
      {
         TString flat(token->GetString());
         if (flat == "FLAT") {
            flattening = true;
         }
      } break;
      }
      ++idxToken;
   }

   // Add the reshape layer
   // TReshapeLayer<Architecture_t> *reshapeLayer =
   deepNet.AddReshapeLayer(depth, height, width, flattening);

   // Add the same layer to fNet
   if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);

   //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);

   //// add the copy to all slave nets
   //for (size_t i = 0; i < nets.size(); i++) {
   // nets[i].AddReshapeLayer(copyReshapeLayer);
   //}
}
862 
////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate batch-normalization
/// layer (tokens: BNORM|momentum|epsilon).
template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of this signature was lost in extraction —
// presumably `void MethodDL::ParseBatchNormLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`;
// confirm against upstream.
                                  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
                                  TString delim)
{

   // default values
   double momentum = -1; //0.99;
   double epsilon = 0.0001;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0; // token 0 is the "BNORM" type tag and is skipped

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: { // momentum
         momentum = std::atof(token->GetString().Data());
      } break;
      case 2: // epsilon
      {
         epsilon = std::atof(token->GetString().Data());
      } break;
      }
      ++idxToken;
   }

   // Add the batch norm layer
   //
   auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
   layer->Initialize();

   // Add the same layer to fNet
   if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);

}
903 
904 ////////////////////////////////////////////////////////////////////////////////
905 /// Pases the layer string and creates the appropriate rnn layer
906 template <typename Architecture_t, typename Layer_t>
908  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
909  TString delim)
910 {
911  // int depth = 0;
912  int stateSize = 0;
913  int inputSize = 0;
914  int timeSteps = 0;
915  bool rememberState = false;
916  bool returnSequence = false;
917  bool resetGateAfter = false;
918 
919  // Split layer details
920  TObjArray *subStrings = layerString.Tokenize(delim);
921  TIter nextToken(subStrings);
922  TObjString *token = (TObjString *)nextToken();
923  int idxToken = 0;
924 
925  for (; token != nullptr; token = (TObjString *)nextToken()) {
926  switch (idxToken) {
927  case 1: // state size
928  {
929  TString strstateSize(token->GetString());
930  stateSize = strstateSize.Atoi();
931  break;
932  }
933  case 2: // input size
934  {
935  TString strinputSize(token->GetString());
936  inputSize = strinputSize.Atoi();
937  break;
938  }
939  case 3: // time steps
940  {
941  TString strtimeSteps(token->GetString());
942  timeSteps = strtimeSteps.Atoi();
943  break;
944  }
945  case 4: // returnSequence (option stateful in Keras)
946  {
947  TString strrememberState(token->GetString());
948  rememberState = (bool) strrememberState.Atoi();
949  break;
950  }
951  case 5: // return full output sequence (1 or 0)
952  {
953  TString str(token->GetString());
954  returnSequence = (bool)str.Atoi();
955  break;
956  }
957  case 6: // resetGate after option (only for GRU)
958  {
959  TString str(token->GetString());
960  resetGateAfter = (bool)str.Atoi();
961  }
962  }
963  ++idxToken;
964  }
965 
966  // Add the recurrent layer, initialize the weights and biases and copy
967  if (rnnType == kLayerRNN) {
968  auto * recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
969  recurrentLayer->Initialize();
970  // Add same layer to fNet
971  if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
972  }
973  else if (rnnType == kLayerLSTM ) {
974  auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
975  recurrentLayer->Initialize();
976  // Add same layer to fNet
977  if (fBuildNet)
978  fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
979  }
980  else if (rnnType == kLayerGRU) {
981  if (Architecture_t::IsCudnn()) resetGateAfter = true; // needed for Cudnn
982  auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
983  recurrentLayer->Initialize();
984  // Add same layer to fNet
985  if (fBuildNet)
986  fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
987  }
988  else {
989  Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
990  }
991 }
992 
////////////////////////////////////////////////////////////////////////////////
/// Standard constructor. All option members are default-initialized in the
/// initializer list; the actual configuration is read later from the option
/// string via the usual MethodBase option machinery.
MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
   : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
     fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
     fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
     fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
     fXInput()
{
   // Nothing to do here - everything is handled by the initializer list above
}
1005 
////////////////////////////////////////////////////////////////////////////////
/// Constructor from a weight file: builds the method with default-initialized
/// members; the network configuration is restored from `theWeightFile`.
MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
   : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
     fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
     fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
     fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
     fXInput()
{
   // Nothing to do here - everything is handled by the initializer list above
}
1018 
1019 ////////////////////////////////////////////////////////////////////////////////
1020 /// Destructor.
1022 {
1023  // Nothing to do here
1024 }
1025 
1026 ////////////////////////////////////////////////////////////////////////////////
1027 /// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1028 auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1029 {
1030  // remove empty spaces
1031  parseString.ReplaceAll(" ","");
1032  KeyValueVector_t blockKeyValues;
1033  const TString keyValueDelim("=");
1034 
1035  TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1036  TIter nextBlock(blockStrings);
1037  TObjString *blockString = (TObjString *)nextBlock();
1038 
1039  for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1040  blockKeyValues.push_back(std::map<TString, TString>());
1041  std::map<TString, TString> &currentBlock = blockKeyValues.back();
1042 
1043  TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1044  TIter nextToken(subStrings);
1045  TObjString *token = (TObjString *)nextToken();
1046 
1047  for (; token != nullptr; token = (TObjString *)nextToken()) {
1048  TString strKeyValue(token->GetString());
1049  int delimPos = strKeyValue.First(keyValueDelim.Data());
1050  if (delimPos <= 0) continue;
1051 
1052  TString strKey = TString(strKeyValue(0, delimPos));
1053  strKey.ToUpper();
1054  TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1055 
1056  strKey.Strip(TString::kBoth, ' ');
1057  strValue.Strip(TString::kBoth, ' ');
1058 
1059  currentBlock.insert(std::make_pair(strKey, strValue));
1060  }
1061  }
1062  return blockKeyValues;
1063 }
1064 
1065 ////////////////////////////////////////////////////////////////////////////////
1066 /// What kind of analysis type can handle the CNN
1068 {
1069  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1070  if (type == Types::kMulticlass) return kTRUE;
1071  if (type == Types::kRegression) return kTRUE;
1072 
1073  return kFALSE;
1074 }
1075 
1076 ////////////////////////////////////////////////////////////////////////////////
1077 /// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1078 /// 100 etc.
1079 /// - 20% and 0.2 selects 20% of the training set as validation data.
1080 /// - 100 selects 100 events as the validation data.
1081 ///
1082 /// @return number of samples in validation set
1083 ///
1085 {
1086  Int_t nValidationSamples = 0;
1087  UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1088 
1089  // Parsing + Validation
1090  // --------------------
1091  if (fNumValidationString.EndsWith("%")) {
1092  // Relative spec. format 20%
1093  TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1094 
1095  if (intValStr.IsFloat()) {
1096  Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1097  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1098  } else {
1099  Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1100  << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1101  }
1102  } else if (fNumValidationString.IsFloat()) {
1103  Double_t valSizeAsDouble = fNumValidationString.Atof();
1104 
1105  if (valSizeAsDouble < 1.0) {
1106  // Relative spec. format 0.2
1107  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1108  } else {
1109  // Absolute spec format 100 or 100.0
1110  nValidationSamples = valSizeAsDouble;
1111  }
1112  } else {
1113  Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1114  << Endl;
1115  }
1116 
1117  // Value validation
1118  // ----------------
1119  if (nValidationSamples < 0) {
1120  Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1121  }
1122 
1123  if (nValidationSamples == 0) {
1124  Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1125  }
1126 
1127  if (nValidationSamples >= (Int_t)trainingSetSize) {
1128  Log() << kFATAL << "Validation size \"" << fNumValidationString
1129  << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1130  }
1131 
1132  return nValidationSamples;
1133 }
1134 
1135 
1136 ////////////////////////////////////////////////////////////////////////////////
1137 /// Implementation of architecture specific train method
1138 ///
1139 template <typename Architecture_t>
1141 {
1142 
1143  using Scalar_t = typename Architecture_t::Scalar_t;
1146  using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1147 
1148  bool debug = Log().GetMinType() == kDEBUG;
1149 
1150 
1151  // set the random seed for weight initialization
1152  Architecture_t::SetRandomSeed(fRandomSeed);
1153 
1154  ///split training data in training and validation data
1155  // and determine the number of training and testing examples
1156 
1157  size_t nValidationSamples = GetNumValidationSamples();
1158  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1159 
1160  const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1161  const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1162  const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1163 
1164  size_t trainingPhase = 1;
1165 
1166  for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1167 
1168  size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1169 
1170 
1171  // After the processing of the options, initialize the master deep net
1172  size_t batchSize = settings.batchSize;
1173  this->SetBatchSize(batchSize);
1174  // Should be replaced by actual implementation. No support for this now.
1175  size_t inputDepth = this->GetInputDepth();
1176  size_t inputHeight = this->GetInputHeight();
1177  size_t inputWidth = this->GetInputWidth();
1178  size_t batchDepth = this->GetBatchDepth();
1179  size_t batchHeight = this->GetBatchHeight();
1180  size_t batchWidth = this->GetBatchWidth();
1181  ELossFunction J = this->GetLossFunction();
1183  ERegularization R = settings.regularization;
1184  EOptimizer O = settings.optimizer;
1185  Scalar_t weightDecay = settings.weightDecay;
1186 
1187  //Batch size should be included in batch layout as well. There are two possibilities:
1188  // 1. Batch depth = batch size one will input tensorsa as (batch_size x d1 x d2)
1189  // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1190  // 2. Batch depth = 1, batch height = batch size batxch width = dim of input features
1191  // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1192 
1193  if (batchDepth != batchSize && batchDepth > 1) {
1194  Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1195  return;
1196  }
1197  if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1198  Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1199  return;
1200  }
1201 
1202 
1203  //check also that input layout compatible with batch layout
1204  bool badLayout = false;
1205  // case batch depth == batch size
1206  if (batchDepth == batchSize)
1207  badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1208  // case batch Height is batch size
1209  if (batchHeight == batchSize && batchDepth == 1)
1210  badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1211  if (badLayout) {
1212  Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1213  inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1214  return;
1215  }
1216 
1217  // check batch size is compatible with number of events
1218  if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1219  Log() << kFATAL << "Number of samples in the datasets are train: ("
1220  << nTrainingSamples << ") test: (" << nValidationSamples
1221  << "). One of these is smaller than the batch size of "
1222  << settings.batchSize << ". Please increase the batch"
1223  << " size to be at least the same size as the smallest"
1224  << " of them." << Endl;
1225  }
1226 
1227  DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1228 
1229  // create a copy of DeepNet for evaluating but with batch size = 1
1230  // fNet is the saved network and will be with CPU or Referrence architecture
1231  if (trainingPhase == 1) {
1232  fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1233  batchHeight, batchWidth, J, I, R, weightDecay));
1234  fBuildNet = true;
1235  }
1236  else
1237  fBuildNet = false;
1238 
1239  // Initialize the vector of slave nets
1240  std::vector<DeepNet_t> nets{};
1241  nets.reserve(nThreads);
1242  for (size_t i = 0; i < nThreads; i++) {
1243  // create a copies of the master deep net
1244  nets.push_back(deepNet);
1245  }
1246 
1247 
1248  // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1249  CreateDeepNet(deepNet, nets);
1250 
1251 
1252  // set droput probabilities
1253  // use convention to store in the layer 1.- dropout probabilities
1254  std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1255  for (auto & p : dropoutVector) {
1256  p = 1.0 - p;
1257  }
1258  deepNet.SetDropoutProbabilities(dropoutVector);
1259 
1260  if (trainingPhase > 1) {
1261  // copy initial weights from fNet to deepnet
1262  for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1263  deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1264  }
1265  }
1266 
1267  // when fNet is built create also input matrix that will be used to evaluate it
1268  if (fBuildNet) {
1269  //int n1 = batchHeight;
1270  //int n2 = batchWidth;
1271  // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1272  //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1273  //fXInput = TensorImpl_t(1,n1,n2);
1275  if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1276  fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1277  fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );
1278 
1279 
1280  // create pointer to output matrix used for the predictions
1281  fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1282 
1283  // print the created network
1284  Log() << "***** Deep Learning Network *****" << Endl;
1285  if (Log().GetMinType() <= kINFO)
1286  deepNet.Print();
1287  }
1288  Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1289 
1290  // Loading the training and validation datasets
1291  TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1292  TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1293  {inputDepth, inputHeight, inputWidth},
1294  {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1295  deepNet.GetOutputWidth(), nThreads);
1296 
1297  TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1298  TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1299  {inputDepth, inputHeight, inputWidth},
1300  { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1301  deepNet.GetOutputWidth(), nThreads);
1302 
1303 
1304 
1305  // do an evaluation of the network to compute initial minimum test error
1306 
1307  Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1308 
1309  Double_t minValError = 0.0;
1310  Log() << "Compute initial loss on the validation data " << Endl;
1311  for (auto batch : validationData) {
1312  auto inputTensor = batch.GetInput();
1313  auto outputMatrix = batch.GetOutput();
1314  auto weights = batch.GetWeights();
1315 
1316  //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1317  // should we apply droput to the loss ??
1318  minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1319  }
1320  // add Regularization term
1321  Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1322  minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1323  minValError += regzTerm;
1324 
1325 
1326  // create a pointer to base class VOptimizer
1327  std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1328 
1329  // initialize the base class pointer with the corresponding derived class object.
1330  switch (O) {
1331 
1332  case EOptimizer::kSGD:
1333  optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1334  new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1335  break;
1336 
1337  case EOptimizer::kAdam:
1338  optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1339  new DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1340  break;
1341 
1342  case EOptimizer::kAdagrad:
1343  optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1344  new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1345  break;
1346 
1347  case EOptimizer::kRMSProp:
1348  optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1349  new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum));
1350  break;
1351 
1352  case EOptimizer::kAdadelta:
1353  optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1354  new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1355  break;
1356  }
1357 
1358 
1359  // Initialize the vector of batches, one batch for one slave network
1360  std::vector<TTensorBatch<Architecture_t>> batches{};
1361 
1362  bool converged = false;
1363  size_t convergenceCount = 0;
1364  size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1365 
1366  // start measuring
1367  std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1368  tstart = std::chrono::system_clock::now();
1369 
1370  Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1371  << " Optimizer " << settings.optimizerName
1372  << " Learning rate = " << settings.learningRate
1373  << " regularization " << (char) settings.regularization
1374  << " minimum error = " << minValError
1375  << Endl;
1376  if (!fInteractive) {
1377  std::string separator(62, '-');
1378  Log() << separator << Endl;
1379  Log() << std::setw(10) << "Epoch"
1380  << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
1381  << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
1382  << std::setw(12) << "nEvents/s"
1383  << std::setw(12) << "Conv. Steps" << Endl;
1384  Log() << separator << Endl;
1385  }
1386 
1387  // set up generator for shuffling the batches
1388  // if seed is zero we have always a different order in the batches
1389  size_t shuffleSeed = 0;
1390  if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1391  RandomGenerator<TRandom3> rng(shuffleSeed);
1392 
1393  // print weights before
1394  if (fBuildNet && debug) {
1395  Log() << "Initial Deep Net Weights " << Endl;
1396  auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1397  for (size_t l = 0; l < weights_tensor.size(); ++l)
1398  weights_tensor[l].Print();
1399  auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1400  bias_tensor[0].Print();
1401  }
1402 
1403  Log() << " Start epoch iteration ..." << Endl;
1404  bool debugFirstEpoch = false;
1405  bool computeLossInTraining = true; // compute loss in training or at test time
1406  size_t nTrainEpochs = 0;
1407  while (!converged) {
1408  nTrainEpochs++;
1409  trainingData.Shuffle(rng);
1410 
1411  // execute all epochs
1412  //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1413 
1414  Double_t trainingError = 0;
1415  for (size_t i = 0; i < batchesInEpoch; ++i ) {
1416  // Clean and load new batches, one batch for one slave net
1417  //batches.clear();
1418  //batches.reserve(nThreads);
1419  //for (size_t j = 0; j < nThreads; j++) {
1420  // batches.push_back(trainingData.GetTensorBatch());
1421  //}
1422  if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1423 
1424  auto my_batch = trainingData.GetTensorBatch();
1425 
1426  if (debugFirstEpoch)
1427  std::cout << "got batch data - doing forward \n";
1428 
1429 #ifdef DEBUG
1430 
1431  Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1432  typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1433  typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1434  Architecture_t::PrintTensor(tOut,"label tensor",true) ;
1435  Architecture_t::PrintTensor(tW,"weight tensor",true) ;
1436 #endif
1437 
1438  deepNet.Forward(my_batch.GetInput(), true);
1439  // compute also loss
1440  if (computeLossInTraining) {
1441  auto outputMatrix = my_batch.GetOutput();
1442  auto weights = my_batch.GetWeights();
1443  trainingError += deepNet.Loss(outputMatrix, weights, false);
1444  }
1445 
1446  if (debugFirstEpoch)
1447  std::cout << "- doing backward \n";
1448 
1449 #ifdef DEBUG
1450  size_t nlayers = deepNet.GetLayers().size();
1451  for (size_t l = 0; l < nlayers; ++l) {
1452  if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1453  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1454  TString::Format("initial weights layer %d", l).Data());
1455 
1456  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1457  TString::Format("output tensor layer %d", l).Data());
1458  }
1459 #endif
1460 
1461  //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1462 
1463  deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1464 
1465  if (debugFirstEpoch)
1466  std::cout << "- doing optimizer update \n";
1467 
1468  // increment optimizer step that is used in some algorithms (e.g. ADAM)
1469  optimizer->IncrementGlobalStep();
1470  optimizer->Step();
1471 
1472 #ifdef DEBUG
1473  std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1474  for (size_t l = 0; l < nlayers; ++l) {
1475  if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1476  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1477  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1478  }
1479  }
1480 #endif
1481 
1482  }
1483 
1484  if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
1485  //}
1486  debugFirstEpoch = false;
1487  if ((nTrainEpochs % settings.testInterval) == 0) {
1488 
1489  std::chrono::time_point<std::chrono::system_clock> t1,t2;
1490 
1491  t1 = std::chrono::system_clock::now();
1492 
1493  // Compute validation error.
1494 
1495 
1496  Double_t valError = 0.0;
1497  bool inTraining = false;
1498  for (auto batch : validationData) {
1499  auto inputTensor = batch.GetInput();
1500  auto outputMatrix = batch.GetOutput();
1501  auto weights = batch.GetWeights();
1502  // should we apply droput to the loss ??
1503  valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1504  }
1505  // normalize loss to number of batches and add regularization term
1506  Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1507  valError /= (Double_t)(nValidationSamples / settings.batchSize);
1508  valError += regTerm;
1509 
1510  //Log the loss value
1511  fTrainHistory.AddValue("valError",nTrainEpochs,valError);
1512 
1513  t2 = std::chrono::system_clock::now();
1514 
1515  // checking for convergence
1516  if (valError < minValError) {
1517  convergenceCount = 0;
1518  } else {
1519  convergenceCount += settings.testInterval;
1520  }
1521 
1522  // copy configuration when reached a minimum error
1523  if (valError < minValError ) {
1524  // Copy weights from deepNet to fNet
1525  Log() << std::setw(10) << nTrainEpochs
1526  << " Minimum Test error found - save the configuration " << Endl;
1527  for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1528  fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1529  // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1530  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1531  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1532  // }
1533  }
1534  // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1535  // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1536 
1537  minValError = valError;
1538  }
1539  else if ( minValError <= 0. )
1540  minValError = valError;
1541 
1542  if (!computeLossInTraining) {
1543  trainingError = 0.0;
1544  // Compute training error.
1545  for (auto batch : trainingData) {
1546  auto inputTensor = batch.GetInput();
1547  auto outputMatrix = batch.GetOutput();
1548  auto weights = batch.GetWeights();
1549  trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1550  }
1551  }
1552  // normalize loss to number of batches and add regularization term
1553  trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1554  trainingError += regTerm;
1555 
1556  //Log the loss value
1557  fTrainHistory.AddValue("trainingError",nTrainEpochs,trainingError);
1558 
1559  // stop measuring
1560  tend = std::chrono::system_clock::now();
1561 
1562  // Compute numerical throughput.
1563  std::chrono::duration<double> elapsed_seconds = tend - tstart;
1564  std::chrono::duration<double> elapsed1 = t1-tstart;
1565  // std::chrono::duration<double> elapsed2 = t2-tstart;
1566  // time to compute training and test errors
1567  std::chrono::duration<double> elapsed_testing = tend-t1;
1568 
1569  double seconds = elapsed_seconds.count();
1570  // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1571  // nGFlops *= deepnet.GetNFlops() * 1e-9;
1572  double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1573 
1574  converged =
1575  convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1576 
1577 
1578  Log() << std::setw(10) << nTrainEpochs << " | "
1579  << std::setw(12) << trainingError
1580  << std::setw(12) << valError
1581  << std::setw(12) << seconds / settings.testInterval
1582  << std::setw(12) << elapsed_testing.count()
1583  << std::setw(12) << 1. / eventTime
1584  << std::setw(12) << convergenceCount
1585  << Endl;
1586 
1587  if (converged) {
1588  Log() << Endl;
1589  }
1590  tstart = std::chrono::system_clock::now();
1591  }
1592 
1593  // if (stepCount % 10 == 0 || converged) {
1594  if (converged && debug) {
1595  Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
1596  << Endl;
1597  auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1598  auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1599  for (size_t l = 0; l < weights_tensor.size(); ++l)
1600  weights_tensor[l].Print();
1601  bias_tensor[0].Print();
1602  }
1603 
1604  }
1605 
1606  trainingPhase++;
1607  } // end loop on training Phase
1608 }
1609 
1610 ////////////////////////////////////////////////////////////////////////////////
1612 {
1613  if (fInteractive) {
1614  Log() << kFATAL << "Not implemented yet" << Endl;
1615  return;
1616  }
1617 
1618  // using for training same scalar type defined for the prediction
1619  if (this->GetArchitectureString() == "GPU") {
1620 #ifdef R__HAS_TMVAGPU
1621  Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1622 #ifdef R__HAS_CUDNN
1623  TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1624 #else
1625  TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1626 #endif
1627 #else
1628  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1629  "you have CUDA installed and it was successfully "
1630  "detected by CMAKE."
1631  << Endl;
1632  return;
1633 #endif
1634  } else if (this->GetArchitectureString() == "CPU") {
1635 #ifdef R__HAS_TMVACPU
1636  // note that number of threads used for BLAS might be different
1637  // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1638  Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
1639  << gConfig().GetNCpu() << Endl << Endl;
1640 #else
1641  Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
1642  << Endl;
1643 #endif
1644  TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1645  return;
1646  }
1647  else {
1648  Log() << kFATAL << this->GetArchitectureString() <<
1649  " is not a supported architecture for TMVA::MethodDL"
1650  << Endl;
1651  }
1652 
1653 }
1654 
1655 
1656 ////////////////////////////////////////////////////////////////////////////////
Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
{
   // Evaluate the stored network fNet on the current TMVA event and return
   // the single classifier response (*fYHat)(0,0); a NaN response is mapped
   // to the sentinel value -999.

   // note that fNet should have been build with a batch size of 1

   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built"
            << Endl;
   }

   // input size must be equal to 1 which is the batch size of fNet
   R__ASSERT(fNet->GetBatchSize() == 1);

   // int batchWidth = fNet->GetBatchWidth();
   // int batchDepth = fNet->GetBatchDepth();
   // int batchHeight = fNet->GetBatchHeight();
// int noutput = fNet->GetOutputWidth();


   // get current event
   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();

   size_t nVariables = GetEvent()->GetNVariables();

   // for Columnlayout tensor memory layout is HWC while for rowwise is CHW
   // NOTE(review): the extraction of this file dropped the branch opener here
   // (doc line 1682) — presumably
   //   if (fXInput.GetMemoryLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
   // which the `} else {` below matches; restore from upstream MethodDL.cxx.
   R__ASSERT(fXInput.GetShape().size() < 4);
   size_t nc, nhw = 0;  // nc: leading (channel) dimension, nhw: flattened H*W size
   if (fXInput.GetShape().size() == 2) {
      nc = fXInput.GetShape()[0];
      if (nc != 1 ) {
         Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1"
               << Endl;
      }
      nhw = fXInput.GetShape()[1];
   } else {
      nc = fXInput.GetCSize();
      nhw = fXInput.GetWSize();
   }
   if ( nVariables != nc * nhw) {
      Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
            << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw
            << Endl;
   }
   for (size_t j = 0; j < nc; j++) {
      for (size_t k = 0; k < nhw; k++) {
         // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
         fXInputBuffer[ k * nc + j] = inputValues[j*nhw + k]; // for column layout !!!
      }
   }
   } else {
      // row-wise layout: the TMVA event already has the same ordering (C H W)
      // as the input tensor, so a flat copy suffices
      assert(fXInput.GetShape().size() >= 4);
      size_t nc = fXInput.GetCSize();
      size_t nh = fXInput.GetHSize();
      size_t nw = fXInput.GetWSize();
      size_t n = nc * nh * nw;
      if ( nVariables != n) {
         Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
               << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
               << Endl;
      }
      for (size_t j = 0; j < n; j++) {
         // in this case TMVA event has same order as input tensor
         fXInputBuffer[ j ] = inputValues[j];
      }
   }
   // copy host buffer into the (possibly device-resident) input tensor
   fXInput.GetDeviceBuffer().CopyFrom( fXInputBuffer);

   // perform the prediction
   fNet->Prediction(*fYHat, fXInput, fOutputFunction);

   // return value
   double mvaValue = (*fYHat)(0, 0);

   // for debugging
#ifdef DEBUG_MVAVALUE
   using Tensor_t = std::vector<MatrixImpl_t>;
   TMatrixF xInput(n1,n2, inputValues.data() );
   std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
   xInput.Print();
   std::cout << "Output of DeepNet " << mvaValue << std::endl;
   auto & deepnet = *fNet;
   std::cout << "Loop on layers " << std::endl;
   for (int l = 0; l < deepnet.GetDepth(); ++l) {
      std::cout << "Layer " << l;
      const auto * layer = deepnet.GetLayerAt(l);
      const Tensor_t & layer_output = layer->GetOutput();
      layer->Print();
      std::cout << "DNN output " << layer_output.size() << std::endl;
      for (size_t i = 0; i < layer_output.size(); ++i) {
#ifdef R__HAS_TMVAGPU
         //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
         TMatrixD m = layer_output[i];
#else
         TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
#endif
         m.Print();
      }
      const Tensor_t & layer_weights = layer->GetWeights();
      std::cout << "DNN weights " << layer_weights.size() << std::endl;
      if (layer_weights.size() > 0) {
         int i = 0;
#ifdef R__HAS_TMVAGPU
         TMatrixD m = layer_weights[i];
//         TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
#else
         TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
#endif
         m.Print();
      }
   }
#endif

   return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
}
1775 ////////////////////////////////////////////////////////////////////////////////
1776 /// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1777 ////////////////////////////////////////////////////////////////////////////////
1778 template <typename Architecture_t>
1779 std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1780 {
1781 
1782  // Check whether the model is setup
1783  if (!fNet || fNet->GetDepth() == 0) {
1784  Log() << kFATAL << "The network has not been trained and fNet is not built"
1785  << Endl;
1786  }
1787 
1788  // rebuild the networks
1789  this->SetBatchSize(batchSize);
1790  size_t inputDepth = this->GetInputDepth();
1791  size_t inputHeight = this->GetInputHeight();
1792  size_t inputWidth = this->GetInputWidth();
1793  size_t batchDepth = this->GetBatchDepth();
1794  size_t batchHeight = this->GetBatchHeight();
1795  size_t batchWidth = this->GetBatchWidth();
1796  ELossFunction J = fNet->GetLossFunction();
1797  EInitialization I = fNet->GetInitialization();
1798  ERegularization R = fNet->GetRegularization();
1799  Double_t weightDecay = fNet->GetWeightDecay();
1800 
1801  using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1802  using Matrix_t = typename Architecture_t::Matrix_t;
1803  using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1804 
1805  // create the deep neural network
1806  DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1807  std::vector<DeepNet_t> nets{};
1808  fBuildNet = false;
1809  CreateDeepNet(deepNet,nets);
1810 
1811  // copy weights from the saved fNet to the built DeepNet
1812  for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1813  deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1814  // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1815  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
1816  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
1817  // }
1818  }
1819 
1820  size_t n1 = deepNet.GetBatchHeight();
1821  size_t n2 = deepNet.GetBatchWidth();
1822  size_t n0 = deepNet.GetBatchSize();
1823  // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1824  if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1825  n1 = deepNet.GetBatchSize();
1826  n0 = 1;
1827  }
1828  //this->SetBatchDepth(n0);
1829  Long64_t nEvents = lastEvt - firstEvt;
1830  TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1831  TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1832 
1833 
1834  // Tensor_t xInput;
1835  // for (size_t i = 0; i < n0; ++i)
1836  // xInput.emplace_back(Matrix_t(n1,n2));
1837 
1838  // create pointer to output matrix used for the predictions
1839  Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1840 
1841  // use timer
1842  Timer timer( nEvents, GetName(), kTRUE );
1843 
1844  if (logProgress)
1845  Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1846  << "Evaluation of " << GetMethodName() << " on "
1847  << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1848  << " sample (" << nEvents << " events)" << Endl;
1849 
1850 
1851  // eventg loop
1852  std::vector<double> mvaValues(nEvents);
1853 
1854 
1855  for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1856 
1857  Long64_t ievt_end = ievt + batchSize;
1858  // case of batch prediction for
1859  if (ievt_end <= lastEvt) {
1860 
1861  if (ievt == firstEvt) {
1862  Data()->SetCurrentEvent(ievt);
1863  size_t nVariables = GetEvent()->GetNVariables();
1864 
1865  if (n1 == batchSize && n0 == 1) {
1866  if (n2 != nVariables) {
1867  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1868  << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1869  << Endl;
1870  }
1871  } else {
1872  if (n1*n2 != nVariables || n0 != batchSize) {
1873  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1874  << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1875  << Endl;
1876  }
1877  }
1878  }
1879 
1880  auto batch = testData.GetTensorBatch();
1881  auto inputTensor = batch.GetInput();
1882 
1883  auto xInput = batch.GetInput();
1884  // make the prediction
1885  deepNet.Prediction(yHat, xInput, fOutputFunction);
1886  for (size_t i = 0; i < batchSize; ++i) {
1887  double value = yHat(i,0);
1888  mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1889  }
1890  }
1891  else {
1892  // case of remaining events: compute prediction by single event !
1893  for (Long64_t i = ievt; i < lastEvt; ++i) {
1894  Data()->SetCurrentEvent(i);
1895  mvaValues[i] = GetMvaValue();
1896  }
1897  }
1898  }
1899 
1900  if (logProgress) {
1901  Log() << kINFO
1902  << "Elapsed time for evaluation of " << nEvents << " events: "
1903  << timer.GetElapsedTime() << " " << Endl;
1904  }
1905 
1906  return mvaValues;
1907 }
1908 
1909 const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1910 {
1911  size_t nVariables = GetEvent()->GetNVariables();
1912  MatrixImpl_t X(1, nVariables);
1913  TensorImpl_t X_vec ( 1, 1, nVariables); // needs to be really 1
1914  const Event *ev = GetEvent();
1915  const std::vector<Float_t>& inputValues = ev->GetValues();
1916  for (size_t i = 0; i < nVariables; i++) {
1917  X_vec(0,i,0) = inputValues[i]; // in case of column format !!
1918  }
1919  //X_vec.emplace_back(X);
1920 
1921  size_t nTargets = std::max(1u, ev->GetNTargets());
1922  MatrixImpl_t YHat(1, nTargets);
1923  std::vector<Float_t> output(nTargets);
1924  fNet->Prediction(YHat, X_vec, fOutputFunction);
1925 
1926  for (size_t i = 0; i < nTargets; i++)
1927  output[i] = YHat(0, i);
1928 
1929  if (fRegressionReturnVal == NULL) {
1930  fRegressionReturnVal = new std::vector<Float_t>();
1931  }
1932  fRegressionReturnVal->clear();
1933 
1934  Event * evT = new Event(*ev);
1935  for (size_t i = 0; i < nTargets; ++i) {
1936  evT->SetTarget(i, output[i]);
1937  }
1938 
1939  const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1940  for (size_t i = 0; i < nTargets; ++i) {
1941  fRegressionReturnVal->push_back(evT2->GetTarget(i));
1942  }
1943  delete evT;
1944  return *fRegressionReturnVal;
1945 }
1946 
1947 const std::vector<Float_t> & TMVA::MethodDL::GetMulticlassValues()
1948 {
1949  size_t nVariables = GetEvent()->GetNVariables();
1950  MatrixImpl_t X(1, nVariables);
1951  TensorImpl_t X_vec ( 1, 1, nVariables);
1952  MatrixImpl_t YHat(1, DataInfo().GetNClasses());
1953  if (fMulticlassReturnVal == NULL) {
1954  fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1955  }
1956 
1957  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1958  for (size_t i = 0; i < nVariables; i++) {
1959  X_vec(0,i, 0) = inputValues[i];
1960  }
1961  //X_vec.emplace_back(X);
1962  fNet->Prediction(YHat, X_vec, fOutputFunction);
1963  for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1964  (*fMulticlassReturnVal)[i] = YHat(0, i);
1965  }
1966  return *fMulticlassReturnVal;
1967 }
1968 
1969 
1970 ////////////////////////////////////////////////////////////////////////////////
1971 /// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1972 ////////////////////////////////////////////////////////////////////////////////
1973 std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
1974 {
1975  // Long64_t nEvents = Data()->GetNEvents();
1976  // std::vector<Double_t> v(nEvents);
1977  // for (Long64_t i = 0; i < nEvents; ++i) {
1978  // Data()->SetCurrentEvent(i);
1979  // v[i] = GetMvaValue();
1980  // }
1981  // return v;
1982 
1983 
1984  Long64_t nEvents = Data()->GetNEvents();
1985  if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
1986  if (firstEvt < 0) firstEvt = 0;
1987  nEvents = lastEvt-firstEvt;
1988 
1989  // use same batch size as for training (from first strategy)
1990  size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
1991  size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
1992  if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
1993 
1994  // using for training same scalar type defined for the prediction
1995  if (this->GetArchitectureString() == "GPU") {
1996 #ifdef R__HAS_TMVAGPU
1997  Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
1998 #ifdef R__HAS_CUDNN
1999  return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2000 #else
2001  return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2002 #endif
2003 
2004 #endif
2005  }
2006  Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
2007  return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
2008 }
2009 ////////////////////////////////////////////////////////////////////////////////
2010 void MethodDL::AddWeightsXMLTo(void * parent) const
2011 {
2012  // Create the parent XML node with name "Weights"
2013  auto & xmlEngine = gTools().xmlengine();
2014  void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2015 
2016  /*! Get all necessary information, in order to be able to reconstruct the net
2017  * if we read the same XML file. */
2018 
2019  // Deep Net specific info
2020  Int_t depth = fNet->GetDepth();
2021 
2022  Int_t inputDepth = fNet->GetInputDepth();
2023  Int_t inputHeight = fNet->GetInputHeight();
2024  Int_t inputWidth = fNet->GetInputWidth();
2025 
2026  Int_t batchSize = fNet->GetBatchSize();
2027 
2028  Int_t batchDepth = fNet->GetBatchDepth();
2029  Int_t batchHeight = fNet->GetBatchHeight();
2030  Int_t batchWidth = fNet->GetBatchWidth();
2031 
2032  char lossFunction = static_cast<char>(fNet->GetLossFunction());
2033  char initialization = static_cast<char>(fNet->GetInitialization());
2034  char regularization = static_cast<char>(fNet->GetRegularization());
2035 
2036  Double_t weightDecay = fNet->GetWeightDecay();
2037 
2038  // Method specific info (not sure these are needed)
2039  char outputFunction = static_cast<char>(this->GetOutputFunction());
2040  //char lossFunction = static_cast<char>(this->GetLossFunction());
2041 
2042  // Add attributes to the parent node
2043  xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2044 
2045  xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2046  xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2047  xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2048 
2049  xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2050  xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2051  xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2052  xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2053 
2054  xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2055  xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2056  xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2057  xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2058 
2059  gTools().AddAttr(nn, "WeightDecay", weightDecay);
2060 
2061 
2062  for (Int_t i = 0; i < depth; i++)
2063  {
2064  fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2065  }
2066 
2067 
2068 }
2069 
2070 ////////////////////////////////////////////////////////////////////////////////
void MethodDL::ReadWeightsFromXML(void * rootXML)
{
   // Reconstruct the network (topology + per-layer weights) from the XML
   // written by AddWeightsXMLTo, and allocate the input/output buffers used
   // for single-event evaluation in GetMvaValue.

   auto netXML = gTools().GetChild(rootXML, "Weights");
   if (!netXML){
      // older weight files may carry the attributes directly on the root node
      netXML = rootXML;
   }

   size_t netDepth;
   gTools().ReadAttr(netXML, "NetDepth", netDepth);

   size_t inputDepth, inputHeight, inputWidth;
   gTools().ReadAttr(netXML, "InputDepth", inputDepth);
   gTools().ReadAttr(netXML, "InputHeight", inputHeight);
   gTools().ReadAttr(netXML, "InputWidth", inputWidth);

   size_t batchSize, batchDepth, batchHeight, batchWidth;
   gTools().ReadAttr(netXML, "BatchSize", batchSize);
   // use always batchsize = 1
   //batchSize = 1;
   gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
   gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
   gTools().ReadAttr(netXML, "BatchWidth", batchWidth);

   // loss/initialization/regularization/output function were persisted as the
   // char value of the corresponding enum (see AddWeightsXMLTo)
   char lossFunctionChar;
   gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
   char initializationChar;
   gTools().ReadAttr(netXML, "Initialization", initializationChar);
   char regularizationChar;
   gTools().ReadAttr(netXML, "Regularization", regularizationChar);
   char outputFunctionChar;
   gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
   double weightDecay;
   gTools().ReadAttr(netXML, "WeightDecay", weightDecay);

   // create the net

   // DeepNetCpu_t is defined in MethodDL.h
   this->SetInputDepth(inputDepth);
   this->SetInputHeight(inputHeight);
   this->SetInputWidth(inputWidth);
   this->SetBatchDepth(batchDepth);
   this->SetBatchHeight(batchHeight);
   this->SetBatchWidth(batchWidth);



   fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
                                                           batchHeight, batchWidth,
                                                           static_cast<ELossFunction>(lossFunctionChar),
                                                           static_cast<EInitialization>(initializationChar),
                                                           static_cast<ERegularization>(regularizationChar),
                                                           weightDecay));

   fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);


   //size_t previousWidth = inputWidth;
   auto layerXML = gTools().xmlengine().GetChild(netXML);

   // loop on the layer and add them to the network
   for (size_t i = 0; i < netDepth; i++) {

      TString layerName = gTools().xmlengine().GetNodeName(layerXML);

      // case of dense layer
      if (layerName == "DenseLayer") {

         // read width and activation function and then we can create the layer
         size_t width = 0;
         gTools().ReadAttr(layerXML, "Width", width);

         // Read activation function.
         TString funcString;
         gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
         EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());


         fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability

      }
      // Convolutional Layer
      else if (layerName == "ConvLayer") {

         // read width and activation function and then we can create the layer
         size_t depth = 0;
         gTools().ReadAttr(layerXML, "Depth", depth);
         // NOTE(review): each declaration below initializes only its last
         // variable, but all are overwritten by ReadAttr before use
         size_t fltHeight, fltWidth = 0;
         size_t strideRows, strideCols = 0;
         size_t padHeight, padWidth = 0;
         gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
         gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
         gTools().ReadAttr(layerXML, "StrideRows", strideRows);
         gTools().ReadAttr(layerXML, "StrideCols", strideCols);
         gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
         gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);

         // Read activation function.
         TString funcString;
         gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
         EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());


         fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                            padHeight, padWidth, actFunction);

      }

      // MaxPool Layer
      else if (layerName == "MaxPoolLayer") {

         // read maxpool layer info
         size_t filterHeight, filterWidth = 0;
         size_t strideRows, strideCols = 0;
         gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
         gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
         gTools().ReadAttr(layerXML, "StrideRows", strideRows);
         gTools().ReadAttr(layerXML, "StrideCols", strideCols);

         fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
      }
      // Reshape Layer
      else if (layerName == "ReshapeLayer") {

         // read reshape layer info
         size_t depth, height, width = 0;
         gTools().ReadAttr(layerXML, "Depth", depth);
         gTools().ReadAttr(layerXML, "Height", height);
         gTools().ReadAttr(layerXML, "Width", width);
         int flattening = 0;
         gTools().ReadAttr(layerXML, "Flattening",flattening );

         fNet->AddReshapeLayer(depth, height, width, flattening);

      }
      // RNN Layer
      else if (layerName == "RNNLayer") {

         // read RNN layer info
         size_t stateSize,inputSize, timeSteps = 0;
         int rememberState= 0;
         int returnSequence = 0;
         gTools().ReadAttr(layerXML, "StateSize", stateSize);
         gTools().ReadAttr(layerXML, "InputSize", inputSize);
         gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
         gTools().ReadAttr(layerXML, "RememberState", rememberState );
         gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);

         fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);

      }
      // LSTM Layer
      else if (layerName == "LSTMLayer") {

         // read LSTM layer info
         size_t stateSize,inputSize, timeSteps = 0;
         int rememberState, returnSequence = 0;
         gTools().ReadAttr(layerXML, "StateSize", stateSize);
         gTools().ReadAttr(layerXML, "InputSize", inputSize);
         gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
         gTools().ReadAttr(layerXML, "RememberState", rememberState );
         gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);

         fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);

      }
      // GRU Layer
      else if (layerName == "GRULayer") {

         // read GRU layer info
         size_t stateSize,inputSize, timeSteps = 0;
         int rememberState, returnSequence, resetGateAfter = 0;
         gTools().ReadAttr(layerXML, "StateSize", stateSize);
         gTools().ReadAttr(layerXML, "InputSize", inputSize);
         gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
         gTools().ReadAttr(layerXML, "RememberState", rememberState );
         gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
         gTools().ReadAttr(layerXML, "ResetGateAfter", resetGateAfter);

         // cuDNN supports only the resetGateAfter=true GRU variant
         if (!resetGateAfter && ArchitectureImpl_t::IsCudnn())
            Warning("ReadWeightsFromXML",
                    "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");

         fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
      }
      // BatchNorm Layer
      else if (layerName == "BatchNormLayer") {
         // use some dummy values which will be overwritten in BatchNormLayer::ReadWeightsFromXML
         fNet->AddBatchNormLayer(0., 0.0);
      }
      // read weights and biases
      fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);

      // read next layer
      layerXML = gTools().GetNextChild(layerXML);
   }

   fBuildNet = false;
   // create now the input and output matrices
   //int n1 = batchHeight;
   //int n2 = batchWidth;
   // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
   //if (fXInput.size() > 0) fXInput.clear();
   //fXInput.emplace_back(MatrixImpl_t(n1,n2));
   if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
      // make here a ColumnMajor tensor
      // NOTE(review): the extraction of this file dropped the statements that
      // construct fXInput here (doc lines 2275/2278) — as written, the `if`
      // above now guards the fXInputBuffer assignment below instead; restore
      // the fXInput construction from upstream MethodDL.cxx.
   fXInputBuffer = HostBufferImpl_t( fXInput.GetSize());

   // output matrix used for the predictions (fNet batch size x output width)
   fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );


}
2286 
2287 
2288 ////////////////////////////////////////////////////////////////////////////////
void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
{
   // Reading weights from a plain-text stream is not supported for MethodDL;
   // weights are persisted and restored via XML (see ReadWeightsFromXML).
}
2292 
2293 ////////////////////////////////////////////////////////////////////////////////
2295 {
2296  // TODO
2297  return NULL;
2298 }
2299 
2300 ////////////////////////////////////////////////////////////////////////////////
2302 {
2303  // TODO
2304 }
2305 
2306 } // namespace TMVA
TMVA::Experimental::MemoryLayout::ColumnMajor
@ ColumnMajor
TMVA::DNN::TDeepNet::AddReshapeLayer
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:773
TMVA::MethodDL::Train
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1611
make_cnn_model.optimizer
optimizer
Definition: make_cnn_model.py:15
l
auto * l
Definition: textangle.C:4
TMVA::DNN::EInitialization::kUniform
@ kUniform
Adadelta.h
ROOT::TMetaUtils::propNames::separator
static const std::string separator("@@@")
m
auto * m
Definition: textangle.C:8
Adam.h
n
const Int_t n
Definition: legend1.C:16
TMVA::MethodDL::TrainDeepNet
void TrainDeepNet()
train of deep neural network using the defined architecture
Definition: MethodDL.cxx:1140
TMVA::MethodDL::fBuildNet
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:194
TMVA::MethodDL::ParseMaxPoolLayer
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:744
TMVA::DNN::TDeepNet::AddConvLayer
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:439
TMVA::MethodDL::fYHat
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:201
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:91
TMVA::Configurable::Log
MsgLogger & Log() const
Definition: Configurable.h:122
TMVA::DataSet::GetCurrentType
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:194
TMVA::Types::kMulticlass
@ kMulticlass
Definition: Types.h:131
TMVA::MethodDL::fOutputFunction
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:182
TMVA::TTrainingSettings::optimizerName
TString optimizerName
Definition: MethodDL.h:77
TMVA::DNN::EOptimizer
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:82
e
#define e(i)
Definition: RSha256.hxx:103
TMVA::DNN::EInitialization::kGlorotUniform
@ kGlorotUniform
TMVA::DNN::TDeepNet::AddBatchNormLayer
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition: DeepNet.h:825
TMVA::Tools::GetChild
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TMVA::MethodBase::Data
DataSet * Data() const
Definition: MethodBase.h:408
TMVA::DNN::EOutputFunction::kSigmoid
@ kSigmoid
TObjArray
An array of TObjects.
Definition: TObjArray.h:37
TMVA::MethodDL::AddWeightsXMLTo
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:2010
TMVA::MethodDL::GetHelpMessage
void GetHelpMessage() const
Definition: MethodDL.cxx:2301
TMVA::MethodDL::GetArchitectureString
TString GetArchitectureString() const
Definition: MethodDL.h:271
TMVA::DNN::EOutputFunction
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
TXMLEngine::NewChild
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:712
TString::Atoi
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1921
TMVA::DNN
Definition: Adadelta.h:36
TFormula
The Formula class.
Definition: TFormula.h:87
TString::Strip
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1106
TMVA::Types::kRegression
@ kRegression
Definition: Types.h:130
TObject::Print
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:552
TMVA::DNN::EInitialization::kZero
@ kZero
TMVA::MethodDL::HostBufferImpl_t
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition: MethodDL.h:107
TMVA::DNN::TDeepNet::AddDenseLayer
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:740
TMVA::DNN::ERegularization
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
TString::Data
const char * Data() const
Definition: TString.h:369
TMVA::MethodDL::GetMulticlassValues
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDL.cxx:1947
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
Form
char * Form(const char *fmt,...)
TMVA::Ranking
Ranking for variables in method (implementation)
Definition: Ranking.h:48
TMVA::DNN::EInitialization::kIdentity
@ kIdentity
TObjString.h
TMVA::Event::GetClass
UInt_t GetClass() const
Definition: Event.h:86
TMVA::DNN::regularization
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:238
TMVA::DNN::ERegularization::kL1
@ kL1
TMVA::TTrainingSettings::maxEpochs
size_t maxEpochs
Definition: MethodDL.h:74
IMethod.h
TMVA::DNN::EOptimizer::kAdam
@ kAdam
TMVA::MethodDL::CreateDeepNet
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets)
After calling the ProcesOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:503
Long64_t
long long Long64_t
Definition: RtypesCore.h:73
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:760
TObject::Error
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:890
output
static void output(int code)
Definition: gifencode.c:226
TMVA::MethodDL::GetInputHeight
size_t GetInputHeight() const
Definition: MethodDL.h:249
TMVA::MethodDL::DeclareOptions
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:160
TMVA::RandomGenerator
Definition: Tools.h:305
TString::Atof
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
TMVA::MethodDL::CreateRanking
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2294
TMVA::MethodDL::ParseReshapeLayer
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:805
TMVA::MethodDL::GetBatchWidth
size_t GetBatchWidth() const
Definition: MethodDL.h:257
TMVA::MethodDL::PredictDeepNet
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1779
width
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
TMVA::MethodDL::~MethodDL
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:1021
TMVA::DNN::ELossFunction::kMeanSquaredError
@ kMeanSquaredError
TMVA::TTrainingSettings::learningRate
Double_t learningRate
Definition: MethodDL.h:78
TMVA::DNN::TMVAInput_t
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
TObjString::GetString
const TString & GetString() const
Definition: TObjString.h:46
TMVA::DNN::TDeepNet::AddBasicRNNLayer
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:524
TMVA::MethodDL::ParseRecurrentLayer
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate rnn layer.
Definition: MethodDL.cxx:907
TMVA::DNN::CNN::TConvLayer
Definition: ConvLayer.h:75
TMVA::DNN::ELossFunction::kCrossEntropy
@ kCrossEntropy
TString::Length
Ssiz_t Length() const
Definition: TString.h:410
TMVA::MethodDL::SetInputHeight
void SetInputHeight(int inputHeight)
Definition: MethodDL.h:280
TMVA::DNN::ERegularization::kNone
@ kNone
TMVA::MethodDL::SetBatchHeight
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:286
TXMLEngine::GetChild
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
Definition: TXMLEngine.cxx:1143
TMVA::DNN::EInitialization::kGlorotNormal
@ kGlorotNormal
TMVA::DNN::EActivationFunction::kSoftSign
@ kSoftSign
TMVA::DNN::TDenseLayer
Generic layer class.
Definition: DenseLayer.h:59
TMVA::MethodDL::DeepNetImpl_t
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition: MethodDL.h:103
TMVA::MsgLogger::GetMinType
EMsgType GetMinType() const
Definition: MsgLogger.h:71
TMVA::Event::GetTarget
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
TMVA::Event::SetTarget
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:359
TMVA::DNN::TCpu::PrintTensor
static void PrintTensor(const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false)
Definition: Cpu.h:862
TMVA::DNN::EOptimizer::kSGD
@ kSGD
TMVA::DNN::EActivationFunction::kGauss
@ kGauss
TString::IsFloat
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
TMVA::MethodDL::fTrainingSettings
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:197
TString::Format
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311
TMVA::MethodDL::Init
void Init()
default initializations
Definition: MethodDL.cxx:406
TMVA::MethodDL::SetInputDepth
void SetInputDepth(int inputDepth)
Setters.
Definition: MethodDL.h:279
TMVA::DNN::EActivationFunction::kTanh
@ kTanh
TMVA::DNN::weightDecay
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
BatchHelpers::block
constexpr size_t block
Definition: BatchHelpers.h:29
TMVA::DNN::TTensorDataLoader
TTensorDataLoader.
Definition: TensorDataLoader.h:133
TString
Basic string class.
Definition: TString.h:136
TMatrixT
TMatrixT.
Definition: TMatrixT.h:39
TMVA::MethodDL::GetMvaValue
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDL.cxx:1657
TMVA::MethodDL::HasAnalysisType
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:1067
TMVA::MethodDL::fXInputBuffer
HostBufferImpl_t fXInputBuffer
Definition: MethodDL.h:200
TMVA::MethodDL::MatrixImpl_t
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:104
TString.h
REGISTER_METHOD
#define REGISTER_METHOD(CLASS)
for example
Definition: ClassifierFactory.h:124
bool
TString::ReplaceAll
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:692
TMVA::MethodDL::SetBatchWidth
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:287
TMVA::Tools::xmlengine
TXMLEngine & xmlengine()
Definition: Tools.h:268
TString::kBoth
@ kBoth
Definition: TString.h:267
TMVA::DNN::TAdagrad
Adagrad Optimizer class.
Definition: Adagrad.h:45
TMVA::MethodBase::DataInfo
DataSetInfo & DataInfo() const
Definition: MethodBase.h:409
DLMinimizers.h
TString::ToUpper
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1138
TObjString
Collectable string class.
Definition: TObjString.h:28
TMVA::TTrainingSettings::weightDecay
Double_t weightDecay
Definition: MethodDL.h:80
TMVA::Tools::AddAttr
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:353
TMVA::MethodDL::GetLossFunction
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:263
TMVA::TTrainingSettings::batchSize
size_t batchSize
Definition: MethodDL.h:71
R
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
TMVA::MethodDL::ParseKeyValueString
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:1028
TMVA::DNN::VGeneralLayer
Generic General Layer class.
Definition: GeneralLayer.h:51
TMVA::MethodDL::ParseInputLayout
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:413
TMVA::DataSetInfo
Class that contains all the data information.
Definition: DataSetInfo.h:62
TensorDataLoader.h
TMVA::DNN::TAdam
Adam Optimizer class.
Definition: Adam.h:45
TString::Tokenize
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2197
TMVA::MethodBase::fInteractive
IPythonInteractive * fInteractive
Definition: MethodBase.h:446
TMVA::Timer::GetElapsedTime
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:146
TMVA::DNN::TRMSProp
RMSProp Optimizer class.
Definition: RMSProp.h:45
TMVA::MethodBase::GetMethodName
const TString & GetMethodName() const
Definition: MethodBase.h:330
TMVA::MethodDL::ERecurrentLayerType
ERecurrentLayerType
Definition: MethodDL.h:150
Timer.h
TMVA::Event::GetValues
std::vector< Float_t > & GetValues()
Definition: Event.h:94
TString::kTrailing
@ kTrailing
Definition: TString.h:267
TMVA::DNN::EActivationFunction::kFastTanh
@ kFastTanh
TMVA::Types::EAnalysisType
EAnalysisType
Definition: Types.h:128
TNamed::Print
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Definition: TNamed.cxx:128
TMath::IsNaN
Bool_t IsNaN(Double_t x)
Definition: TMath.h:892
TMVA::Event::GetNVariables
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:308
epsilon
REAL epsilon
Definition: triangle.c:617
TMVA::DNN::EActivationFunction::kSymmRelu
@ kSymmRelu
TMVA::TrainingHistory::AddValue
void AddValue(TString Property, Int_t stage, Double_t value)
Definition: TrainingHistory.cxx:47
TMVA::DataSet::GetNEvents
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:206
TMVA::DNN::ERegularization::kL2
@ kL2
TMVA::gConfig
Config & gConfig()
TMVA::MethodDL::GetTrainingSettings
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:273
TMVA::DNN::VGeneralLayer::Initialize
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:395
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:92
TMVA::MethodDL::ParseBatchLayout
void ParseBatchLayout()
Parse the input layout.
Definition: MethodDL.cxx:456
TMVA::MethodDL::SetBatchDepth
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:285
TMVA::MethodDL::SetInputWidth
void SetInputWidth(int inputWidth)
Definition: MethodDL.h:281
TMVA::DNN::EActivationFunction::kRelu
@ kRelu
SGD.h
TMVA::TTrainingSettings::regularization
DNN::ERegularization regularization
Definition: MethodDL.h:75
TMVA::Tools::ReadAttr
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
TFormula::Eval
Double_t Eval(Double_t x) const
TString::First
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:499
TMVA::Types::kClassification
@ kClassification
Definition: Types.h:129
TMVA::MethodDL::GetRegressionValues
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDL.cxx:1909
TMVA::MethodDL::ReadWeightsFromStream
virtual void ReadWeightsFromStream(std::istream &)=0
Methods for writing and reading weights.
Adagrad.h
UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:46
TMVA::Tools::StringFromInt
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1235
TMVA::MethodDL::ProcessOptions
void ProcessOptions()
Definition: MethodDL.cxx:212
TMVA::DNN::TAdadelta
Adadelta Optimizer class.
Definition: Adadelta.h:45
TMVA::DNN::EOutputFunction::kIdentity
@ kIdentity
TMVA::MethodBase
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
TMVA::Types
Singleton class for Global types used by TMVA.
Definition: Types.h:73
TMVA::MethodDL::fNet
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:202
TMVA::DNN::EOutputFunction::kSoftmax
@ kSoftmax
TMVA::MethodBase::GetEventCollection
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
Definition: MethodBase.cxx:3340
Types.h
Configurable.h
TMVA::DNN::EOptimizer::kRMSProp
@ kRMSProp
TObject::Warning
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:876
TMVA::MethodDL::ParseDenseLayer
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:557
TMVA::TTrainingSettings::optimizer
DNN::EOptimizer optimizer
Definition: MethodDL.h:76
TMVA::MethodDL::KeyValueVector_t
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:90
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
unsigned int
TMVA::Timer
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TMVA::Types::kTraining
@ kTraining
Definition: Types.h:145
TMVA::DNN::EInitialization
EInitialization
Definition: Functions.h:72
TMVA::TTrainingSettings::convergenceSteps
size_t convergenceSteps
Definition: MethodDL.h:73
TMVA::DNN::ELossFunction
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:57
Functions.h
TMVA::DNN::TDeepNet::AddMaxPoolLayer
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:485
MethodDL.h
TMVA::DataSet::SetCurrentEvent
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:88
TMVA::DNN::EOptimizer::kAdadelta
@ kAdadelta
Double_t
double Double_t
Definition: RtypesCore.h:59
R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:120
TMVA::MethodDL::ParseBatchNormLayer
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:866
t1
auto * t1
Definition: textangle.C:20
TMVA::TTrainingSettings::momentum
Double_t momentum
Definition: MethodDL.h:79
TMVA::MethodDL::MethodDL
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:995
TMVA::Tools::GetNextChild
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
TMVA::DNN::EInitialization::kGauss
@ kGauss
TMVA::MethodBase::GetName
const char * GetName() const
Definition: MethodBase.h:333
TMVA::DNN::ELossFunction::kSoftmaxCrossEntropy
@ kSoftmaxCrossEntropy
TMVA::Event
Definition: Event.h:51
RMSProp.h
TMVA::MethodBase::GetEvent
const Event * GetEvent() const
Definition: MethodBase.h:749
TMVA::MethodDL::GetBatchDepth
size_t GetBatchDepth() const
Definition: MethodDL.h:255
ROOT::Math::Chebyshev::T
double T(double x)
Definition: ChebyshevPol.h:34
TMVA::DNN::TDeepNet::AddBasicGRULayer
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:608
TMVA::MethodDL::GetInputDepth
size_t GetInputDepth() const
Definition: MethodDL.h:248
TMVA::DNN::EActivationFunction::kSigmoid
@ kSigmoid
TMVA::DNN::EActivationFunction::kIdentity
@ kIdentity
I
#define I(x, y, z)
TIter
Definition: TCollection.h:233
TMVA::MethodBase::fTrainHistory
TrainingHistory fTrainHistory
Definition: MethodBase.h:425
TMVA::MethodDL
Definition: MethodDL.h:86
TMVA::MethodDL::GetWeightInitialization
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:261
TMVA::TTrainingSettings::testInterval
size_t testInterval
Definition: MethodDL.h:72
TMVA::MethodDL::ParseConvLayer
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t >> &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:645
TMVA::DNN::TCpu::CreateTensor
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition: Cpu.h:108
TMVA::MethodDL::GetMvaValues
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
Definition: MethodDL.cxx:1973
Tools.h
TMVA::DNN::TCpu::IsCudnn
static bool IsCudnn()
Definition: Cpu.h:131
ClassifierFactory.h
type
int type
Definition: TGX11.cxx:121
TMVA::MethodDL::fRandomSeed
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:179
TMVA::MethodDL::SetBatchSize
void SetBatchSize(size_t batchSize)
Definition: MethodDL.h:284
TMVA::MethodDL::GetBatchHeight
size_t GetBatchHeight() const
Definition: MethodDL.h:256
TXMLEngine::GetNodeName
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
Definition: TXMLEngine.cxx:1072
TMVA::gTools
Tools & gTools()
TMVA::MethodDL::GetInputWidth
size_t GetInputWidth() const
Definition: MethodDL.h:250
TMVA::MethodDL::GetOutputFunction
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:262
TMVA::MethodDL::fXInput
TensorImpl_t fXInput
Definition: MethodDL.h:199
TMVA::TTrainingSettings
All of the options that can be specified in the training string.
Definition: MethodDL.h:70
TMVA::MethodDL::GetNumValidationSamples
UInt_t GetNumValidationSamples()
parce the validation string and return the number of event data used for validation
TMVA::DNN::EActivationFunction
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
TMVA::DNN::TDeepNet::AddBasicLSTMLayer
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:567
TMVA::MethodDL::TensorImpl_t
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition: MethodDL.h:105
TMVA::DNN::TDeepNet
Generic Deep Neural Network class.
Definition: DeepNet.h:73
TMVA::DNN::EOptimizer::kAdagrad
@ kAdagrad
TMVA::fetchValueTmp
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDL.cxx:68
TMVA::DNN::CNN
Definition: ContextHandles.h:43
TMath.h
TMVA::TTrainingSettings::dropoutProbabilities
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:81
TMVA::Event::GetNTargets
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:319
TMVA::DNN::TSGD
Stochastic Batch Gradient Descent Optimizer class.
Definition: SGD.h:46
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
int
TMVA::Config::GetNCpu
UInt_t GetNCpu()
Definition: Config.h:72
TMVA::MethodDL::ReadWeightsFromXML
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:2071