ROOT 6.14/05 Reference Guide
MethodDL.cxx
1  // @(#)root/tmva/tmva/cnn:$Id$
2 // Author: Vladimir Ilievski, Saurav Shekhar
3 
4 /**********************************************************************************
5  * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDL *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Deep Neural Network Method *
12  * *
13  * Authors (alphabetical): *
14  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15  * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 #include "TFormula.h"
29 #include "TString.h"
30 #include "TMath.h"
31 
32 #include "TMVA/Tools.h"
33 #include "TMVA/Configurable.h"
34 #include "TMVA/IMethod.h"
35 #include "TMVA/ClassifierFactory.h"
36 #include "TMVA/MethodDL.h"
37 #include "TMVA/Types.h"
39 #include "TMVA/DNN/Functions.h"
40 #include "TMVA/DNN/DLMinimizers.h"
41 #include "TStopwatch.h"
42 
43 #include <chrono>
44 
47 
48 using namespace TMVA::DNN::CNN;
49 using namespace TMVA::DNN;
50 
55 
56 namespace TMVA {
57 
58 ////////////////////////////////////////////////////////////////////////////////
59 TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
60 {
61  key.ToUpper();
62  std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
63  if (it == keyValueMap.end()) {
64  return TString("");
65  }
66  return it->second;
67 }
68 
69 ////////////////////////////////////////////////////////////////////////////////
70 template <typename T>
71 T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
72 
73 ////////////////////////////////////////////////////////////////////////////////
74 template <>
75 int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
76 {
77  TString value(fetchValueTmp(keyValueMap, key));
78  if (value == "") {
79  return defaultValue;
80  }
81  return value.Atoi();
82 }
83 
84 ////////////////////////////////////////////////////////////////////////////////
85 template <>
86 double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
87 {
88  TString value(fetchValueTmp(keyValueMap, key));
89  if (value == "") {
90  return defaultValue;
91  }
92  return value.Atof();
93 }
94 
95 ////////////////////////////////////////////////////////////////////////////////
96 template <>
97 TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
98 {
99  TString value(fetchValueTmp(keyValueMap, key));
100  if (value == "") {
101  return defaultValue;
102  }
103  return value;
104 }
105 
106 ////////////////////////////////////////////////////////////////////////////////
107 template <>
108 bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
109 {
110  TString value(fetchValueTmp(keyValueMap, key));
111  if (value == "") {
112  return defaultValue;
113  }
114 
115  value.ToUpper();
116  if (value == "TRUE" || value == "T" || value == "1") {
117  return true;
118  }
119 
120  return false;
121 }
122 
123 ////////////////////////////////////////////////////////////////////////////////
124 template <>
125 std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
126  std::vector<double> defaultValue)
127 {
128  TString parseString(fetchValueTmp(keyValueMap, key));
129  if (parseString == "") {
130  return defaultValue;
131  }
132 
133  parseString.ToUpper();
134  std::vector<double> values;
135 
136  const TString tokenDelim("+");
137  TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
138  TIter nextToken(tokenStrings);
139  TObjString *tokenString = (TObjString *)nextToken();
140  for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
141  std::stringstream sstr;
142  double currentValue;
143  sstr << tokenString->GetString().Data();
144  sstr >> currentValue;
145  values.push_back(currentValue);
146  }
147  return values;
148 }
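// For illustration, a minimal sketch of how this specialization behaves (the map content below is
// a placeholder): with the "+" delimiter used above, a DropConfig value is split into one dropout
// probability per entry.
//
//    std::map<TString, TString> block{{"DROPCONFIG", "0.0+0.5+0.5"}};
//    std::vector<double> probs =
//       fetchValueTmp(block, "DropConfig", std::vector<double>{});   // -> {0.0, 0.5, 0.5}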
149 
150 ////////////////////////////////////////////////////////////////////////////////
151 void MethodDL::DeclareOptions()
152 {
153  // Set default values for all option strings
154 
155  DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
156 
157  DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
158 
159  DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
160 
161  DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
162  " or cross entropy (binary classification).");
163  AddPreDefVal(TString("CROSSENTROPY"));
164  AddPreDefVal(TString("SUMOFSQUARES"));
165  AddPreDefVal(TString("MUTUALEXCLUSIVE"));
166 
167  DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
168  AddPreDefVal(TString("XAVIER"));
169  AddPreDefVal(TString("XAVIERUNIFORM"));
170 
171  DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
172 
173 
174  DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
175  AddPreDefVal(TString("STANDARD"));
176  AddPreDefVal(TString("CPU"));
177  AddPreDefVal(TString("GPU"));
178  AddPreDefVal(TString("OPENCL"));
179 
180  DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
181  "Momentum=0.3,"
182  "Repetitions=3,"
183  "ConvergenceSteps=50,"
184  "BatchSize=30,"
185  "TestRepetitions=7,"
186  "WeightDecay=0.0,"
187  "Renormalize=L2,"
188  "DropConfig=0.0,"
189  "DropRepetitions=5|LearningRate=1e-4,"
190  "Momentum=0.3,"
191  "Repetitions=3,"
192  "ConvergenceSteps=50,"
193  "MaxEpochs=2000,"
194  "BatchSize=20,"
195  "TestRepetitions=7,"
196  "WeightDecay=0.001,"
197  "Renormalize=L2,"
198  "DropConfig=0.0+0.5+0.5,"
199  "DropRepetitions=5,"
200  "Multithreading=True",
201  "TrainingStrategy", "Defines the training strategies.");
202 }
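// For illustration, a sketch of how these options are typically supplied when booking the method
// from a training macro. The factory/dataloader names and the concrete numbers (14 input
// variables, batch size 128, ...) are placeholders:
//
//    factory->BookMethod(dataloader, TMVA::Types::kDL, "DL",
//       "InputLayout=1|1|14:BatchLayout=1|128|14:"
//       "Layout=DENSE|128|RELU,DENSE|64|RELU,DENSE|0|LINEAR:"
//       "ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:Architecture=CPU:"
//       "TrainingStrategy=LearningRate=1e-2,Momentum=0.9,ConvergenceSteps=20,"
//       "BatchSize=128,MaxEpochs=30");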
203 
204 ////////////////////////////////////////////////////////////////////////////////
205 void MethodDL::ProcessOptions()
206 {
207 
208  if (IgnoreEventsWithNegWeightsInTraining()) {
209  Log() << kINFO << "Will ignore negative events in training!" << Endl;
210  }
211 
212  if (fArchitectureString == "STANDARD") {
213  Log() << kINFO << "The STANDARD architecture has been deprecated. "
214  "Please use Architecture=CPU or Architecture=CPU."
215  "See the TMVA Users' Guide for instructions if you "
216  "encounter problems."
217  << Endl;
218  }
219  if (fArchitectureString == "OPENCL") {
220  Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
221  "Please use Architecture=CPU or Architecture=CPU for the "
222  "time being. See the TMVA Users' Guide for instructions "
223  "if you encounter problems."
224  << Endl;
225  }
226 
227  // the architecture can now be set at runtime as an option
228 
229 
230  if (fArchitectureString == "GPU") {
231 #ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
232  Log() << kERROR << "CUDA backend not enabled. Please make sure "
233  "you have CUDA installed and it was successfully "
234  "detected by CMAKE by using -Dcuda=On "
235  << Endl;
236 #ifdef R__HAS_TMVACPU
237  fArchitectureString = "CPU";
238  Log() << kINFO << "Will now use the CPU architecture!" << Endl;
239 #else
240  fArchitectureString = "Standard";
241  Log() << kINFO << "Will now use the Standard architecture!" << Endl;
242 #endif
243 #else
244  Log() << kINFO << "Will now use the GPU architecture!" << Endl;
245 #endif
246  }
247 
248  else if (fArchitectureString == "CPU") {
249 #ifndef R__HAS_TMVACPU // TMVA has no CPU support
250  Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
251  "you have a BLAS implementation and it was successfully "
252  "detected by CMake as well that the imt CMake flag is set."
253  << Endl;
254 #ifdef R__HAS_TMVAGPU
255  fArchitectureString = "GPU";
256  Log() << kINFO << "Will now use the GPU architecture!" << Endl;
257 #else
258  fArchitectureString = "STANDARD";
259  Log() << kINFO << "Will now use the Standard architecture!" << Endl;
260 #endif
261 #else
262  Log() << kINFO << "Will now use the CPU architecture!" << Endl;
263 #endif
264  }
265 
266  else {
267  Log() << kINFO << "Will use the deprecated STANDARD architecture!" << Endl;
268  fArchitectureString = "STANDARD";
269  }
270 
271  // Input Layout
272  ParseInputLayout();
273  ParseBatchLayout();
274 
275  // Loss function and output.
276  fOutputFunction = EOutputFunction::kSigmoid;
277  if (fAnalysisType == Types::kClassification) {
278  if (fErrorStrategy == "SUMOFSQUARES") {
279  fLossFunction = ELossFunction::kMeanSquaredError;
280  }
281  if (fErrorStrategy == "CROSSENTROPY") {
282  fLossFunction = ELossFunction::kCrossEntropy;
283  }
284  fOutputFunction = EOutputFunction::kSigmoid;
285  } else if (fAnalysisType == Types::kRegression) {
286  if (fErrorStrategy != "SUMOFSQUARES") {
287  Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
288  << " neural net error function. Setting error function to "
289  << " SUMOFSQUARES now." << Endl;
290  }
291 
292  fLossFunction = ELossFunction::kMeanSquaredError;
293  fOutputFunction = EOutputFunction::kIdentity;
294  } else if (fAnalysisType == Types::kMulticlass) {
295  if (fErrorStrategy == "SUMOFSQUARES") {
296  fLossFunction = ELossFunction::kMeanSquaredError;
297  }
298  if (fErrorStrategy == "CROSSENTROPY") {
299  fLossFunction = ELossFunction::kCrossEntropy;
300  }
301  if (fErrorStrategy == "MUTUALEXCLUSIVE") {
302  fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
303  }
304  fOutputFunction = EOutputFunction::kSoftmax;
305  }
306 
307  // Initialization
308  // the biases will always be initialized to zero
309  if (fWeightInitializationString == "XAVIER") {
310  fWeightInitialization = DNN::EInitialization::kGlorotNormal;
311  } else if (fWeightInitializationString == "XAVIERUNIFORM") {
312  fWeightInitialization = DNN::EInitialization::kGlorotUniform;
313  } else if (fWeightInitializationString == "GAUSS") {
314  fWeightInitialization = DNN::EInitialization::kGauss;
315  } else if (fWeightInitializationString == "UNIFORM") {
316  fWeightInitialization = DNN::EInitialization::kUniform;
317  } else if (fWeightInitializationString == "ZERO") {
318  fWeightInitialization = DNN::EInitialization::kZero;
319  } else {
320  fWeightInitialization = DNN::EInitialization::kGlorotUniform;
321  }
322 
323  // Training settings.
324 
325  KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString, TString("|"), TString(","));
326  for (auto &block : strategyKeyValues) {
327  TTrainingSettings settings;
328 
329  settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
330  settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
331  settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
332  settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
333  settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
334  settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
335  settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
336  settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
337 
338  TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
339  if (regularization == "L1") {
340  settings.regularization = DNN::ERegularization::kL1;
341  } else if (regularization == "L2") {
342  settings.regularization = DNN::ERegularization::kL2;
343  }
344 
345  TString strMultithreading = fetchValueTmp(block, "Multithreading", TString("True"));
346 
347  if (strMultithreading.BeginsWith("T")) {
348  settings.multithreading = true;
349  } else {
350  settings.multithreading = false;
351  }
352 
353  fTrainingSettings.push_back(settings);
354  }
355 }
356 
357 ////////////////////////////////////////////////////////////////////////////////
358 /// default initializations
359 void MethodDL::Init()
360 {
361  // Nothing to do here
362 }
363 
364 ////////////////////////////////////////////////////////////////////////////////
365 /// Parse the input layout
366 void MethodDL::ParseInputLayout()
367 {
368  // Define the delimiter
369  const TString delim("|");
370 
371  // Get the input layout string
372  TString inputLayoutString = this->GetInputLayoutString();
373 
374  size_t depth = 0;
375  size_t height = 0;
376  size_t width = 0;
377 
378  // Split the input layout string
379  TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
380  TIter nextInputDim(inputDimStrings);
381  TObjString *inputDimString = (TObjString *)nextInputDim();
382  int idxToken = 0;
383 
384  for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
385  switch (idxToken) {
386  case 0: // input depth
387  {
388  TString strDepth(inputDimString->GetString());
389  depth = (size_t)strDepth.Atoi();
390  } break;
391  case 1: // input height
392  {
393  TString strHeight(inputDimString->GetString());
394  height = (size_t)strHeight.Atoi();
395  } break;
396  case 2: // input width
397  {
398  TString strWidth(inputDimString->GetString());
399  width = (size_t)strWidth.Atoi();
400  } break;
401  }
402  ++idxToken;
403  }
404 
405  this->SetInputDepth(depth);
406  this->SetInputHeight(height);
407  this->SetInputWidth(width);
408 }
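// For illustration: an option such as "InputLayout=3|32|32" is parsed here as input depth 3,
// input height 32 and input width 32 (e.g. a 32x32 RGB image), while "InputLayout=1|1|14"
// describes a plain vector of 14 input variables.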
409 
410 ////////////////////////////////////////////////////////////////////////////////
411 /// Parse the batch layout
412 void MethodDL::ParseBatchLayout()
413 {
414  // Define the delimiter
415  const TString delim("|");
416 
417  // Get the input layout string
418  TString batchLayoutString = this->GetBatchLayoutString();
419 
420  size_t batchDepth = 0;
421  size_t batchHeight = 0;
422  size_t batchWidth = 0;
423 
424  // Split the input layout string
425  TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
426  TIter nextBatchDim(batchDimStrings);
427  TObjString *batchDimString = (TObjString *)nextBatchDim();
428  int idxToken = 0;
429 
430  for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
431  switch (idxToken) {
432  case 0: // input depth
433  {
434  TString strDepth(batchDimString->GetString());
435  batchDepth = (size_t)strDepth.Atoi();
436  } break;
437  case 1: // input height
438  {
439  TString strHeight(batchDimString->GetString());
440  batchHeight = (size_t)strHeight.Atoi();
441  } break;
442  case 2: // input width
443  {
444  TString strWidth(batchDimString->GetString());
445  batchWidth = (size_t)strWidth.Atoi();
446  } break;
447  }
448  ++idxToken;
449  }
450 
451  this->SetBatchDepth(batchDepth);
452  this->SetBatchHeight(batchHeight);
453  this->SetBatchWidth(batchWidth);
454 }
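// For illustration: "BatchLayout=1|128|14" is parsed here as batch depth 1, batch height 128
// (the batch size) and batch width 14 (the number of input features), matching the dense-layer
// case discussed in TrainDeepNet below.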
455 
456 ////////////////////////////////////////////////////////////////////////////////
457 /// Create a deep net based on the layout string
458 template <typename Architecture_t, typename Layer_t>
459 void MethodDL::CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
460  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets)
461 {
462  // Layer specification, layer details
463  const TString layerDelimiter(",");
464  const TString subDelimiter("|");
465 
466  TString layoutString = this->GetLayoutString();
467 
468  //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
469 
470  // Split layers
471  TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
472  TIter nextLayer(layerStrings);
473  TObjString *layerString = (TObjString *)nextLayer();
474 
475 
476  for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
477  // Split layer details
478  TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
479  TIter nextToken(subStrings);
480  TObjString *token = (TObjString *)nextToken();
481 
482  // Determine the type of the layer
483  TString strLayerType = token->GetString();
484 
485 
486  if (strLayerType == "DENSE") {
487  ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
488  } else if (strLayerType == "CONV") {
489  ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
490  } else if (strLayerType == "MAXPOOL") {
491  ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
492  } else if (strLayerType == "RESHAPE") {
493  ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
494  } else if (strLayerType == "RNN") {
495  ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter);
496  } else if (strLayerType == "LSTM") {
497  Log() << kFATAL << "LSTM Layer is not yet fully implemented" << Endl;
498  //ParseLstmLayer(deepNet, nets, layerString->GetString(), subDelimiter);
499  }
500  }
501 }
502 
503 ////////////////////////////////////////////////////////////////////////////////
504 /// Parses the layer string and creates the appropriate dense layer
505 template <typename Architecture_t, typename Layer_t>
506 void MethodDL::ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
507  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
508  TString delim)
509 {
510  int width = 0;
511  EActivationFunction activationFunction = EActivationFunction::kTanh;
512 
513  // this returns the number of input variables for the method
514  // it can be used to deduce width of dense layer if specified as N+10
515  // where N is the number of input variables
516  const size_t inputSize = GetNvar();
517 
518  // Split layer details
519  TObjArray *subStrings = layerString.Tokenize(delim);
520  TIter nextToken(subStrings);
521  TObjString *token = (TObjString *)nextToken();
522  int idxToken = 0;
523 
524  // loop on the tokens
525  // order of specifying width and activation function is not relevant
526  // both 100|TANH and TANH|100 are valid cases
527  for (; token != nullptr; token = (TObjString *)nextToken()) {
528  idxToken++;
529  // first token defines the layer type- skip it
530  if (idxToken == 1) continue;
531  // try a match with the activation function
532  TString strActFnc(token->GetString());
533  if (strActFnc == "RELU") {
534  activationFunction = DNN::EActivationFunction::kRelu;
535  } else if (strActFnc == "TANH") {
536  activationFunction = DNN::EActivationFunction::kTanh;
537  } else if (strActFnc == "SYMMRELU") {
538  activationFunction = DNN::EActivationFunction::kSymmRelu;
539  } else if (strActFnc == "SOFTSIGN") {
540  activationFunction = DNN::EActivationFunction::kSoftSign;
541  } else if (strActFnc == "SIGMOID") {
542  activationFunction = DNN::EActivationFunction::kSigmoid;
543  } else if (strActFnc == "LINEAR") {
544  activationFunction = DNN::EActivationFunction::kIdentity;
545  } else if (strActFnc == "GAUSS") {
546  activationFunction = DNN::EActivationFunction::kGauss;
547  } else if (width == 0) {
548  // no match found: try to parse the token as text giving the layer width
549  // a formula is supported, where 'N' in the string stands for the number of input variables (mapped to 'x' for TFormula)
550  // use TFormula for the evaluation
551  TString strNumNodes = strActFnc;
552  // number of nodes
553  TString strN("x");
554  strNumNodes.ReplaceAll("N", strN);
555  strNumNodes.ReplaceAll("n", strN);
556  TFormula fml("tmp", strNumNodes);
557  width = fml.Eval(inputSize);
558  }
559 
560  }
561 
562  // Add the dense layer, initialize the weights and biases and copy
563  TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
564  denseLayer->Initialize();
565 
566  // add same layer to fNet
567  if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
568 
569  //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
570 
571  // add the copy to all slave nets
572  //for (size_t i = 0; i < nets.size(); i++) {
573  // nets[i].AddDenseLayer(copyDenseLayer);
574  //}
575 
576  // check compatibility of added layer
577  // for a dense layer input should be 1 x 1 x DxHxW
578 }
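// For illustration, a minimal sketch of the width evaluation performed above: for a method with
// 10 input variables, a token such as "(N+100)*2" is rewritten to "(x+100)*2" and evaluated with
// TFormula at x = 10.
//
//    TString strNumNodes("(N+100)*2");
//    strNumNodes.ReplaceAll("N", "x");
//    TFormula fml("tmp", strNumNodes);
//    int width = fml.Eval(10);   // -> 220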
579 
580 ////////////////////////////////////////////////////////////////////////////////
581 /// Parses the layer string and creates the appropriate convolutional layer
582 template <typename Architecture_t, typename Layer_t>
583 void MethodDL::ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
584  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
585  TString delim)
586 {
587  int depth = 0;
588  int fltHeight = 0;
589  int fltWidth = 0;
590  int strideRows = 0;
591  int strideCols = 0;
592  int zeroPadHeight = 0;
593  int zeroPadWidth = 0;
594  EActivationFunction activationFunction = EActivationFunction::kTanh;
595 
596  // Split layer details
597  TObjArray *subStrings = layerString.Tokenize(delim);
598  TIter nextToken(subStrings);
599  TObjString *token = (TObjString *)nextToken();
600  int idxToken = 0;
601 
602  for (; token != nullptr; token = (TObjString *)nextToken()) {
603  switch (idxToken) {
604  case 1: // depth
605  {
606  TString strDepth(token->GetString());
607  depth = strDepth.Atoi();
608  } break;
609  case 2: // filter height
610  {
611  TString strFltHeight(token->GetString());
612  fltHeight = strFltHeight.Atoi();
613  } break;
614  case 3: // filter width
615  {
616  TString strFltWidth(token->GetString());
617  fltWidth = strFltWidth.Atoi();
618  } break;
619  case 4: // stride in rows
620  {
621  TString strStrideRows(token->GetString());
622  strideRows = strStrideRows.Atoi();
623  } break;
624  case 5: // stride in cols
625  {
626  TString strStrideCols(token->GetString());
627  strideCols = strStrideCols.Atoi();
628  } break;
629  case 6: // zero padding height
630  {
631  TString strZeroPadHeight(token->GetString());
632  zeroPadHeight = strZeroPadHeight.Atoi();
633  } break;
634  case 7: // zero padding width
635  {
636  TString strZeroPadWidth(token->GetString());
637  zeroPadWidth = strZeroPadWidth.Atoi();
638  } break;
639  case 8: // activation function
640  {
641  TString strActFnc(token->GetString());
642  if (strActFnc == "RELU") {
643  activationFunction = DNN::EActivationFunction::kRelu;
644  } else if (strActFnc == "TANH") {
645  activationFunction = DNN::EActivationFunction::kTanh;
646  } else if (strActFnc == "SYMMRELU") {
647  activationFunction = DNN::EActivationFunction::kSymmRelu;
648  } else if (strActFnc == "SOFTSIGN") {
649  activationFunction = DNN::EActivationFunction::kSoftSign;
650  } else if (strActFnc == "SIGMOID") {
651  activationFunction = DNN::EActivationFunction::kSigmoid;
652  } else if (strActFnc == "LINEAR") {
653  activationFunction = DNN::EActivationFunction::kIdentity;
654  } else if (strActFnc == "GAUSS") {
655  activationFunction = DNN::EActivationFunction::kGauss;
656  }
657  } break;
658  }
659  ++idxToken;
660  }
661 
662  // Add the convolutional layer, initialize the weights and biases and copy
663  TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
664  zeroPadHeight, zeroPadWidth, activationFunction);
665  convLayer->Initialize();
666 
667  // Add same layer to fNet
668  if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
669  zeroPadHeight, zeroPadWidth, activationFunction);
670 
671  //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
672 
673  //// add the copy to all slave nets
674  //for (size_t i = 0; i < nets.size(); i++) {
675  // nets[i].AddConvLayer(copyConvLayer);
676  //}
677 }
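// For illustration: given the token order parsed above, a convolutional layer is specified in the
// Layout string as e.g. "CONV|16|3|3|1|1|1|1|RELU", i.e. 16 filters of size 3x3, stride 1x1,
// zero padding 1x1 and ReLU activation.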
678 
679 ////////////////////////////////////////////////////////////////////////////////
680 /// Parses the layer string and creates the appropriate max pool layer
681 template <typename Architecture_t, typename Layer_t>
682 void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
683  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
684  TString delim)
685 {
686 
687  int frameHeight = 0;
688  int frameWidth = 0;
689  int strideRows = 0;
690  int strideCols = 0;
691 
692  // Split layer details
693  TObjArray *subStrings = layerString.Tokenize(delim);
694  TIter nextToken(subStrings);
695  TObjString *token = (TObjString *)nextToken();
696  int idxToken = 0;
697 
698  for (; token != nullptr; token = (TObjString *)nextToken()) {
699  switch (idxToken) {
700  case 1: // frame height
701  {
702  TString strFrmHeight(token->GetString());
703  frameHeight = strFrmHeight.Atoi();
704  } break;
705  case 2: // frame width
706  {
707  TString strFrmWidth(token->GetString());
708  frameWidth = strFrmWidth.Atoi();
709  } break;
710  case 3: // stride in rows
711  {
712  TString strStrideRows(token->GetString());
713  strideRows = strStrideRows.Atoi();
714  } break;
715  case 4: // stride in cols
716  {
717  TString strStrideCols(token->GetString());
718  strideCols = strStrideCols.Atoi();
719  } break;
720  }
721  ++idxToken;
722  }
723 
724  // Add the Max pooling layer
725  // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
726  deepNet.AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols);
727 
728  // Add the same layer to fNet
729  if (fBuildNet) fNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols);
730 
731  //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
732 
733  //// add the copy to all slave nets
734  //for (size_t i = 0; i < nets.size(); i++) {
735  // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
736  //}
737 }
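// For illustration: "MAXPOOL|2|2|1|1" specifies, following the token order above, a pooling frame
// of 2x2 with strides of 1 in rows and columns.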
738 
739 ////////////////////////////////////////////////////////////////////////////////
740 /// Parses the layer string and creates the appropriate reshape layer
741 template <typename Architecture_t, typename Layer_t>
742 void MethodDL::ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
743  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
744  TString delim)
745 {
746  int depth = 0;
747  int height = 0;
748  int width = 0;
749  bool flattening = false;
750 
751  // Split layer details
752  TObjArray *subStrings = layerString.Tokenize(delim);
753  TIter nextToken(subStrings);
754  TObjString *token = (TObjString *)nextToken();
755  int idxToken = 0;
756 
757  for (; token != nullptr; token = (TObjString *)nextToken()) {
758  if (token->GetString() == "FLAT") idxToken=4;
759  switch (idxToken) {
760  case 1: {
761  TString strDepth(token->GetString());
762  depth = strDepth.Atoi();
763  } break;
764  case 2: // height
765  {
766  TString strHeight(token->GetString());
767  height = strHeight.Atoi();
768  } break;
769  case 3: // width
770  {
771  TString strWidth(token->GetString());
772  width = strWidth.Atoi();
773  } break;
774  case 4: // flattening
775  {
776  TString flat(token->GetString());
777  if (flat == "FLAT") {
778  flattening = true;
779  }
780  } break;
781  }
782  ++idxToken;
783  }
784 
785  // Add the reshape layer
786  // TReshapeLayer<Architecture_t> *reshapeLayer =
787  deepNet.AddReshapeLayer(depth, height, width, flattening);
788 
789  // Add the same layer to fNet
790  if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
791 
792  //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
793 
794  //// add the copy to all slave nets
795  //for (size_t i = 0; i < nets.size(); i++) {
796  // nets[i].AddReshapeLayer(copyReshapeLayer);
797  //}
798 }
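// For illustration: "RESHAPE|FLAT" flattens the output of the previous layer, while e.g.
// "RESHAPE|1|1|128" reshapes it to depth 1, height 1 and width 128.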
799 
800 ////////////////////////////////////////////////////////////////////////////////
801 /// Parses the layer string and creates the appropriate RNN layer
802 template <typename Architecture_t, typename Layer_t>
803 void MethodDL::ParseRnnLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
804  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
805  TString delim)
806 {
807  // int depth = 0;
808  int stateSize = 0;
809  int inputSize = 0;
810  int timeSteps = 0;
811  bool rememberState = false;
812 
813  // Split layer details
814  TObjArray *subStrings = layerString.Tokenize(delim);
815  TIter nextToken(subStrings);
816  TObjString *token = (TObjString *)nextToken();
817  int idxToken = 0;
818 
819  for (; token != nullptr; token = (TObjString *)nextToken()) {
820  switch (idxToken) {
821  case 1: // state size
822  {
823  TString strstateSize(token->GetString());
824  stateSize = strstateSize.Atoi();
825  } break;
826  case 2: // input size
827  {
828  TString strinputSize(token->GetString());
829  inputSize = strinputSize.Atoi();
830  } break;
831  case 3: // time steps
832  {
833  TString strtimeSteps(token->GetString());
834  timeSteps = strtimeSteps.Atoi();
835  } break;
836  case 4: // remember state (1 or 0)
837  {
838  TString strrememberState(token->GetString());
839  rememberState = (bool) strrememberState.Atoi();
840  } break;
841  }
842  ++idxToken;
843  }
844 
845  // Add the recurrent layer, initialize the weights and biases and copy
846  TBasicRNNLayer<Architecture_t> *basicRNNLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize,
847  timeSteps, rememberState);
848  basicRNNLayer->Initialize();
849 
850  // Add same layer to fNet
851  if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
852 
853  //TBasicRNNLayer<Architecture_t> *copyRNNLayer = new TBasicRNNLayer<Architecture_t>(*basicRNNLayer);
854 
855  //// add the copy to all slave nets
856  //for (size_t i = 0; i < nets.size(); i++) {
857  // nets[i].AddBasicRNNLayer(copyRNNLayer);
858  //}
859 }
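// For illustration: "RNN|32|10|20|1" specifies, following the token order above, a recurrent
// layer with state size 32, input size 10, 20 time steps and state remembering switched on.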
860 
861 ////////////////////////////////////////////////////////////////////////////////
862 /// Parses the layer string and creates the appropriate LSTM layer
863 template <typename Architecture_t, typename Layer_t>
864 void MethodDL::ParseLstmLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
865  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
866  TString delim)
867 {
868  // Split layer details
869  TObjArray *subStrings = layerString.Tokenize(delim);
870  TIter nextToken(subStrings);
871  TObjString *token = (TObjString *)nextToken();
872  int idxToken = 0;
873 
874  for (; token != nullptr; token = (TObjString *)nextToken()) {
875  switch (idxToken) {
876  }
877  ++idxToken;
878  }
879 }
880 
881 ////////////////////////////////////////////////////////////////////////////////
882 /// Standard constructor.
883 MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
884  : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputDepth(), fInputHeight(), fInputWidth(),
885  fBatchDepth(), fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(),
886  fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(),
887  fWeightInitializationString(), fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()
888 {
889  // Nothing to do here
890 }
891 
892 ////////////////////////////////////////////////////////////////////////////////
893 /// Constructor from a weight file.
894 MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
895  : MethodBase(Types::kDL, theData, theWeightFile), fInputDepth(), fInputHeight(), fInputWidth(), fBatchDepth(),
896  fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(),
897  fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(),
898  fWeightInitializationString(), fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()
899 {
900  // Nothing to do here
901 }
902 
903 ////////////////////////////////////////////////////////////////////////////////
904 /// Destructor.
905 MethodDL::~MethodDL()
906 {
907  // Nothing to do here
908 }
909 
910 ////////////////////////////////////////////////////////////////////////////////
911 /// Parse key-value pairs in blocks -> return a vector of blocks, each a map of key-value pairs.
912 auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
913 {
914  // remove empty spaces
915  parseString.ReplaceAll(" ","");
916  KeyValueVector_t blockKeyValues;
917  const TString keyValueDelim("=");
918 
919  TObjArray *blockStrings = parseString.Tokenize(blockDelim);
920  TIter nextBlock(blockStrings);
921  TObjString *blockString = (TObjString *)nextBlock();
922 
923  for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
924  blockKeyValues.push_back(std::map<TString, TString>());
925  std::map<TString, TString> &currentBlock = blockKeyValues.back();
926 
927  TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
928  TIter nextToken(subStrings);
929  TObjString *token = (TObjString *)nextToken();
930 
931  for (; token != nullptr; token = (TObjString *)nextToken()) {
932  TString strKeyValue(token->GetString());
933  int delimPos = strKeyValue.First(keyValueDelim.Data());
934  if (delimPos <= 0) continue;
935 
936  TString strKey = TString(strKeyValue(0, delimPos));
937  strKey.ToUpper();
938  TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
939 
940  strKey.Strip(TString::kBoth, ' ');
941  strValue.Strip(TString::kBoth, ' ');
942 
943  currentBlock.insert(std::make_pair(strKey, strValue));
944  }
945  }
946  return blockKeyValues;
947 }
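// For illustration: called with blockDelim "|" and tokenDelim "," (as done for the
// TrainingStrategy option), a string such as
//    "LearningRate=1e-1,BatchSize=30|LearningRate=1e-3,BatchSize=20"
// yields two blocks, each a map with upper-case keys, e.g. {"LEARNINGRATE" -> "1e-1",
// "BATCHSIZE" -> "30"} for the first block.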
948 
949 ////////////////////////////////////////////////////////////////////////////////
950 /// Returns kTRUE if the given analysis type (binary classification, multiclass or regression) can be handled by this method
951 Bool_t MethodDL::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/)
952 {
953  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
954  if (type == Types::kMulticlass) return kTRUE;
955  if (type == Types::kRegression) return kTRUE;
956 
957  return kFALSE;
958 }
959 
960 
961 ////////////////////////////////////////////////////////////////////////////////
962 /// Implementation of architecture specific train method
963 ///
964 template <typename Architecture_t>
965 void MethodDL::TrainDeepNet()
966 {
967 
968  using Scalar_t = typename Architecture_t::Scalar_t;
969  using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
970  using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
971 
972  bool debug = Log().GetMinType() == kDEBUG;
973 
974  // Determine the number of training and testing examples
975  size_t nTrainingSamples = GetEventCollection(Types::kTraining).size();
976  size_t nTestSamples = GetEventCollection(Types::kTesting).size();
977 
978  // Determine the number of outputs
979  // // size_t outputSize = 1;
980  // // if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
981  // // outputSize = GetNTargets();
982  // // } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
983  // // outputSize = DataInfo().GetNClasses();
984  // // }
985 
986  // set the random seed for weight initialization
987  Architecture_t::SetRandomSeed(fRandomSeed);
988 
989  size_t trainingPhase = 1;
990  for (TTrainingSettings &settings : this->GetTrainingSettings()) {
991 
992  size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
993 
994 
995  // After the processing of the options, initialize the master deep net
996  size_t batchSize = settings.batchSize;
997  // Should be replaced by actual implementation. No support for this now.
998  size_t inputDepth = this->GetInputDepth();
999  size_t inputHeight = this->GetInputHeight();
1000  size_t inputWidth = this->GetInputWidth();
1001  size_t batchDepth = this->GetBatchDepth();
1002  size_t batchHeight = this->GetBatchHeight();
1003  size_t batchWidth = this->GetBatchWidth();
1004  ELossFunction J = this->GetLossFunction();
1005  EInitialization I = this->GetWeightInitialization();
1006  ERegularization R = settings.regularization;
1007  Scalar_t weightDecay = settings.weightDecay;
1008 
1009  // Batch size should be included in the batch layout as well. There are two possibilities:
1010  // 1. Batch depth = batch size: the input tensors are passed as (batch_size x d1 x d2).
1011  //    This is the case, for example, if the first layer is a conv layer: d1 = image depth, d2 = image width x image height.
1012  // 2. Batch depth = 1, batch height = batch size, batch width = number of input features.
1013  //    This should be the case if the first layer is a dense layer; the input tensor must then be ( 1 x batch_size x input_features ).
1014 
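 //  For illustration (the numbers are placeholders): with BatchSize=32 and InputLayout=1|28|28,
 //  case 1 corresponds to BatchLayout=32|1|784 (input tensor 32 x 1 x 784, conv-style first layer),
 //  case 2 corresponds to BatchLayout=1|32|784 (input tensor 1 x 32 x 784, dense first layer).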
1015  if (batchDepth != batchSize && batchDepth > 1) {
1016  Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1017  return;
1018  }
1019  if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1020  Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1021  return;
1022  }
1023 
1024 
1025  //check also that input layout compatible with batch layout
1026  bool badLayout = false;
1027  // case batch depth == batch size
1028  if (batchDepth == batchSize)
1029  badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1030  // case batch Height is batch size
1031  if (batchHeight == batchSize && batchDepth == 1)
1032  badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1033  if (badLayout) {
1034  Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1035  inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1036  return;
1037  }
1038 
1039 
1040  DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1041 
1042  // create a copy of DeepNet for evaluating but with batch size = 1
1043  // fNet is the saved network and will use the CPU or Reference architecture
1044  if (trainingPhase == 1) {
1045  fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1046  batchHeight, batchWidth, J, I, R, weightDecay));
1047  fBuildNet = true;
1048  }
1049  else
1050  fBuildNet = false;
1051 
1052  // Initialize the vector of slave nets
1053  std::vector<DeepNet_t> nets{};
1054  nets.reserve(nThreads);
1055  for (size_t i = 0; i < nThreads; i++) {
1056  // create copies of the master deep net
1057  nets.push_back(deepNet);
1058  }
1059 
1060  // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1061  CreateDeepNet(deepNet, nets);
1062 
1063  if (trainingPhase > 1) {
1064  // copy initial weights from fNet to deepnet
1065  for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1066  const auto & nLayer = fNet->GetLayerAt(i);
1067  const auto & dLayer = deepNet.GetLayerAt(i);
1068  // could use a traits for detecting equal architectures
1069  // dLayer->CopyWeights(nLayer->GetWeights());
1070  // dLayer->CopyBiases(nLayer->GetBiases());
1071  Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights() );
1072  Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases() );
1073  }
1074  }
1075 
1076  // print the created network
1077  if (fBuildNet) {
1078  Log() << "***** Deep Learning Network *****" << Endl;
1079  if (Log().GetMinType() <= kINFO)
1080  deepNet.Print();
1081  }
1082 
1083  // Loading the training and testing datasets
1084  TMVAInput_t trainingTuple = std::tie(GetEventCollection(Types::kTraining), DataInfo());
1085  TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, deepNet.GetBatchSize(),
1086  deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
1087  deepNet.GetOutputWidth(), nThreads);
1088 
1089  TMVAInput_t testTuple = std::tie(GetEventCollection(Types::kTesting), DataInfo());
1090  TensorDataLoader_t testingData(testTuple, nTestSamples, deepNet.GetBatchSize(),
1091  deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
1092  deepNet.GetOutputWidth(), nThreads);
1093 
1094  // Initialize the minimizer
1095  DNN::TDLGradientDescent<Architecture_t> minimizer(settings.learningRate, settings.convergenceSteps,
1096  settings.testInterval);
1097 
1098  // Initialize the vector of batches, one batch for one slave network
1099  std::vector<TTensorBatch<Architecture_t>> batches{};
1100 
1101  bool converged = false;
1102  // count the steps until the convergence
1103  size_t stepCount = 0;
1104  size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1105 
1106  // start measuring
1107  std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1108  tstart = std::chrono::system_clock::now();
1109 
1110  Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ":" << Endl;
1111  if (!fInteractive) {
1112  std::string separator(62, '-');
1113  Log() << separator << Endl;
1114  Log() << std::setw(10) << "Epoch"
1115  << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Test Err."
1116  << std::setw(12) << "t(s)/epoch" << std::setw(12) << "Eval t(s)"
1117  << std::setw(12) << "nEvents/s"
1118  << std::setw(12) << "Conv. Steps" << Endl;
1119  Log() << separator << Endl;
1120  }
1121 
1122  // set up generator for shuffling the batches
1123  // if the seed is zero we always get a different order of the batches
1124  size_t shuffleSeed = 0;
1125  if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1126  RandomGenerator<TRandom3> rng(shuffleSeed);
1127 
1128  // print weights before
1129  if (fBuildNet && debug) {
1130  Log() << "Initial Deep Net Weights " << Endl;
1131  auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1132  for (size_t l = 0; l < weights_tensor.size(); ++l)
1133  weights_tensor[l].Print();
1134  auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1135  bias_tensor[0].Print();
1136  }
1137 
1138  Double_t minTestError = 0;
1139 
1140  while (!converged) {
1141  stepCount++;
1142  trainingData.Shuffle(rng);
1143 
1144  // execute all epochs
1145  //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1146 
1147  for (size_t i = 0; i < batchesInEpoch; ++i ) {
1148  // Clean and load new batches, one batch for one slave net
1149  //batches.clear();
1150  //batches.reserve(nThreads);
1151  //for (size_t j = 0; j < nThreads; j++) {
1152  // batches.push_back(trainingData.GetTensorBatch());
1153  //}
1154 
1155  auto my_batch = trainingData.GetTensorBatch();
1156 
1157 
1158 
1159 
1160  // execute one minimization step
1161  // StepMomentum is currently not written for single thread, TODO write it
1162  if (settings.momentum > 0.0) {
1163  //minimizer.StepMomentum(deepNet, nets, batches, settings.momentum);
1164  minimizer.Step(deepNet, my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1165  } else {
1166  //minimizer.Step(deepNet, nets, batches);
1167  minimizer.Step(deepNet, my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1168  }
1169 
1170 
1171  }
1172  //}
1173 
1174 
1175  if ((stepCount % minimizer.GetTestInterval()) == 0) {
1176 
1177  std::chrono::time_point<std::chrono::system_clock> t1,t2;
1178 
1179  t1 = std::chrono::system_clock::now();
1180 
1181  // Compute test error.
1182  Double_t testError = 0.0;
1183  for (auto batch : testingData) {
1184  auto inputTensor = batch.GetInput();
1185  auto outputMatrix = batch.GetOutput();
1186  auto weights = batch.GetWeights();
1187  testError += deepNet.Loss(inputTensor, outputMatrix, weights);
1188  }
1189 
1190 
1191  t2 = std::chrono::system_clock::now();
1192  testError /= (Double_t)(nTestSamples / settings.batchSize);
1193  // copy the configuration when a new minimum error is reached
1194  if (testError < minTestError ) {
1195  // Copy weights from deepNet to fNet
1196  Log() << std::setw(10) << stepCount << " Minimum test error found - save the configuration " << Endl;
1197  for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1198  const auto & nLayer = fNet->GetLayerAt(i);
1199  const auto & dLayer = deepNet.GetLayerAt(i);
1200  //nLayer->CopyWeights(dLayer->GetWeights());
1201  //nLayer->CopyBiases(dLayer->GetBiases());
1202  ArchitectureImpl_t::CopyDiffArch(nLayer->GetWeights(), dLayer->GetWeights() );
1203  ArchitectureImpl_t::CopyDiffArch(nLayer->GetBiases(), dLayer->GetBiases() );
1204  // std::cout << "Weights for layer " << i << std::endl;
1205  // for (size_t k = 0; k < dlayer->GetWeights().size(); ++k)
1206  // dLayer->GetWeightsAt(k).Print();
1207  }
1208  minTestError = testError;
1209  }
1210  else if ( minTestError <= 0. )
1211  minTestError = testError;
1212 
1213 
1214  Double_t trainingError = 0.0;
1215  // Compute training error.
1216  for (auto batch : trainingData) {
1217  auto inputTensor = batch.GetInput();
1218  auto outputMatrix = batch.GetOutput();
1219  auto weights = batch.GetWeights();
1220 
1221  trainingError += deepNet.Loss(inputTensor, outputMatrix, weights);
1222  }
1223  trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1224 
1225  // stop measuring
1226  tend = std::chrono::system_clock::now();
1227 
1228  // Compute numerical throughput.
1229  std::chrono::duration<double> elapsed_seconds = tend - tstart;
1230  std::chrono::duration<double> elapsed1 = t1-tstart;
1231  // std::chrono::duration<double> elapsed2 = t2-tstart;
1232  // time to compute training and test errors
1233  std::chrono::duration<double> elapsed_testing = tend-t1;
1234 
1235 
1236  double seconds = elapsed_seconds.count();
1237  // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1238  // nGFlops *= deepnet.GetNFlops() * 1e-9;
1239  double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1240 
1241  converged = minimizer.HasConverged(testError) || stepCount >= settings.maxEpochs;
1242 
1243  Log() << std::setw(10) << stepCount << " | " << std::setw(12) << trainingError << std::setw(12) << testError
1244  << std::setw(12) << seconds/settings.testInterval
1245  << std::setw(12) << elapsed_testing.count()
1246  << std::setw(12) << 1./eventTime
1247  << std::setw(12) << minimizer.GetConvergenceCount()
1248  << Endl;
1249 
1250  if (converged) {
1251  Log() << Endl;
1252  }
1253  tstart = std::chrono::system_clock::now();
1254  }
1255 
1256  //if (stepCount % 10 == 0 || converged) {
1257  if (converged && debug) {
1258  Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << stepCount << Endl;
1259  auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1260  auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1261  for (size_t l = 0; l < weights_tensor.size(); ++l)
1262  weights_tensor[l].Print();
1263  bias_tensor[0].Print();
1264  }
1265 
1266 
1267  }
1268 
1269  trainingPhase++;
1270  } // end loop on training Phase
1271 
1272 }
1273 
1274 ////////////////////////////////////////////////////////////////////////////////
1275 void MethodDL::Train()
1276 {
1277  if (fInteractive) {
1278  Log() << kFATAL << "Not implemented yet" << Endl;
1279  return;
1280  }
1281 
1282  if (this->GetArchitectureString() == "GPU") {
1283 #ifdef R__HAS_TMVAGPU
1284  Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1285  TrainDeepNet<DNN::TCuda<Double_t> >();
1286 #else
1287  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1288  "you have CUDA installed and it was successfully "
1289  "detected by CMAKE."
1290  << Endl;
1291  return;
1292 #endif
1293  } else if (this->GetArchitectureString() == "OPENCL") {
1294  Log() << kFATAL << "OPENCL backend not yet supported." << Endl;
1295  return;
1296  } else if (this->GetArchitectureString() == "CPU") {
1297 #ifdef R__HAS_TMVACPU
1298  Log() << kINFO << "Start of deep neural network training on CPU." << Endl << Endl;
1299  TrainDeepNet<DNN::TCpu<Double_t> >();
1300 #else
1301  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1302  "you have a BLAS implementation and it was successfully "
1303  "detected by CMake as well that the imt CMake flag is set."
1304  << Endl;
1305  return;
1306 #endif
1307  } else if (this->GetArchitectureString() == "STANDARD") {
1308  Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
1309  TrainDeepNet<DNN::TReference<Double_t> >();
1310  }
1311  else {
1312  Log() << kFATAL << this->GetArchitectureString() <<
1313  " is not a supported archiectire for TMVA::MethodDL"
1314  << Endl;
1315  }
1316 
1317 // /// definitions for CUDA
1318 // #ifdef R__HAS_TMVAGPU // Included only if DNNCUDA flag is set.
1319 // using Architecture_t = DNN::TCuda<Double_t>;
1320 // #else
1321 // #ifdef R__HAS_TMVACPU // Included only if DNNCPU flag is set.
1322 // using Architecture_t = DNN::TCpu<Double_t>;
1323 // #else
1324 // using Architecture_t = DNN::TReference<Double_t>;
1325 // #endif
1326 // #endif
1327 }
1328 
1329 
1330 ////////////////////////////////////////////////////////////////////////////////
1331 Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1332 {
1333  using Matrix_t = typename ArchitectureImpl_t::Matrix_t;
1334 
1335  int nVariables = GetEvent()->GetNVariables();
1336  int batchWidth = fNet->GetBatchWidth();
1337  int batchDepth = fNet->GetBatchDepth();
1338  int batchHeight = fNet->GetBatchHeight();
1339  int nb = fNet->GetBatchSize();
1340  int noutput = fNet->GetOutputWidth();
1341 
1342  // note that the batch size should be equal to 1
1343  R__ASSERT(nb == 1);
1344 
1345  std::vector<Matrix_t> X{};
1346  Matrix_t YHat(nb, noutput);
1347 
1348  // get current event
1349  const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1350 
1351  // for (int i = 0; i < batchDepth; ++i)
1352 
1353  // find dimension of matrices
1354  // Tensor outer size must be equal to 1
1355  // because nb ==1 by definition
1356  int n1 = batchHeight;
1357  int n2 = batchWidth;
1358  // treat case where batchHeight is batchSize in case of first Dense layers
1359  if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = 1;
1360 
1361  X.emplace_back(Matrix_t(n1, n2));
1362 
1363  if (n1 > 1) {
1364  if (n1*n2 != nVariables) {
1365  std::cout << n1 << " " << batchDepth << " " << GetInputHeight() << " " << GetInputDepth() << std::endl;
1366  }
1367  R__ASSERT( n1*n2 == nVariables);
1368  // for CNN or RNN evaluations
1369  for (int j = 0; j < n1; ++j) {
1370  for (int k = 0; k < n2; k++) {
1371  X[0](j, k) = inputValues[j*n2+k];
1372  }
1373  }
1374  }
1375  else {
1376  R__ASSERT( n2 == nVariables);
1377  for (int k = 0; k < n2; k++) {
1378  X[0](0, k) = inputValues[k];
1379  }
1380  }
1381 
1382  // perform the prediction
1383  fNet->Prediction(YHat, X, fOutputFunction);
1384 
1385  double mvaValue = YHat(0, 0);
1386 
1387  // for debugging
1388 #ifdef DEBUG_MVAVALUE
1389  using Tensor_t = std::vector<Matrix_t>;
1390  TMatrixF xInput(n1,n2, inputValues.data() );
1391  std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1392  xInput.Print();
1393  std::cout << "Output of DeepNet " << mvaValue << std::endl;
1394  auto & deepnet = *fNet;
1395  std::cout << "Loop on layers " << std::endl;
1396  for (int l = 0; l < deepnet.GetDepth(); ++l) {
1397  std::cout << "Layer " << l;
1398  const auto * layer = deepnet.GetLayerAt(l);
1399  const Tensor_t & layer_output = layer->GetOutput();
1400  layer->Print();
1401  std::cout << "DNN output " << layer_output.size() << std::endl;
1402  for (size_t i = 0; i < layer_output.size(); ++i) {
1403 #ifdef R__HAS_TMVAGPU
1404  //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1405  TMatrixD m = layer_output[i];
1406 #else
1407  TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1408 #endif
1409  m.Print();
1410  }
1411  const Tensor_t & layer_weights = layer->GetWeights();
1412  std::cout << "DNN weights " << layer_weights.size() << std::endl;
1413  if (layer_weights.size() > 0) {
1414  int i = 0;
1415 #ifdef R__HAS_TMVAGPU
1416  TMatrixD m = layer_weights[i];
1417 // TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1418 #else
1419  TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1420 #endif
1421  m.Print();
1422  }
1423  }
1424 #endif
1425 
1426 
1427 
1428  return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1429 
1430 }
1431 
1432 ////////////////////////////////////////////////////////////////////////////////
1433 void MethodDL::AddWeightsXMLTo(void * parent) const
1434 {
1435  // Create the parent XML node with name "Weights"
1436  auto & xmlEngine = gTools().xmlengine();
1437  void* nn = xmlEngine.NewChild(parent, 0, "Weights");
1438 
1439  /*! Get all necessary information, in order to be able to reconstruct the net
1440  * if we read the same XML file. */
1441 
1442  // Deep Net specific info
1443  Int_t depth = fNet->GetDepth();
1444 
1445  Int_t inputDepth = fNet->GetInputDepth();
1446  Int_t inputHeight = fNet->GetInputHeight();
1447  Int_t inputWidth = fNet->GetInputWidth();
1448 
1449  Int_t batchSize = fNet->GetBatchSize();
1450 
1451  Int_t batchDepth = fNet->GetBatchDepth();
1452  Int_t batchHeight = fNet->GetBatchHeight();
1453  Int_t batchWidth = fNet->GetBatchWidth();
1454 
1455  char lossFunction = static_cast<char>(fNet->GetLossFunction());
1456  char initialization = static_cast<char>(fNet->GetInitialization());
1457  char regularization = static_cast<char>(fNet->GetRegularization());
1458 
1459  Double_t weightDecay = fNet->GetWeightDecay();
1460 
1461  // Method specific info (not sure these are needed)
1462  char outputFunction = static_cast<char>(this->GetOutputFunction());
1463  //char lossFunction = static_cast<char>(this->GetLossFunction());
1464 
1465  // Add attributes to the parent node
1466  xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
1467 
1468  xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
1469  xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
1470  xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
1471 
1472  xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
1473  xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
1474  xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
1475  xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
1476 
1477  xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1478  xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
1479  xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
1480  xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
1481 
1482  gTools().AddAttr(nn, "WeightDecay", weightDecay);
1483 
1484 
1485  for (Int_t i = 0; i < depth; i++)
1486  {
1487  fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
1488  }
1489 
1490 
1491 }
1492 
1493 ////////////////////////////////////////////////////////////////////////////////
1494 void MethodDL::ReadWeightsFromXML(void * rootXML)
1495 {
1496 
1497  auto netXML = gTools().GetChild(rootXML, "Weights");
1498  if (!netXML){
1499  netXML = rootXML;
1500  }
1501 
1502  size_t netDepth;
1503  gTools().ReadAttr(netXML, "NetDepth", netDepth);
1504 
1505  size_t inputDepth, inputHeight, inputWidth;
1506  gTools().ReadAttr(netXML, "InputDepth", inputDepth);
1507  gTools().ReadAttr(netXML, "InputHeight", inputHeight);
1508  gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1509 
1510  size_t batchSize, batchDepth, batchHeight, batchWidth;
1511  gTools().ReadAttr(netXML, "BatchSize", batchSize);
1512  // use always batchsize = 1
1513  //batchSize = 1;
1514  gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
1515  gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
1516  gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
1517 
1518  char lossFunctionChar;
1519  gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1520  char initializationChar;
1521  gTools().ReadAttr(netXML, "Initialization", initializationChar);
1522  char regularizationChar;
1523  gTools().ReadAttr(netXML, "Regularization", regularizationChar);
1524  char outputFunctionChar;
1525  gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1526  double weightDecay;
1527  gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
1528 
1529  // create the net
1530 
1531  // DeepNetCpu_t is defined in MethodDL.h
1532  this->SetInputDepth(inputDepth);
1533  this->SetInputHeight(inputHeight);
1534  this->SetInputWidth(inputWidth);
1535  this->SetBatchDepth(batchDepth);
1536  this->SetBatchHeight(batchHeight);
1537  this->SetBatchWidth(batchWidth);
1538 
1539 
1540 
1541  fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
1542  batchHeight, batchWidth,
1543  static_cast<ELossFunction>(lossFunctionChar),
1544  static_cast<EInitialization>(initializationChar),
1545  static_cast<ERegularization>(regularizationChar),
1546  weightDecay));
1547 
1548  fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1549 
1550 
1551  //size_t previousWidth = inputWidth;
1552  auto layerXML = gTools().xmlengine().GetChild(netXML);
1553 
1554  // loop on the layer and add them to the network
1555  for (size_t i = 0; i < netDepth; i++) {
1556 
1557  TString layerName = gTools().xmlengine().GetNodeName(layerXML);
1558 
1559  // case of dense layer
1560  if (layerName == "DenseLayer") {
1561 
1562  // read the width and the activation function, then create the layer
1563  size_t width = 0;
1564  gTools().ReadAttr(layerXML, "Width", width);
1565 
1566  // Read activation function.
1567  TString funcString;
1568  gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
1569  EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
1570 
1571 
1572  fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
1573 
1574  }
1575  // Convolutional Layer
1576  else if (layerName == "ConvLayer") {
1577 
1578  // read the depth, filter and stride sizes, padding and activation function, then create the layer
1579  size_t depth = 0;
1580  gTools().ReadAttr(layerXML, "Depth", depth);
1581  size_t fltHeight = 0, fltWidth = 0;
1582  size_t strideRows = 0, strideCols = 0;
1583  size_t padHeight = 0, padWidth = 0;
1584  gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
1585  gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
1586  gTools().ReadAttr(layerXML, "StrideRows", strideRows);
1587  gTools().ReadAttr(layerXML, "StrideCols", strideCols);
1588  gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
1589  gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
1590 
1591  // Read activation function.
1592  TString funcString;
1593  gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
1594  EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
1595 
1596 
1597  fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
1598  padHeight, padWidth, actFunction);
1599 
1600  }
1601 
1602  // MaxPool Layer
1603  else if (layerName == "MaxPoolLayer") {
1604 
1605  // read maxpool layer info
1606  size_t frameHeight = 0, frameWidth = 0;
1607  size_t strideRows = 0, strideCols = 0;
1608  gTools().ReadAttr(layerXML, "FrameHeight", frameHeight);
1609  gTools().ReadAttr(layerXML, "FrameWidth", frameWidth);
1610  gTools().ReadAttr(layerXML, "StrideRows", strideRows);
1611  gTools().ReadAttr(layerXML, "StrideCols", strideCols);
1612 
1613  fNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols);
1614  }
1615  else if (layerName == "ReshapeLayer") {
1616 
1617  // read reshape layer info
1618  size_t depth = 0, height = 0, width = 0;
1619  gTools().ReadAttr(layerXML, "Depth", depth);
1620  gTools().ReadAttr(layerXML, "Height", height);
1621  gTools().ReadAttr(layerXML, "Width", width);
1622  int flattening = 0;
1623  gTools().ReadAttr(layerXML, "Flattening",flattening );
1624 
1625  fNet->AddReshapeLayer(depth, height, width, flattening);
1626 
1627  }
1628  else if (layerName == "RNNLayer") {
1629 
1630  // read RNN layer info
1631  size_t stateSize = 0, inputSize = 0, timeSteps = 0;
1632  int rememberState = 0;
1633  gTools().ReadAttr(layerXML, "StateSize", stateSize);
1634  gTools().ReadAttr(layerXML, "InputSize", inputSize);
1635  gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
1636  gTools().ReadAttr(layerXML, "RememberState", rememberState );
1637 
1638  fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
1639 
1640  }
1641 
1642 
1643  // finally, read the weights and biases of this layer
1644  fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
1645 
1646  // read next layer
1647  layerXML = gTools().GetNextChild(layerXML);
1648  }
1649 }
1650 
1651 ////////////////////////////////////////////////////////////////////////////////
1652 void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
1653 {
1654 }
1655 
1656 ////////////////////////////////////////////////////////////////////////////////
1657 const Ranking *MethodDL::CreateRanking()
1658 {
1659  // TODO
1660  return NULL;
1661 }
1662 
1663 ////////////////////////////////////////////////////////////////////////////////
1664 void MethodDL::GetHelpMessage() const
1665 {
1666  // TODO
1667 }
1668 
1669 } // namespace TMVA
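
On the application side, the weight file produced by AddWeightsXMLTo is parsed back through ReadWeightsFromXML when the method is booked with TMVA::Reader. The following is a minimal sketch of that round trip; the method tag "DL", the weight-file path and the variable names are placeholders for whatever was used during training, so treat it as an assumed usage pattern rather than code from this file.

#include "TMVA/Reader.h"

void ApplyTrainedDL()
{
   // Input variables must match, in name and order, those used for training
   // (the names here are placeholders).
   Float_t var1 = 0.f, var2 = 0.f;

   TMVA::Reader reader("!Color:!Silent");
   reader.AddVariable("var1", &var1);
   reader.AddVariable("var2", &var2);

   // Booking the method reads the <Weights> node of the XML file and rebuilds
   // fNet via MethodDL::ReadWeightsFromXML (assumed file location).
   reader.BookMVA("DL", "dataset/weights/TMVAClassification_DL.weights.xml");

   // Evaluate one event: a forward pass through the restored network.
   var1 = 1.2f;
   var2 = -0.7f;
   Double_t mvaValue = reader.EvaluateMVA("DL");
   (void)mvaValue;
}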