Logo ROOT  
Reference Guide
MethodDL.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/cnn:$Id$Ndl
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * Web : http://tmva.sourceforge.net *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31#include "TObjString.h"
32
33#include "TMVA/Tools.h"
34#include "TMVA/Configurable.h"
35#include "TMVA/IMethod.h"
37#include "TMVA/MethodDL.h"
38#include "TMVA/Types.h"
40#include "TMVA/DNN/Functions.h"
42#include "TMVA/DNN/SGD.h"
43#include "TMVA/DNN/Adam.h"
44#include "TMVA/DNN/Adagrad.h"
45#include "TMVA/DNN/RMSProp.h"
46#include "TMVA/DNN/Adadelta.h"
47#include "TMVA/Timer.h"
48
49#include "TStopwatch.h"
50
51#include <chrono>
52
55
56using namespace TMVA::DNN::CNN;
57using namespace TMVA::DNN;
58
64
65
66namespace TMVA {
67
68
69////////////////////////////////////////////////////////////////////////////////
70TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
71{
72 key.ToUpper();
73 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
74 if (it == keyValueMap.end()) {
75 return TString("");
76 }
77 return it->second;
78}
79
80////////////////////////////////////////////////////////////////////////////////
81template <typename T>
82T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
83
84////////////////////////////////////////////////////////////////////////////////
85template <>
86int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
87{
88 TString value(fetchValueTmp(keyValueMap, key));
89 if (value == "") {
90 return defaultValue;
91 }
92 return value.Atoi();
93}
94
95////////////////////////////////////////////////////////////////////////////////
96template <>
97double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
98{
99 TString value(fetchValueTmp(keyValueMap, key));
100 if (value == "") {
101 return defaultValue;
102 }
103 return value.Atof();
104}
105
106////////////////////////////////////////////////////////////////////////////////
107template <>
108TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
109{
110 TString value(fetchValueTmp(keyValueMap, key));
111 if (value == "") {
112 return defaultValue;
113 }
114 return value;
115}
116
117////////////////////////////////////////////////////////////////////////////////
118template <>
119bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
120{
121 TString value(fetchValueTmp(keyValueMap, key));
122 if (value == "") {
123 return defaultValue;
124 }
125
126 value.ToUpper();
127 if (value == "TRUE" || value == "T" || value == "1") {
128 return true;
129 }
130
131 return false;
132}
133
134////////////////////////////////////////////////////////////////////////////////
135template <>
136std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
137 std::vector<double> defaultValue)
138{
139 TString parseString(fetchValueTmp(keyValueMap, key));
140 if (parseString == "") {
141 return defaultValue;
142 }
143
144 parseString.ToUpper();
145 std::vector<double> values;
146
147 const TString tokenDelim("+");
148 TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
149 TIter nextToken(tokenStrings);
150 TObjString *tokenString = (TObjString *)nextToken();
151 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
152 std::stringstream sstr;
153 double currentValue;
154 sstr << tokenString->GetString().Data();
155 sstr >> currentValue;
156 values.push_back(currentValue);
157 }
158 return values;
159}
160
161////////////////////////////////////////////////////////////////////////////////
163{
164 // Set default values for all option strings
165
166 DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
167
168 DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
169
170 DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
171
172 DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
173 " or cross entropy (binary classification).");
174 AddPreDefVal(TString("CROSSENTROPY"));
175 AddPreDefVal(TString("SUMOFSQUARES"));
176 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
177
178 DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
179 AddPreDefVal(TString("XAVIER"));
180 AddPreDefVal(TString("XAVIERUNIFORM"));
181 AddPreDefVal(TString("GAUSS"));
182 AddPreDefVal(TString("UNIFORM"));
183 AddPreDefVal(TString("IDENTITY"));
184 AddPreDefVal(TString("ZERO"));
185
186 DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
187
188 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
189 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
190 "Specify as 100 to use exactly 100 events. (Default: 20%)");
191
192 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
193 AddPreDefVal(TString("STANDARD")); // deprecated and not supported anymore
194 AddPreDefVal(TString("CPU"));
195 AddPreDefVal(TString("GPU"));
196 AddPreDefVal(TString("OPENCL")); // not yet implemented
197 AddPreDefVal(TString("CUDNN")); // not needed (by default GPU is now CUDNN if available)
198
199 // define training stratgey separated by a separator "|"
200 DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
201 "Momentum=0.3,"
202 "Repetitions=3,"
203 "ConvergenceSteps=50,"
204 "BatchSize=30,"
205 "TestRepetitions=7,"
206 "WeightDecay=0.0,"
207 "Regularization=None,"
208 "DropConfig=0.0,"
209 "DropRepetitions=5"
210 "|"
211 "LearningRate=1e-4,"
212 "Momentum=0.3,"
213 "Repetitions=3,"
214 "ConvergenceSteps=50,"
215 "MaxEpochs=2000,"
216 "BatchSize=20,"
217 "TestRepetitions=7,"
218 "WeightDecay=0.001,"
219 "Regularization=L2,"
220 "DropConfig=0.0+0.5+0.5,"
221 "DropRepetitions=5,"
222 "Multithreading=True",
223 "TrainingStrategy", "Defines the training strategies.");
224}
225
226////////////////////////////////////////////////////////////////////////////////
228{
229
231 Log() << kINFO << "Will ignore negative events in training!" << Endl;
232 }
233
234 if (fArchitectureString == "STANDARD") {
235 Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
236 "Please use Architecture=CPU or Architecture=CPU."
237 "See the TMVA Users' Guide for instructions if you "
238 "encounter problems."
239 << Endl;
240 Log() << kINFO << "We will use instead the CPU architecture" << Endl;
241 fArchitectureString = "CPU";
242 }
243 if (fArchitectureString == "OPENCL") {
244 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
245 "Please use Architecture=CPU or Architecture=CPU for the "
246 "time being. See the TMVA Users' Guide for instructions "
247 "if you encounter problems."
248 << Endl;
249 // use instead GPU
250 Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
251 fArchitectureString = "GPU";
252 }
253
254 // the architecture can now be set at runtime as an option
255
256
257 if (fArchitectureString == "GPU" || fArchitectureString == "CUDNN") {
258#ifdef R__HAS_TMVAGPU
259 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
260#else // case TMVA does not support GPU
261 Log() << kERROR << "CUDA backend not enabled. Please make sure "
262 "you have CUDA installed and it was successfully "
263 "detected by CMAKE by using -Dtmva-gpu=On "
264 << Endl;
265 fArchitectureString = "CPU";
266 Log() << kINFO << "Will now use instead the CPU architecture !" << Endl;
267#endif
268 }
269
270 if (fArchitectureString == "CPU") {
271#ifdef R__HAS_TMVACPU // TMVA has CPU BLAS and IMT support
272 Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support !" << Endl;
273#else // TMVA has no CPU BLAS or IMT support
274 Log() << kINFO << "Multi-core CPU backend not enabled. For better performances, make sure "
275 "you have a BLAS implementation and it was successfully "
276 "detected by CMake as well that the imt CMake flag is set."
277 << Endl;
278 Log() << kINFO << "Will use anyway the CPU architecture but with slower performance" << Endl;
279#endif
280 }
281
282 // Input Layout
285
286 // Loss function and output.
289 if (fErrorStrategy == "SUMOFSQUARES") {
290 fLossFunction = ELossFunction::kMeanSquaredError;
291 }
292 if (fErrorStrategy == "CROSSENTROPY") {
294 }
296 } else if (fAnalysisType == Types::kRegression) {
297 if (fErrorStrategy != "SUMOFSQUARES") {
298 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
299 << " neural net error function. Setting error function to "
300 << " SUMOFSQUARES now." << Endl;
301 }
302
303 fLossFunction = ELossFunction::kMeanSquaredError;
305 } else if (fAnalysisType == Types::kMulticlass) {
306 if (fErrorStrategy == "SUMOFSQUARES") {
307 fLossFunction = ELossFunction::kMeanSquaredError;
308 }
309 if (fErrorStrategy == "CROSSENTROPY") {
311 }
312 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
313 fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
314 }
316 }
317
318 // Initialization
319 // the biases will be always initialized to zero
320 if (fWeightInitializationString == "XAVIER") {
322 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
324 } else if (fWeightInitializationString == "GAUSS") {
326 } else if (fWeightInitializationString == "UNIFORM") {
328 } else if (fWeightInitializationString == "ZERO") {
330 } else if (fWeightInitializationString == "IDENTITY") {
332 } else {
334 }
335
336 // Training settings.
337
339 for (auto &block : strategyKeyValues) {
340 TTrainingSettings settings;
341
342 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
343 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
344 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
345 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
346 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
347 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
348 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
349 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
350
351 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
352 if (regularization == "L1") {
354 } else if (regularization == "L2") {
356 } else {
358 }
359
360 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
361 settings.optimizerName = optimizer;
362 if (optimizer == "SGD") {
364 } else if (optimizer == "ADAM") {
366 } else if (optimizer == "ADAGRAD") {
368 } else if (optimizer == "RMSPROP") {
370 } else if (optimizer == "ADADELTA") {
372 } else {
373 // Make Adam as default choice if the input string is
374 // incorrect.
376 settings.optimizerName = "ADAM";
377 }
378
379
380 TString strMultithreading = fetchValueTmp(block, "Multithreading", TString("True"));
381
382 if (strMultithreading.BeginsWith("T")) {
383 settings.multithreading = true;
384 } else {
385 settings.multithreading = false;
386 }
387
388 fTrainingSettings.push_back(settings);
389 }
390
391 this->SetBatchSize(fTrainingSettings.front().batchSize);
392
393 // case inputlayout and batch layout was not given. Use default then
394 // (1, batchsize, nvariables)
395 // fInputShape[0] -> BatchSize
396 // fInputShape[1] -> InputDepth
397 // fInputShape[2] -> InputHeight
398 // fInputShape[3] -> InputWidth
399 if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
400 fInputShape[1] = 1;
401 fInputShape[2] = 1;
403 }
404 // case when batch layout is not provided (all zero)
405 // batch layout can be determined by the input layout + batch size
406 // case DNN : { 1, B, W }
407 // case CNN : { B, C, H*W}
408 // case RNN : { B, T, H*W }
409
410 if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
411 // case first layer is DENSE
412 if (fInputShape[2] == 1 && fInputShape[1] == 1) {
413 // case of (1, batchsize, input features)
414 fBatchDepth = 1;
415 fBatchHeight = fTrainingSettings.front().batchSize;
417 }
418 else { // more general cases (e.g. for CNN)
419 // case CONV or RNN
420 fBatchDepth = fTrainingSettings.front().batchSize;
423 }
424 }
425}
426
427////////////////////////////////////////////////////////////////////////////////
428/// default initializations
430{
431 // Nothing to do here
432}
433
434////////////////////////////////////////////////////////////////////////////////
435/// Parse the input layout
437{
438 // Define the delimiter
439 const TString delim("|");
440
441 // Get the input layout string
442 TString inputLayoutString = this->GetInputLayoutString();
443
444 // Split the input layout string
445 TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
446 TIter nextInputDim(inputDimStrings);
447 TObjString *inputDimString = (TObjString *)nextInputDim();
448
449 // Go through every token and save its absolute value in the shape array
450 // The first token is the batch size for easy compatibility with cudnn
451 int subDim = 1;
452 std::vector<size_t> inputShape;
453 inputShape.reserve(inputLayoutString.Length()/2 + 2);
454 inputShape.push_back(30); // Will be set later by Trainingsettings, use 0 value now
455 for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
456 // size_t is unsigned
457 subDim = (size_t) abs(inputDimString->GetString().Atoi());
458 // Size among unused dimensions should be set to 1 for cudnn
459 //if (subDim == 0) subDim = 1;
460 inputShape.push_back(subDim);
461 }
462 // it is expected that empty Shape has at least 4 dimensions. We pad the missing one's with 1
463 // for example in case of dense layer input layouts
464 // when we will support 3D convolutions we would need to add extra 1's
465 if (inputShape.size() == 2) {
466 // case of dense layer where only width is specified
467 inputShape.insert(inputShape.begin() + 1, {1,1});
468 }
469 else if (inputShape.size() == 3) {
470 //e.g. case of RNN T,W -> T,1,W
471 inputShape.insert(inputShape.begin() + 2, 1);
472 }
473
474 this->SetInputShape(inputShape);
475}
476
477////////////////////////////////////////////////////////////////////////////////
478/// Parse the input layout
480{
481 // Define the delimiter
482 const TString delim("|");
483
484 // Get the input layout string
485 TString batchLayoutString = this->GetBatchLayoutString();
486
487 size_t batchDepth = 0;
488 size_t batchHeight = 0;
489 size_t batchWidth = 0;
490
491 // Split the input layout string
492 TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
493 TIter nextBatchDim(batchDimStrings);
494 TObjString *batchDimString = (TObjString *)nextBatchDim();
495 int idxToken = 0;
496
497 for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
498 switch (idxToken) {
499 case 0: // input depth
500 {
501 TString strDepth(batchDimString->GetString());
502 batchDepth = (size_t)strDepth.Atoi();
503 } break;
504 case 1: // input height
505 {
506 TString strHeight(batchDimString->GetString());
507 batchHeight = (size_t)strHeight.Atoi();
508 } break;
509 case 2: // input width
510 {
511 TString strWidth(batchDimString->GetString());
512 batchWidth = (size_t)strWidth.Atoi();
513 } break;
514 }
515 ++idxToken;
516 }
517
518 this->SetBatchDepth(batchDepth);
519 this->SetBatchHeight(batchHeight);
520 this->SetBatchWidth(batchWidth);
521}
522
523////////////////////////////////////////////////////////////////////////////////
524/// Create a deep net based on the layout string
525template <typename Architecture_t, typename Layer_t>
528{
529 // Layer specification, layer details
530 const TString layerDelimiter(",");
531 const TString subDelimiter("|");
532
533 TString layoutString = this->GetLayoutString();
534
535 //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
536
537 // Split layers
538 TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
539 TIter nextLayer(layerStrings);
540 TObjString *layerString = (TObjString *)nextLayer();
541
542
543 for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
544
545 // Split layer details
546 TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
547 TIter nextToken(subStrings);
548 TObjString *token = (TObjString *)nextToken();
549
550 // Determine the type of the layer
551 TString strLayerType = token->GetString();
552
553
554 if (strLayerType == "DENSE") {
555 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
556 } else if (strLayerType == "CONV") {
557 ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
558 } else if (strLayerType == "MAXPOOL") {
559 ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
560 } else if (strLayerType == "RESHAPE") {
561 ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
562 } else if (strLayerType == "BNORM") {
563 ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
564 } else if (strLayerType == "RNN") {
565 ParseRecurrentLayer(kLayerRNN, deepNet, nets, layerString->GetString(), subDelimiter);
566 } else if (strLayerType == "LSTM") {
567 ParseRecurrentLayer(kLayerLSTM, deepNet, nets, layerString->GetString(), subDelimiter);
568 } else if (strLayerType == "GRU") {
569 ParseRecurrentLayer(kLayerGRU, deepNet, nets, layerString->GetString(), subDelimiter);
570 } else {
571 // no type of layer specified - assume is dense layer as in old DNN interface
572 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
573 }
574 }
575}
576
577////////////////////////////////////////////////////////////////////////////////
 578/// Parses the layer string and creates the appropriate dense layer
579template <typename Architecture_t, typename Layer_t>
581 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
582 TString delim)
583{
584 int width = 0;
586
587 // this return number of input variables for the method
588 // it can be used to deduce width of dense layer if specified as N+10
589 // where N is the number of input variables
590 const size_t inputSize = GetNvar();
591
592 // Split layer details
593 TObjArray *subStrings = layerString.Tokenize(delim);
594 TIter nextToken(subStrings);
595 TObjString *token = (TObjString *)nextToken();
596 int idxToken = 0;
597
598 // loop on the tokens
599 // order of sepcifying width and activation function is not relevant
600 // both 100|TANH and TANH|100 are valid cases
601 for (; token != nullptr; token = (TObjString *)nextToken()) {
602 idxToken++;
603 // try a match with the activation function
604 TString strActFnc(token->GetString());
605 // if first token defines the layer type- skip it
606 if (strActFnc =="DENSE") continue;
607
608 if (strActFnc == "RELU") {
609 activationFunction = DNN::EActivationFunction::kRelu;
610 } else if (strActFnc == "TANH") {
611 activationFunction = DNN::EActivationFunction::kTanh;
612 } else if (strActFnc == "FTANH") {
613 activationFunction = DNN::EActivationFunction::kFastTanh;
614 } else if (strActFnc == "SYMMRELU") {
615 activationFunction = DNN::EActivationFunction::kSymmRelu;
616 } else if (strActFnc == "SOFTSIGN") {
617 activationFunction = DNN::EActivationFunction::kSoftSign;
618 } else if (strActFnc == "SIGMOID") {
619 activationFunction = DNN::EActivationFunction::kSigmoid;
620 } else if (strActFnc == "LINEAR") {
621 activationFunction = DNN::EActivationFunction::kIdentity;
622 } else if (strActFnc == "GAUSS") {
623 activationFunction = DNN::EActivationFunction::kGauss;
624 } else if (width == 0) {
625 // no match found try to parse as text showing the width
626 // support for input a formula where the variable 'x' is 'N' in the string
627 // use TFormula for the evaluation
628 TString strNumNodes = strActFnc;
629 // number of nodes
630 TString strN("x");
631 strNumNodes.ReplaceAll("N", strN);
632 strNumNodes.ReplaceAll("n", strN);
633 TFormula fml("tmp", strNumNodes);
634 width = fml.Eval(inputSize);
635 }
636 }
637 // avoid zero width. assume is last layer and give width = output width
638 // Determine the number of outputs
639 size_t outputSize = 1;
641 outputSize = GetNTargets();
642 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
643 outputSize = DataInfo().GetNClasses();
644 }
645 if (width == 0) width = outputSize;
646
647 // Add the dense layer, initialize the weights and biases and copy
648 TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
649 denseLayer->Initialize();
650
651 // add same layer to fNet
652 if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
653
654 //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
655
656 // add the copy to all slave nets
657 //for (size_t i = 0; i < nets.size(); i++) {
658 // nets[i].AddDenseLayer(copyDenseLayer);
659 //}
660
661 // check compatibility of added layer
662 // for a dense layer input should be 1 x 1 x DxHxW
663}
664
665////////////////////////////////////////////////////////////////////////////////
 666/// Parses the layer string and creates the appropriate convolutional layer
667template <typename Architecture_t, typename Layer_t>
669 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
670 TString delim)
671{
672 int depth = 0;
673 int fltHeight = 0;
674 int fltWidth = 0;
675 int strideRows = 0;
676 int strideCols = 0;
677 int zeroPadHeight = 0;
678 int zeroPadWidth = 0;
680
681 // Split layer details
682 TObjArray *subStrings = layerString.Tokenize(delim);
683 TIter nextToken(subStrings);
684 TObjString *token = (TObjString *)nextToken();
685 int idxToken = 0;
686
687 for (; token != nullptr; token = (TObjString *)nextToken()) {
688 switch (idxToken) {
689 case 1: // depth
690 {
691 TString strDepth(token->GetString());
692 depth = strDepth.Atoi();
693 } break;
694 case 2: // filter height
695 {
696 TString strFltHeight(token->GetString());
697 fltHeight = strFltHeight.Atoi();
698 } break;
699 case 3: // filter width
700 {
701 TString strFltWidth(token->GetString());
702 fltWidth = strFltWidth.Atoi();
703 } break;
704 case 4: // stride in rows
705 {
706 TString strStrideRows(token->GetString());
707 strideRows = strStrideRows.Atoi();
708 } break;
709 case 5: // stride in cols
710 {
711 TString strStrideCols(token->GetString());
712 strideCols = strStrideCols.Atoi();
713 } break;
714 case 6: // zero padding height
715 {
716 TString strZeroPadHeight(token->GetString());
717 zeroPadHeight = strZeroPadHeight.Atoi();
718 } break;
719 case 7: // zero padding width
720 {
721 TString strZeroPadWidth(token->GetString());
722 zeroPadWidth = strZeroPadWidth.Atoi();
723 } break;
724 case 8: // activation function
725 {
726 TString strActFnc(token->GetString());
727 if (strActFnc == "RELU") {
728 activationFunction = DNN::EActivationFunction::kRelu;
729 } else if (strActFnc == "TANH") {
730 activationFunction = DNN::EActivationFunction::kTanh;
731 } else if (strActFnc == "SYMMRELU") {
732 activationFunction = DNN::EActivationFunction::kSymmRelu;
733 } else if (strActFnc == "SOFTSIGN") {
734 activationFunction = DNN::EActivationFunction::kSoftSign;
735 } else if (strActFnc == "SIGMOID") {
736 activationFunction = DNN::EActivationFunction::kSigmoid;
737 } else if (strActFnc == "LINEAR") {
738 activationFunction = DNN::EActivationFunction::kIdentity;
739 } else if (strActFnc == "GAUSS") {
740 activationFunction = DNN::EActivationFunction::kGauss;
741 }
742 } break;
743 }
744 ++idxToken;
745 }
746
747 // Add the convolutional layer, initialize the weights and biases and copy
748 TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
749 zeroPadHeight, zeroPadWidth, activationFunction);
750 convLayer->Initialize();
751
752 // Add same layer to fNet
753 if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
754 zeroPadHeight, zeroPadWidth, activationFunction);
755
756 //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
757
758 //// add the copy to all slave nets
759 //for (size_t i = 0; i < nets.size(); i++) {
760 // nets[i].AddConvLayer(copyConvLayer);
761 //}
762}
763
764////////////////////////////////////////////////////////////////////////////////
 765/// Parses the layer string and creates the appropriate max pool layer
766template <typename Architecture_t, typename Layer_t>
768 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
769 TString delim)
770{
771
772 int filterHeight = 0;
773 int filterWidth = 0;
774 int strideRows = 0;
775 int strideCols = 0;
776
777 // Split layer details
778 TObjArray *subStrings = layerString.Tokenize(delim);
779 TIter nextToken(subStrings);
780 TObjString *token = (TObjString *)nextToken();
781 int idxToken = 0;
782
783 for (; token != nullptr; token = (TObjString *)nextToken()) {
784 switch (idxToken) {
785 case 1: // filter height
786 {
787 TString strFrmHeight(token->GetString());
788 filterHeight = strFrmHeight.Atoi();
789 } break;
790 case 2: // filter width
791 {
792 TString strFrmWidth(token->GetString());
793 filterWidth = strFrmWidth.Atoi();
794 } break;
795 case 3: // stride in rows
796 {
797 TString strStrideRows(token->GetString());
798 strideRows = strStrideRows.Atoi();
799 } break;
800 case 4: // stride in cols
801 {
802 TString strStrideCols(token->GetString());
803 strideCols = strStrideCols.Atoi();
804 } break;
805 }
806 ++idxToken;
807 }
808
809 // Add the Max pooling layer
810 // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
811 deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
812
813 // Add the same layer to fNet
814 if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
815
816
817 //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
818
819 //// add the copy to all slave nets
820 //for (size_t i = 0; i < nets.size(); i++) {
821 // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
822 //}
823}
824
825////////////////////////////////////////////////////////////////////////////////
 826/// Parses the layer string and creates the appropriate reshape layer
827template <typename Architecture_t, typename Layer_t>
829 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
830 TString delim)
831{
832 int depth = 0;
833 int height = 0;
834 int width = 0;
835 bool flattening = false;
836
837 // Split layer details
838 TObjArray *subStrings = layerString.Tokenize(delim);
839 TIter nextToken(subStrings);
840 TObjString *token = (TObjString *)nextToken();
841 int idxToken = 0;
842
843 for (; token != nullptr; token = (TObjString *)nextToken()) {
844 if (token->GetString() == "FLAT") idxToken=4;
845 switch (idxToken) {
846 case 1: {
847 TString strDepth(token->GetString());
848 depth = strDepth.Atoi();
849 } break;
850 case 2: // height
851 {
852 TString strHeight(token->GetString());
853 height = strHeight.Atoi();
854 } break;
855 case 3: // width
856 {
857 TString strWidth(token->GetString());
858 width = strWidth.Atoi();
859 } break;
860 case 4: // flattening
861 {
862 TString flat(token->GetString());
863 if (flat == "FLAT") {
864 flattening = true;
865 }
866 } break;
867 }
868 ++idxToken;
869 }
870
871 // Add the reshape layer
872 // TReshapeLayer<Architecture_t> *reshapeLayer =
873 deepNet.AddReshapeLayer(depth, height, width, flattening);
874
875 // Add the same layer to fNet
876 if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
877
878 //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
879
880 //// add the copy to all slave nets
881 //for (size_t i = 0; i < nets.size(); i++) {
882 // nets[i].AddReshapeLayer(copyReshapeLayer);
883 //}
884}
885
886////////////////////////////////////////////////////////////////////////////////
 887/// Parses the layer string and creates the appropriate batch normalization layer
888template <typename Architecture_t, typename Layer_t>
890 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
891 TString delim)
892{
893
894 // default values
895 double momentum = -1; //0.99;
896 double epsilon = 0.0001;
897
898 // Split layer details
899 TObjArray *subStrings = layerString.Tokenize(delim);
900 TIter nextToken(subStrings);
901 TObjString *token = (TObjString *)nextToken();
902 int idxToken = 0;
903
904 for (; token != nullptr; token = (TObjString *)nextToken()) {
905 switch (idxToken) {
906 case 1: {
907 momentum = std::atof(token->GetString().Data());
908 } break;
909 case 2: // height
910 {
911 epsilon = std::atof(token->GetString().Data());
912 } break;
913 }
914 ++idxToken;
915 }
916
917 // Add the batch norm layer
918 //
919 auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
920 layer->Initialize();
921
922 // Add the same layer to fNet
923 if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
924
925}
926
927////////////////////////////////////////////////////////////////////////////////
 928/// Parses the layer string and creates the appropriate recurrent layer (RNN/LSTM/GRU)
929template <typename Architecture_t, typename Layer_t>
931 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
932 TString delim)
933{
934 // int depth = 0;
935 int stateSize = 0;
936 int inputSize = 0;
937 int timeSteps = 0;
938 bool rememberState = false;
939 bool returnSequence = false;
940 bool resetGateAfter = false;
941
942 // Split layer details
943 TObjArray *subStrings = layerString.Tokenize(delim);
944 TIter nextToken(subStrings);
945 TObjString *token = (TObjString *)nextToken();
946 int idxToken = 0;
947
948 for (; token != nullptr; token = (TObjString *)nextToken()) {
949 switch (idxToken) {
950 case 1: // state size
951 {
952 TString strstateSize(token->GetString());
953 stateSize = strstateSize.Atoi();
954 break;
955 }
956 case 2: // input size
957 {
958 TString strinputSize(token->GetString());
959 inputSize = strinputSize.Atoi();
960 break;
961 }
962 case 3: // time steps
963 {
964 TString strtimeSteps(token->GetString());
965 timeSteps = strtimeSteps.Atoi();
966 break;
967 }
968 case 4: // returnSequence (option stateful in Keras)
969 {
970 TString strrememberState(token->GetString());
971 rememberState = (bool) strrememberState.Atoi();
972 break;
973 }
974 case 5: // return full output sequence (1 or 0)
975 {
976 TString str(token->GetString());
977 returnSequence = (bool)str.Atoi();
978 break;
979 }
980 case 6: // resetGate after option (only for GRU)
981 {
982 TString str(token->GetString());
983 resetGateAfter = (bool)str.Atoi();
984 }
985 }
986 ++idxToken;
987 }
988
989 // Add the recurrent layer, initialize the weights and biases and copy
990 if (rnnType == kLayerRNN) {
991 auto * recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
992 recurrentLayer->Initialize();
993 // Add same layer to fNet
994 if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
995 }
996 else if (rnnType == kLayerLSTM ) {
997 auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
998 recurrentLayer->Initialize();
999 // Add same layer to fNet
1000 if (fBuildNet)
1001 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1002 }
1003 else if (rnnType == kLayerGRU) {
1004 if (Architecture_t::IsCudnn()) resetGateAfter = true; // needed for Cudnn
1005 auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1006 recurrentLayer->Initialize();
1007 // Add same layer to fNet
1008 if (fBuildNet)
1009 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1010 }
1011 else {
1012 Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
1013 }
1014}
1015
1016////////////////////////////////////////////////////////////////////////////////
1017/// Standard constructor.
/// Only zero-/default-initializes the option members here; the actual
/// configuration (layout strings, training strategy, etc.) is parsed later
/// from theOption by the option-processing machinery — TODO confirm (the
/// parsing code is outside this extract).
1018MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
1019 : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1020 fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1021 fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1022 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1023 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1024 fXInput()
1025{
1026 // Nothing to do here
1027}
1028
1029////////////////////////////////////////////////////////////////////////////////
1030/// Constructor from a weight file.
/// Same member initialization as the standard constructor; the network itself
/// is reconstructed later when the weight file is read (outside this extract).
1031MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
1032 : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1033 fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1034 fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1035 fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1036 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1037 fXInput()
1038{
1039 // Nothing to do here
1040}
1041
1042////////////////////////////////////////////////////////////////////////////////
1043/// Destructor. Owned resources (fNet, fYHat) are smart pointers, so nothing
/// needs explicit cleanup.
/// NOTE(review): the signature line (doxygen line 1044, presumably
/// MethodDL::~MethodDL()) is missing from this extract — confirm against the
/// full source.
1045{
1046 // Nothing to do here
1047}
1048
1049////////////////////////////////////////////////////////////////////////////////
1050/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1051auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1052{
1053 // remove empty spaces
1054 parseString.ReplaceAll(" ","");
1055 KeyValueVector_t blockKeyValues;
1056 const TString keyValueDelim("=");
1057
1058 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1059 TIter nextBlock(blockStrings);
1060 TObjString *blockString = (TObjString *)nextBlock();
1061
1062 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1063 blockKeyValues.push_back(std::map<TString, TString>());
1064 std::map<TString, TString> &currentBlock = blockKeyValues.back();
1065
1066 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1067 TIter nextToken(subStrings);
1068 TObjString *token = (TObjString *)nextToken();
1069
1070 for (; token != nullptr; token = (TObjString *)nextToken()) {
1071 TString strKeyValue(token->GetString());
1072 int delimPos = strKeyValue.First(keyValueDelim.Data());
1073 if (delimPos <= 0) continue;
1074
1075 TString strKey = TString(strKeyValue(0, delimPos));
1076 strKey.ToUpper();
1077 TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1078
1079 strKey.Strip(TString::kBoth, ' ');
1080 strValue.Strip(TString::kBoth, ' ');
1081
1082 currentBlock.insert(std::make_pair(strKey, strValue));
1083 }
1084 }
1085 return blockKeyValues;
1086}
1087
1088////////////////////////////////////////////////////////////////////////////////
1089/// What kind of analysis type can handle the CNN
/// Accepts two-class classification, multiclass and regression; everything
/// else is rejected.
/// NOTE(review): the signature line (doxygen line 1090, presumably
/// Bool_t MethodDL::HasAnalysisType(Types::EAnalysisType type,
/// UInt_t numberClasses, UInt_t)) is missing from this extract — confirm
/// against the full source.
1091{
1092 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1093 if (type == Types::kMulticlass) return kTRUE;
1094 if (type == Types::kRegression) return kTRUE;
1095
1096 return kFALSE;
1097}
1098
1099////////////////////////////////////////////////////////////////////////////////
1100/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1101/// 100 etc.
1102/// - 20% and 0.2 selects 20% of the training set as validation data.
1103/// - 100 selects 100 events as the validation data.
1104///
1105/// @return number of samples in validation set
1106///
/// A kFATAL is emitted (via Log()) when the string cannot be parsed or the
/// resulting size is negative, zero, or >= the training set size.
/// NOTE(review): the signature line (doxygen line 1107, presumably
/// UInt_t MethodDL::GetNumValidationSamples()) is missing from this extract.
1108{
1109 Int_t nValidationSamples = 0;
1110 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1111
1112 // Parsing + Validation
1113 // --------------------
1114 if (fNumValidationString.EndsWith("%")) {
1115 // Relative spec. format 20%
1116 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1117
1118 if (intValStr.IsFloat()) {
 // Atof() on the full string (e.g. "20%") stops at the '%', so it
 // parses the same number as intValStr would
1119 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1120 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1121 } else {
1122 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1123 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1124 }
1125 } else if (fNumValidationString.IsFloat()) {
1126 Double_t valSizeAsDouble = fNumValidationString.Atof();
1127
1128 if (valSizeAsDouble < 1.0) {
1129 // Relative spec. format 0.2
1130 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1131 } else {
1132 // Absolute spec format 100 or 100.0
1133 nValidationSamples = valSizeAsDouble;
1134 }
1135 } else {
1136 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1137 << Endl;
1138 }
1139
1140 // Value validation
1141 // ----------------
1142 if (nValidationSamples < 0) {
1143 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1144 }
1145
1146 if (nValidationSamples == 0) {
1147 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1148 }
1149
1150 if (nValidationSamples >= (Int_t)trainingSetSize) {
1151 Log() << kFATAL << "Validation size \"" << fNumValidationString
1152 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1153 }
1154
1155 return nValidationSamples;
1156}
1157
1158
1159////////////////////////////////////////////////////////////////////////////////
1160/// Implementation of architecture specific train method
1161///
/// Splits the training events into training/validation subsets, then for each
/// configured TTrainingSettings phase: builds the master deepNet (and, on the
/// first phase, the batch-size-1 evaluation copy fNet), creates the requested
/// optimizer, and iterates epochs until convergence (no validation improvement
/// for convergenceSteps epochs) or maxEpochs. The best weights seen on the
/// validation set are copied into fNet.
/// NOTE(review): the signature line (doxygen line 1163, presumably
/// template void MethodDL::TrainDeepNet()) is missing from this extract.
1162template <typename Architecture_t>
1164{
1165
1166 using Scalar_t = typename Architecture_t::Scalar_t;
1169 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1170
1171 bool debug = Log().GetMinType() == kDEBUG;
1172
1173
1174 // set the random seed for weight initialization
1175 Architecture_t::SetRandomSeed(fRandomSeed);
1176
1177 ///split training data in training and validation data
1178 // and determine the number of training and testing examples
1179
1180 size_t nValidationSamples = GetNumValidationSamples();
1181 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1182
 // validation events are taken from the tail of the training collection
1183 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1184 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1185 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1186
1187 size_t trainingPhase = 1;
1188
1189 for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1190
1191 size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1192
1193
1194 // After the processing of the options, initialize the master deep net
1195 size_t batchSize = settings.batchSize;
1196 this->SetBatchSize(batchSize);
1197 // Should be replaced by actual implementation. No support for this now.
1198 size_t inputDepth = this->GetInputDepth();
1199 size_t inputHeight = this->GetInputHeight();
1200 size_t inputWidth = this->GetInputWidth();
1201 size_t batchDepth = this->GetBatchDepth();
1202 size_t batchHeight = this->GetBatchHeight();
1203 size_t batchWidth = this->GetBatchWidth();
1204 ELossFunction J = this->GetLossFunction();
1206 ERegularization R = settings.regularization;
1207 EOptimizer O = settings.optimizer;
1208 Scalar_t weightDecay = settings.weightDecay;
1209
1210 //Batch size should be included in batch layout as well. There are two possibilities:
1211 // 1. Batch depth = batch size one will input tensors as (batch_size x d1 x d2)
1212 // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1213 // 2. Batch depth = 1, batch height = batch size batch width = dim of input features
1214 // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1215
1216 if (batchDepth != batchSize && batchDepth > 1) {
1217 Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1218 return;
1219 }
1220 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1221 Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1222 return;
1223 }
1224
1225
1226 //check also that input layout compatible with batch layout
1227 bool badLayout = false;
1228 // case batch depth == batch size
1229 if (batchDepth == batchSize)
1230 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1231 // case batch Height is batch size
1232 if (batchHeight == batchSize && batchDepth == 1)
1233 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1234 if (badLayout) {
1235 Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1236 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1237 return;
1238 }
1239
1240 // check batch size is compatible with number of events
1241 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1242 Log() << kFATAL << "Number of samples in the datasets are train: ("
1243 << nTrainingSamples << ") test: (" << nValidationSamples
1244 << "). One of these is smaller than the batch size of "
1245 << settings.batchSize << ". Please increase the batch"
1246 << " size to be at least the same size as the smallest"
1247 << " of them." << Endl;
1248 }
1249
1250 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1251
1252 // create a copy of DeepNet for evaluating but with batch size = 1
1253 // fNet is the saved network and will be with CPU or Reference architecture
1254 if (trainingPhase == 1) {
1255 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1256 batchHeight, batchWidth, J, I, R, weightDecay));
1257 fBuildNet = true;
1258 }
1259 else
1260 fBuildNet = false;
1261
1262 // Initialize the vector of slave nets
1263 std::vector<DeepNet_t> nets{};
1264 nets.reserve(nThreads);
1265 for (size_t i = 0; i < nThreads; i++) {
1266 // create a copies of the master deep net
1267 nets.push_back(deepNet);
1268 }
1269
1270
1271 // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1272 CreateDeepNet(deepNet, nets);
1273
1274
1275 // set dropout probabilities
1276 // use convention to store in the layer 1.- dropout probabilities
1277 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1278 for (auto & p : dropoutVector) {
1279 p = 1.0 - p;
1280 }
1281 deepNet.SetDropoutProbabilities(dropoutVector);
1282
1283 if (trainingPhase > 1) {
1284 // copy initial weights from fNet to deepnet
1285 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1286 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1287 }
1288 }
1289
1290 // when fNet is built create also input matrix that will be used to evaluate it
1291 if (fBuildNet) {
1292 //int n1 = batchHeight;
1293 //int n2 = batchWidth;
1294 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1295 //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1296 //fXInput = TensorImpl_t(1,n1,n2);
 // NOTE(review): doxygen line 1297 (presumably the default fXInput
 // initialization) is missing from this extract — confirm against source.
1298 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1299 fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1300 fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );
1301
1302
1303 // create pointer to output matrix used for the predictions
1304 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1305
1306 // print the created network
1307 Log() << "***** Deep Learning Network *****" << Endl;
1308 if (Log().GetMinType() <= kINFO)
1309 deepNet.Print();
1310 }
1311 Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1312
1313 // Loading the training and validation datasets
1314 TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1315 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1316 {inputDepth, inputHeight, inputWidth},
1317 {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1318 deepNet.GetOutputWidth(), nThreads);
1319
1320 TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1321 TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1322 {inputDepth, inputHeight, inputWidth},
1323 { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1324 deepNet.GetOutputWidth(), nThreads);
1325
1326
1327
1328 // do an evaluation of the network to compute initial minimum test error
1329
1330 Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1331
1332 Double_t minValError = 0.0;
1333 Log() << "Compute initial loss on the validation data " << Endl;
1334 for (auto batch : validationData) {
1335 auto inputTensor = batch.GetInput();
1336 auto outputMatrix = batch.GetOutput();
1337 auto weights = batch.GetWeights();
1338
1339 //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1340 // should we apply dropout to the loss ??
1341 minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1342 }
1343 // add Regularization term
1344 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1345 minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1346 minValError += regzTerm;
1347
1348
1349 // create a pointer to base class VOptimizer
1350 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1351
1352 // initialize the base class pointer with the corresponding derived class object.
1353 switch (O) {
1354
1355 case EOptimizer::kSGD:
1356 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1357 new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1358 break;
1359
1360 case EOptimizer::kAdam:
1361 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1362 new DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1363 break;
1364
1365 case EOptimizer::kAdagrad:
1366 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1367 new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1368 break;
1369
1370 case EOptimizer::kRMSProp:
1371 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1372 new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum));
1373 break;
1374
1375 case EOptimizer::kAdadelta:
1376 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1377 new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1378 break;
1379 }
1380
1381
1382 // Initialize the vector of batches, one batch for one slave network
1383 std::vector<TTensorBatch<Architecture_t>> batches{};
1384
1385 bool converged = false;
1386 size_t convergenceCount = 0;
1387 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1388
1389 // start measuring
1390 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1391 tstart = std::chrono::system_clock::now();
1392
1393 Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1394 << " Optimizer " << settings.optimizerName
1395 << " Learning rate = " << settings.learningRate
1396 << " regularization " << (char) settings.regularization
1397 << " minimum error = " << minValError
1398 << Endl;
1399 if (!fInteractive) {
1400 std::string separator(62, '-');
1401 Log() << separator << Endl;
1402 Log() << std::setw(10) << "Epoch"
1403 << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
1404 << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
1405 << std::setw(12) << "nEvents/s"
1406 << std::setw(12) << "Conv. Steps" << Endl;
1407 Log() << separator << Endl;
1408 }
1409
1410 // set up generator for shuffling the batches
1411 // if seed is zero we have always a different order in the batches
1412 size_t shuffleSeed = 0;
1413 if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1414 RandomGenerator<TRandom3> rng(shuffleSeed);
1415
1416 // print weights before
1417 if (fBuildNet && debug) {
1418 Log() << "Initial Deep Net Weights " << Endl;
1419 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1420 for (size_t l = 0; l < weights_tensor.size(); ++l)
1421 weights_tensor[l].Print();
1422 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1423 bias_tensor[0].Print();
1424 }
1425
1426 Log() << " Start epoch iteration ..." << Endl;
1427 bool debugFirstEpoch = false;
1428 bool computeLossInTraining = true; // compute loss in training or at test time
1429 size_t nTrainEpochs = 0;
1430 while (!converged) {
1431 nTrainEpochs++;
1432 trainingData.Shuffle(rng);
1433
1434 // execute all epochs
1435 //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1436
1437 Double_t trainingError = 0;
1438 for (size_t i = 0; i < batchesInEpoch; ++i ) {
1439 // Clean and load new batches, one batch for one slave net
1440 //batches.clear();
1441 //batches.reserve(nThreads);
1442 //for (size_t j = 0; j < nThreads; j++) {
1443 // batches.push_back(trainingData.GetTensorBatch());
1444 //}
1445 if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1446
1447 auto my_batch = trainingData.GetTensorBatch();
1448
1449 if (debugFirstEpoch)
1450 std::cout << "got batch data - doing forward \n";
1451
1452#ifdef DEBUG
1453
1454 Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1455 typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1456 typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1457 Architecture_t::PrintTensor(tOut,"label tensor",true) ;
1458 Architecture_t::PrintTensor(tW,"weight tensor",true) ;
1459#endif
1460
 // forward pass in training mode (applies dropout etc.)
1461 deepNet.Forward(my_batch.GetInput(), true);
1462 // compute also loss
1463 if (computeLossInTraining) {
1464 auto outputMatrix = my_batch.GetOutput();
1465 auto weights = my_batch.GetWeights();
1466 trainingError += deepNet.Loss(outputMatrix, weights, false);
1467 }
1468
1469 if (debugFirstEpoch)
1470 std::cout << "- doing backward \n";
1471
1472#ifdef DEBUG
1473 size_t nlayers = deepNet.GetLayers().size();
1474 for (size_t l = 0; l < nlayers; ++l) {
1475 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1476 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1477 TString::Format("initial weights layer %d", l).Data());
1478
1479 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1480 TString::Format("output tensor layer %d", l).Data());
1481 }
1482#endif
1483
1484 //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1485
1486 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1487
1488 if (debugFirstEpoch)
1489 std::cout << "- doing optimizer update \n";
1490
1491 // increment optimizer step that is used in some algorithms (e.g. ADAM)
1492 optimizer->IncrementGlobalStep();
1493 optimizer->Step();
1494
1495#ifdef DEBUG
1496 std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1497 for (size_t l = 0; l < nlayers; ++l) {
1498 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1499 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1500 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1501 }
1502 }
1503#endif
1504
1505 }
1506
1507 if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
1508 //}
1509 debugFirstEpoch = false;
 // validation / bookkeeping only every testInterval epochs
1510 if ((nTrainEpochs % settings.testInterval) == 0) {
1511
1512 std::chrono::time_point<std::chrono::system_clock> t1,t2;
1513
1514 t1 = std::chrono::system_clock::now();
1515
1516 // Compute validation error.
1517
1518
1519 Double_t valError = 0.0;
1520 bool inTraining = false;
1521 for (auto batch : validationData) {
1522 auto inputTensor = batch.GetInput();
1523 auto outputMatrix = batch.GetOutput();
1524 auto weights = batch.GetWeights();
1525 // should we apply dropout to the loss ??
1526 valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1527 }
1528 // normalize loss to number of batches and add regularization term
1529 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1530 valError /= (Double_t)(nValidationSamples / settings.batchSize);
1531 valError += regTerm;
1532
1533 //Log the loss value
1534 fTrainHistory.AddValue("valError",nTrainEpochs,valError);
1535
1536 t2 = std::chrono::system_clock::now();
1537
1538 // checking for convergence
1539 if (valError < minValError) {
1540 convergenceCount = 0;
1541 } else {
1542 convergenceCount += settings.testInterval;
1543 }
1544
1545 // copy configuration when reached a minimum error
1546 if (valError < minValError ) {
1547 // Copy weights from deepNet to fNet
1548 Log() << std::setw(10) << nTrainEpochs
1549 << " Minimum Test error found - save the configuration " << Endl;
1550 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1551 fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1552 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1553 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1554 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1555 // }
1556 }
1557 // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1558 // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1559
1560 minValError = valError;
1561 }
 // a non-positive initial minimum means it was never validly set; adopt
 // the first computed validation error as the baseline
1562 else if ( minValError <= 0. )
1563 minValError = valError;
1564
1565 if (!computeLossInTraining) {
1566 trainingError = 0.0;
1567 // Compute training error.
1568 for (auto batch : trainingData) {
1569 auto inputTensor = batch.GetInput();
1570 auto outputMatrix = batch.GetOutput();
1571 auto weights = batch.GetWeights();
1572 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1573 }
1574 }
1575 // normalize loss to number of batches and add regularization term
1576 trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1577 trainingError += regTerm;
1578
1579 //Log the loss value
1580 fTrainHistory.AddValue("trainingError",nTrainEpochs,trainingError);
1581
1582 // stop measuring
1583 tend = std::chrono::system_clock::now();
1584
1585 // Compute numerical throughput.
1586 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1587 std::chrono::duration<double> elapsed1 = t1-tstart;
1588 // std::chrono::duration<double> elapsed2 = t2-tstart;
1589 // time to compute training and test errors
1590 std::chrono::duration<double> elapsed_testing = tend-t1;
1591
1592 double seconds = elapsed_seconds.count();
1593 // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1594 // nGFlops *= deepnet.GetNFlops() * 1e-9;
1595 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1596
1597 converged =
1598 convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1599
1600
1601 Log() << std::setw(10) << nTrainEpochs << " | "
1602 << std::setw(12) << trainingError
1603 << std::setw(12) << valError
1604 << std::setw(12) << seconds / settings.testInterval
1605 << std::setw(12) << elapsed_testing.count()
1606 << std::setw(12) << 1. / eventTime
1607 << std::setw(12) << convergenceCount
1608 << Endl;
1609
1610 if (converged) {
1611 Log() << Endl;
1612 }
1613 tstart = std::chrono::system_clock::now();
1614 }
1615
1616 // if (stepCount % 10 == 0 || converged) {
1617 if (converged && debug) {
1618 Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
1619 << Endl;
1620 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1621 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1622 for (size_t l = 0; l < weights_tensor.size(); ++l)
1623 weights_tensor[l].Print();
1624 bias_tensor[0].Print();
1625 }
1626
1627 }
1628
1629 trainingPhase++;
1630 } // end loop on training Phase
1631}
1632
1633////////////////////////////////////////////////////////////////////////////////
/// Dispatches training to the architecture-specific TrainDeepNet<> backend
/// chosen from the ARCHITECTURE option: "GPU" (cuDNN if available, else CUDA)
/// or "CPU". Aborts with kFATAL for interactive mode, a GPU request without
/// CUDA support, or an unknown architecture string.
/// NOTE(review): the signature line (doxygen line 1634, presumably
/// void MethodDL::Train()) is missing from this extract.
1635{
1636 if (fInteractive) {
1637 Log() << kFATAL << "Not implemented yet" << Endl;
1638 return;
1639 }
1640
1641 // using for training same scalar type defined for the prediction
1642 if (this->GetArchitectureString() == "GPU") {
1643#ifdef R__HAS_TMVAGPU
1644 Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1645#ifdef R__HAS_CUDNN
1646 TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1647#else
1648 TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1649#endif
1650#else
1651 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1652 "you have CUDA installed and it was successfully "
1653 "detected by CMAKE."
1654 << Endl;
1655 return;
1656#endif
1657 } else if (this->GetArchitectureString() == "CPU") {
1658#ifdef R__HAS_TMVACPU
1659 // note that number of threads used for BLAS might be different
1660 // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1661 Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
1662 << gConfig().GetNCpu() << Endl << Endl;
1663#else
1664 Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
1665 << Endl;
1666#endif
1667 TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1668 return;
1669 }
1670 else {
1671 Log() << kFATAL << this->GetArchitectureString() <<
1672 " is not a supported architecture for TMVA::MethodDL"
1673 << Endl;
1674 }
1675
1676}
1677
1678
1679////////////////////////////////////////////////////////////////////////////////
/// Evaluate the trained network on the current event and return the MVA value.
/// Copies the event variables into fXInputBuffer (reordering to HWC for
/// column-major layout), uploads to fXInput, runs fNet->Prediction and returns
/// element (0,0) of fYHat, or -999. if the prediction is NaN.
/// \param errLower/errUpper unused (no error estimate is computed).
1680Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1681{
1682
1683 // note that fNet should have been build with a batch size of 1
1684
1685 if (!fNet || fNet->GetDepth() == 0) {
1686 Log() << kFATAL << "The network has not been trained and fNet is not built"
1687 << Endl;
1688 }
1689
1690 // input size must be equal to 1 which is the batch size of fNet
1691 R__ASSERT(fNet->GetBatchSize() == 1);
1692
1693 // int batchWidth = fNet->GetBatchWidth();
1694 // int batchDepth = fNet->GetBatchDepth();
1695 // int batchHeight = fNet->GetBatchHeight();
1696// int noutput = fNet->GetOutputWidth();
1697
1698
1699 // get current event
1700 const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1701
1702 size_t nVariables = GetEvent()->GetNVariables();
1703
1704 // for Columnlayout tensor memory layout is HWC while for rowwise is CHW
 // NOTE(review): doxygen line 1705 is missing from this extract — it is
 // presumably the opening "if (fXInput.GetLayout() == ...ColumnMajor) {"
 // matched by the "} else {" at line 1731 below; confirm against source.
1706 R__ASSERT(fXInput.GetShape().size() < 4);
1707 size_t nc, nhw = 0;
1708 if (fXInput.GetShape().size() == 2) {
1709 nc = fXInput.GetShape()[0];
1710 if (nc != 1 ) {
 // NOTE(review): doxygen line 1711 is also missing here (likely a shape
 // print/debug statement); confirm against source.
1712 Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1"
1713 << Endl;
1714 }
1715 nhw = fXInput.GetShape()[1];
1716 } else {
1717 nc = fXInput.GetCSize();
1718 nhw = fXInput.GetWSize();
1719 }
1720 if ( nVariables != nc * nhw) {
1721 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1722 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw
1723 << Endl;
1724 }
1725 for (size_t j = 0; j < nc; j++) {
1726 for (size_t k = 0; k < nhw; k++) {
1727 // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
1728 fXInputBuffer[ k * nc + j] = inputValues[j*nhw + k]; // for column layout !!!
1729 }
1730 }
1731 } else {
1732 // row-wise layout
1733 assert(fXInput.GetShape().size() >= 4);
1734 size_t nc = fXInput.GetCSize();
1735 size_t nh = fXInput.GetHSize();
1736 size_t nw = fXInput.GetWSize();
1737 size_t n = nc * nh * nw;
1738 if ( nVariables != n) {
1739 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1740 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
1741 << Endl;
1742 }
1743 for (size_t j = 0; j < n; j++) {
1744 // in this case TMVA event has same order as input tensor
1745 fXInputBuffer[ j ] = inputValues[j]; // for column layout !!!
1746 }
1747 }
1748 // copy buffer in input
1749 fXInput.GetDeviceBuffer().CopyFrom( fXInputBuffer);
1750
1751 // perform the prediction
1752 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1753
1754 // return value
1755 double mvaValue = (*fYHat)(0, 0);
1756
1757 // for debugging
1758#ifdef DEBUG_MVAVALUE
1759 using Tensor_t = std::vector<MatrixImpl_t>;
1760 TMatrixF xInput(n1,n2, inputValues.data() );
1761 std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1762 xInput.Print();
1763 std::cout << "Output of DeepNet " << mvaValue << std::endl;
1764 auto & deepnet = *fNet;
1765 std::cout << "Loop on layers " << std::endl;
1766 for (int l = 0; l < deepnet.GetDepth(); ++l) {
1767 std::cout << "Layer " << l;
1768 const auto * layer = deepnet.GetLayerAt(l);
1769 const Tensor_t & layer_output = layer->GetOutput();
1770 layer->Print();
1771 std::cout << "DNN output " << layer_output.size() << std::endl;
1772 for (size_t i = 0; i < layer_output.size(); ++i) {
1773#ifdef R__HAS_TMVAGPU
1774 //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1775 TMatrixD m = layer_output[i];
1776#else
1777 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1778#endif
1779 m.Print();
1780 }
1781 const Tensor_t & layer_weights = layer->GetWeights();
1782 std::cout << "DNN weights " << layer_weights.size() << std::endl;
1783 if (layer_weights.size() > 0) {
1784 int i = 0;
1785#ifdef R__HAS_TMVAGPU
1786 TMatrixD m = layer_weights[i];
1787// TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1788#else
1789 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1790#endif
1791 m.Print();
1792 }
1793 }
1794#endif
1795
 // guard against a diverged network producing NaN
1796 return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1797}
1798////////////////////////////////////////////////////////////////////////////////
1799/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1800////////////////////////////////////////////////////////////////////////////////
1801template <typename Architecture_t>
1802std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1803{
1804
1805 // Check whether the model is setup
1806 if (!fNet || fNet->GetDepth() == 0) {
1807 Log() << kFATAL << "The network has not been trained and fNet is not built"
1808 << Endl;
1809 }
1810
1811 // rebuild the networks
1812 this->SetBatchSize(batchSize);
1813 size_t inputDepth = this->GetInputDepth();
1814 size_t inputHeight = this->GetInputHeight();
1815 size_t inputWidth = this->GetInputWidth();
1816 size_t batchDepth = this->GetBatchDepth();
1817 size_t batchHeight = this->GetBatchHeight();
1818 size_t batchWidth = this->GetBatchWidth();
1819 ELossFunction J = fNet->GetLossFunction();
1820 EInitialization I = fNet->GetInitialization();
1821 ERegularization R = fNet->GetRegularization();
1822 Double_t weightDecay = fNet->GetWeightDecay();
1823
1824 using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1825 using Matrix_t = typename Architecture_t::Matrix_t;
1826 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1827
1828 // create the deep neural network
1829 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1830 std::vector<DeepNet_t> nets{};
1831 fBuildNet = false;
1832 CreateDeepNet(deepNet,nets);
1833
1834 // copy weights from the saved fNet to the built DeepNet
1835 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1836 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1837 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1838 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
1839 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
1840 // }
1841 }
1842
1843 size_t n1 = deepNet.GetBatchHeight();
1844 size_t n2 = deepNet.GetBatchWidth();
1845 size_t n0 = deepNet.GetBatchSize();
1846 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1847 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1848 n1 = deepNet.GetBatchSize();
1849 n0 = 1;
1850 }
1851 //this->SetBatchDepth(n0);
1852 Long64_t nEvents = lastEvt - firstEvt;
1853 TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1854 TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1855
1856
1857 // Tensor_t xInput;
1858 // for (size_t i = 0; i < n0; ++i)
1859 // xInput.emplace_back(Matrix_t(n1,n2));
1860
1861 // create pointer to output matrix used for the predictions
1862 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1863
1864 // use timer
1865 Timer timer( nEvents, GetName(), kTRUE );
1866
1867 if (logProgress)
1868 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1869 << "Evaluation of " << GetMethodName() << " on "
1870 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1871 << " sample (" << nEvents << " events)" << Endl;
1872
1873
1874 // eventg loop
1875 std::vector<double> mvaValues(nEvents);
1876
1877
1878 for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1879
1880 Long64_t ievt_end = ievt + batchSize;
1881 // case of batch prediction for
1882 if (ievt_end <= lastEvt) {
1883
1884 if (ievt == firstEvt) {
1885 Data()->SetCurrentEvent(ievt);
1886 size_t nVariables = GetEvent()->GetNVariables();
1887
1888 if (n1 == batchSize && n0 == 1) {
1889 if (n2 != nVariables) {
1890 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1891 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1892 << Endl;
1893 }
1894 } else {
1895 if (n1*n2 != nVariables || n0 != batchSize) {
1896 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1897 << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1898 << Endl;
1899 }
1900 }
1901 }
1902
1903 auto batch = testData.GetTensorBatch();
1904 auto inputTensor = batch.GetInput();
1905
1906 auto xInput = batch.GetInput();
1907 // make the prediction
1908 deepNet.Prediction(yHat, xInput, fOutputFunction);
1909 for (size_t i = 0; i < batchSize; ++i) {
1910 double value = yHat(i,0);
1911 mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1912 }
1913 }
1914 else {
1915 // case of remaining events: compute prediction by single event !
1916 for (Long64_t i = ievt; i < lastEvt; ++i) {
1917 Data()->SetCurrentEvent(i);
1918 mvaValues[i] = GetMvaValue();
1919 }
1920 }
1921 }
1922
1923 if (logProgress) {
1924 Log() << kINFO
1925 << "Elapsed time for evaluation of " << nEvents << " events: "
1926 << timer.GetElapsedTime() << " " << Endl;
1927 }
1928
1929 return mvaValues;
1930}
1931
1932const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1933{
1934 size_t nVariables = GetEvent()->GetNVariables();
1935 MatrixImpl_t X(1, nVariables);
1936 TensorImpl_t X_vec ( 1, 1, nVariables); // needs to be really 1
1937 const Event *ev = GetEvent();
1938 const std::vector<Float_t>& inputValues = ev->GetValues();
1939 for (size_t i = 0; i < nVariables; i++) {
1940 X_vec(0,i,0) = inputValues[i]; // in case of column format !!
1941 }
1942 //X_vec.emplace_back(X);
1943
1944 size_t nTargets = std::max(1u, ev->GetNTargets());
1945 MatrixImpl_t YHat(1, nTargets);
1946 std::vector<Float_t> output(nTargets);
1947 fNet->Prediction(YHat, X_vec, fOutputFunction);
1948
1949 for (size_t i = 0; i < nTargets; i++)
1950 output[i] = YHat(0, i);
1951
1952 if (fRegressionReturnVal == NULL) {
1953 fRegressionReturnVal = new std::vector<Float_t>();
1954 }
1955 fRegressionReturnVal->clear();
1956
1957 Event * evT = new Event(*ev);
1958 for (size_t i = 0; i < nTargets; ++i) {
1959 evT->SetTarget(i, output[i]);
1960 }
1961
1962 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1963 for (size_t i = 0; i < nTargets; ++i) {
1964 fRegressionReturnVal->push_back(evT2->GetTarget(i));
1965 }
1966 delete evT;
1967 return *fRegressionReturnVal;
1968}
1969
1970const std::vector<Float_t> & TMVA::MethodDL::GetMulticlassValues()
1971{
1972 size_t nVariables = GetEvent()->GetNVariables();
1973 MatrixImpl_t X(1, nVariables);
1974 TensorImpl_t X_vec ( 1, 1, nVariables);
1975 MatrixImpl_t YHat(1, DataInfo().GetNClasses());
1976 if (fMulticlassReturnVal == NULL) {
1977 fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1978 }
1979
1980 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1981 for (size_t i = 0; i < nVariables; i++) {
1982 X_vec(0,i, 0) = inputValues[i];
1983 }
1984 //X_vec.emplace_back(X);
1985 fNet->Prediction(YHat, X_vec, fOutputFunction);
1986 for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1987 (*fMulticlassReturnVal)[i] = YHat(0, i);
1988 }
1989 return *fMulticlassReturnVal;
1990}
1991
1992
1993////////////////////////////////////////////////////////////////////////////////
1994/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1995////////////////////////////////////////////////////////////////////////////////
1996std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
1997{
1998 // Long64_t nEvents = Data()->GetNEvents();
1999 // std::vector<Double_t> v(nEvents);
2000 // for (Long64_t i = 0; i < nEvents; ++i) {
2001 // Data()->SetCurrentEvent(i);
2002 // v[i] = GetMvaValue();
2003 // }
2004 // return v;
2005
2006
2007 Long64_t nEvents = Data()->GetNEvents();
2008 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2009 if (firstEvt < 0) firstEvt = 0;
2010 nEvents = lastEvt-firstEvt;
2011
2012 // use same batch size as for training (from first strategy)
2013 size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
2014 size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
2015 if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
2016
2017 // using for training same scalar type defined for the prediction
2018 if (this->GetArchitectureString() == "GPU") {
2019#ifdef R__HAS_TMVAGPU
2020 Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
2021#ifdef R__HAS_CUDNN
2022 return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2023#else
2024 return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2025#endif
2026
2027#endif
2028 }
2029 Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
2030 return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
2031}
2032////////////////////////////////////////////////////////////////////////////////
2033void MethodDL::AddWeightsXMLTo(void * parent) const
2034{
2035 // Create the parent XML node with name "Weights"
2036 auto & xmlEngine = gTools().xmlengine();
2037 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2038
2039 /*! Get all necessary information, in order to be able to reconstruct the net
2040 * if we read the same XML file. */
2041
2042 // Deep Net specific info
2043 Int_t depth = fNet->GetDepth();
2044
2045 Int_t inputDepth = fNet->GetInputDepth();
2046 Int_t inputHeight = fNet->GetInputHeight();
2047 Int_t inputWidth = fNet->GetInputWidth();
2048
2049 Int_t batchSize = fNet->GetBatchSize();
2050
2051 Int_t batchDepth = fNet->GetBatchDepth();
2052 Int_t batchHeight = fNet->GetBatchHeight();
2053 Int_t batchWidth = fNet->GetBatchWidth();
2054
2055 char lossFunction = static_cast<char>(fNet->GetLossFunction());
2056 char initialization = static_cast<char>(fNet->GetInitialization());
2057 char regularization = static_cast<char>(fNet->GetRegularization());
2058
2059 Double_t weightDecay = fNet->GetWeightDecay();
2060
2061 // Method specific info (not sure these are needed)
2062 char outputFunction = static_cast<char>(this->GetOutputFunction());
2063 //char lossFunction = static_cast<char>(this->GetLossFunction());
2064
2065 // Add attributes to the parent node
2066 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2067
2068 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2069 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2070 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2071
2072 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2073 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2074 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2075 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2076
2077 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2078 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2079 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2080 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2081
2082 gTools().AddAttr(nn, "WeightDecay", weightDecay);
2083
2084
2085 for (Int_t i = 0; i < depth; i++)
2086 {
2087 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2088 }
2089
2090
2091}
2092
2093////////////////////////////////////////////////////////////////////////////////
/// Rebuild the deep network (geometry, layer topology and all weights) from
/// the XML node written by AddWeightsXMLTo, then recreate the inference
/// input/output buffers (fXInput, fXInputBuffer, fYHat).
/// NOTE(review): this extraction is missing the signature line
/// (void MethodDL::ReadWeightsFromXML(void *rootXML), original line 2094) and
/// original lines 2298/2301-2302 which create fXInput and fXInputBuffer --
/// restore those from the original file.
2095{
2096
// accept both a node that contains a "Weights" child and the child itself
2097 auto netXML = gTools().GetChild(rootXML, "Weights");
2098 if (!netXML){
2099 netXML = rootXML;
2100 }
2101
// read back the global network geometry written by AddWeightsXMLTo
2102 size_t netDepth;
2103 gTools().ReadAttr(netXML, "NetDepth", netDepth);
2104
2105 size_t inputDepth, inputHeight, inputWidth;
2106 gTools().ReadAttr(netXML, "InputDepth", inputDepth);
2107 gTools().ReadAttr(netXML, "InputHeight", inputHeight);
2108 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
2109
2110 size_t batchSize, batchDepth, batchHeight, batchWidth;
2111 gTools().ReadAttr(netXML, "BatchSize", batchSize);
2112 // use always batchsize = 1
2113 //batchSize = 1;
2114 gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
2115 gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
2116 gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
2117
// enumerations were serialized as single characters
2118 char lossFunctionChar;
2119 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
2120 char initializationChar;
2121 gTools().ReadAttr(netXML, "Initialization", initializationChar);
2122 char regularizationChar;
2123 gTools().ReadAttr(netXML, "Regularization", regularizationChar);
2124 char outputFunctionChar;
2125 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
2126 double weightDecay;
2127 gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
2128
2129 // create the net
2130
2131 // DeepNetCpu_t is defined in MethodDL.h
2132 this->SetInputDepth(inputDepth);
2133 this->SetInputHeight(inputHeight);
2134 this->SetInputWidth(inputWidth);
2135 this->SetBatchDepth(batchDepth);
2136 this->SetBatchHeight(batchHeight);
2137 this->SetBatchWidth(batchWidth);
2138
2139
2140
2141 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2142 batchHeight, batchWidth,
2143 static_cast<ELossFunction>(lossFunctionChar),
2144 static_cast<EInitialization>(initializationChar),
2145 static_cast<ERegularization>(regularizationChar),
2146 weightDecay));
2147
2148 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
2149
2150
2151 //size_t previousWidth = inputWidth;
2152 auto layerXML = gTools().xmlengine().GetChild(netXML);
2153
2154 // loop on the layer and add them to the network
2155 for (size_t i = 0; i < netDepth; i++) {
2156
// dispatch on the node name to recreate the matching layer type
2157 TString layerName = gTools().xmlengine().GetNodeName(layerXML);
2158
2159 // case of dense layer
2160 if (layerName == "DenseLayer") {
2161
2162 // read width and activation function and then we can create the layer
2163 size_t width = 0;
2164 gTools().ReadAttr(layerXML, "Width", width);
2165
2166 // Read activation function.
2167 TString funcString;
2168 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2169 EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
2170
2171
2172 fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
2173
2174 }
2175 // Convolutional Layer
2176 else if (layerName == "ConvLayer") {
2177
2178 // read width and activation function and then we can create the layer
2179 size_t depth = 0;
2180 gTools().ReadAttr(layerXML, "Depth", depth);
2181 size_t fltHeight, fltWidth = 0;
2182 size_t strideRows, strideCols = 0;
2183 size_t padHeight, padWidth = 0;
2184 gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
2185 gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
2186 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2187 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2188 gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
2189 gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
2190
2191 // Read activation function.
2192 TString funcString;
2193 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2194 EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
2195
2196
2197 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2198 padHeight, padWidth, actFunction);
2199
2200 }
2201
2202 // MaxPool Layer
2203 else if (layerName == "MaxPoolLayer") {
2204
2205 // read maxpool layer info
2206 size_t filterHeight, filterWidth = 0;
2207 size_t strideRows, strideCols = 0;
2208 gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
2209 gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2210 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2211 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2212
2213 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2214 }
2215 // Reshape Layer
2216 else if (layerName == "ReshapeLayer") {
2217
2218 // read reshape layer info
2219 size_t depth, height, width = 0;
2220 gTools().ReadAttr(layerXML, "Depth", depth);
2221 gTools().ReadAttr(layerXML, "Height", height);
2222 gTools().ReadAttr(layerXML, "Width", width);
2223 int flattening = 0;
2224 gTools().ReadAttr(layerXML, "Flattening",flattening );
2225
2226 fNet->AddReshapeLayer(depth, height, width, flattening);
2227
2228 }
2229 // RNN Layer
2230 else if (layerName == "RNNLayer") {
2231
2232 // read RNN layer info
2233 size_t stateSize,inputSize, timeSteps = 0;
2234 int rememberState= 0;
2235 int returnSequence = 0;
2236 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2237 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2238 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2239 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2240 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2241
2242 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2243
2244 }
2245 // LSTM Layer
2246 else if (layerName == "LSTMLayer") {
2247
2248 // read RNN layer info
2249 size_t stateSize,inputSize, timeSteps = 0;
2250 int rememberState, returnSequence = 0;
2251 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2252 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2253 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2254 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2255 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2256
2257 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2258
2259 }
2260 // GRU Layer
2261 else if (layerName == "GRULayer") {
2262
2263 // read RNN layer info
2264 size_t stateSize,inputSize, timeSteps = 0;
2265 int rememberState, returnSequence, resetGateAfter = 0;
2266 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2267 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2268 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2269 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2270 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2271 gTools().ReadAttr(layerXML, "ResetGateAfter", resetGateAfter);
2272
// cuDNN only implements the resetGateAfter == true GRU variant
2273 if (!resetGateAfter && ArchitectureImpl_t::IsCudnn())
2274 Warning("ReadWeightsFromXML",
2275 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2276
2277 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2278 }
2279 // BatchNorm Layer
2280 else if (layerName == "BatchNormLayer") {
2281 // use some dummy values which will be overwritten in BatchNormLayer::ReadWeightsFromXML
2282 fNet->AddBatchNormLayer(0., 0.0);
2283 }
2284 // read weights and biases
2285 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2286
2287 // read next layer
2288 layerXML = gTools().GetNextChild(layerXML);
2289 }
2290
2291 fBuildNet = false;
2292 // create now the input and output matrices
2293 //int n1 = batchHeight;
2294 //int n2 = batchWidth;
2295 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2296 //if (fXInput.size() > 0) fXInput.clear();
2297 //fXInput.emplace_back(MatrixImpl_t(n1,n2));
// NOTE(review): original lines 2298 and 2301-2302 (creation of fXInput and
// fXInputBuffer) were dropped by the extraction; the condition below guarded
// the column-major special case -- restore from the original file.
2299 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
2300 // make here a ColumnMajor tensor
2303
2304 // create pointer to output matrix used for the predictions
2305 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2306
2307
2308}
2309
2310
2311////////////////////////////////////////////////////////////////////////////////
/// Reading weights from a plain-text stream is not supported by this method;
/// persistence goes through AddWeightsXMLTo / ReadWeightsFromXML instead.
/// Intentionally a no-op.
2312void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
2313{
2314}
2315
2316////////////////////////////////////////////////////////////////////////////////
/// Variable-importance ranking is not implemented yet; always returns NULL.
/// NOTE(review): the signature line (const Ranking* MethodDL::CreateRanking(),
/// original line 2317) is missing from this extraction.
2318{
2319 // TODO
2320 return NULL;
2321}
2322
2323////////////////////////////////////////////////////////////////////////////////
/// Help message output is not implemented yet; intentionally a no-op.
/// NOTE(review): the signature line (void MethodDL::GetHelpMessage() const,
/// original line 2324) is missing from this extraction.
2325{
2326 // TODO
2327}
2328
2329} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define e(i)
Definition: RSha256.hxx:103
unsigned int UInt_t
Definition: RtypesCore.h:44
const Bool_t kFALSE
Definition: RtypesCore.h:90
double Double_t
Definition: RtypesCore.h:57
long long Long64_t
Definition: RtypesCore.h:71
const Bool_t kTRUE
Definition: RtypesCore.h:89
#define ClassImp(name)
Definition: Rtypes.h:361
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:96
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
The Formula class.
Definition: TFormula.h:84
Double_t Eval(Double_t x) const
Sets first variable (e.g. x) and evaluate formula.
Definition: TFormula.cxx:3305
UInt_t GetNCpu()
Definition: Config.h:72
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Definition: Configurable.h:168
MsgLogger & Log() const
Definition: Configurable.h:122
Adadelta Optimizer class.
Definition: Adadelta.h:44
Adagrad Optimizer class.
Definition: Adagrad.h:44
Adam Optimizer class.
Definition: Adam.h:44
static void PrintTensor(const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false)
Definition: Cpu.h:862
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition: Cpu.h:108
static bool IsCudnn()
Definition: Cpu.h:131
Generic Deep Neural Network class.
Definition: DeepNet.h:75
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition: DeepNet.h:827
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:610
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:742
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:569
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:487
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:441
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:775
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:526
Generic layer class.
Definition: DenseLayer.h:57
RMSProp Optimizer class.
Definition: RMSProp.h:44
Stochastic Batch Gradient Descent Optimizer class.
Definition: SGD.h:45
Generic General Layer class.
Definition: GeneralLayer.h:49
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:393
Class that contains all the data information.
Definition: DataSetInfo.h:60
UInt_t GetNClasses() const
Definition: DataSetInfo.h:153
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:205
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:99
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:359
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:308
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:319
UInt_t GetClass() const
Definition: Event.h:86
std::vector< Float_t > & GetValues()
Definition: Event.h:94
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
const char * GetName() const
Definition: MethodBase.h:333
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:684
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
Definition: MethodBase.h:345
const TString & GetMethodName() const
Definition: MethodBase.h:330
const Event * GetEvent() const
Definition: MethodBase.h:749
DataSetInfo & DataInfo() const
Definition: MethodBase.h:409
UInt_t GetNVariables() const
Definition: MethodBase.h:344
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:593
UInt_t GetNvar() const
Definition: MethodBase.h:343
TrainingHistory fTrainHistory
Definition: MethodBase.h:425
DataSet * Data() const
Definition: MethodBase.h:408
IPythonInteractive * fInteractive
Definition: MethodBase.h:446
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition: MethodDL.h:104
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition: MethodDL.h:175
void GetHelpMessage() const
Definition: MethodDL.cxx:2324
DNN::ELossFunction fLossFunction
The loss function.
Definition: MethodDL.h:182
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDL.cxx:1970
std::vector< size_t > fInputShape
Contains the batch size (no.
Definition: MethodDL.h:170
TString fLayoutString
The string defining the layout of the deep net.
Definition: MethodDL.h:186
void SetInputDepth(int inputDepth)
Setters.
Definition: MethodDL.h:278
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:200
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1634
size_t GetBatchHeight() const
Definition: MethodDL.h:255
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
Definition: MethodDL.cxx:1996
TString fWeightInitializationString
The string defining the weight initialization method.
Definition: MethodDL.h:189
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:767
TensorImpl_t fXInput
Definition: MethodDL.h:198
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:178
TString fArchitectureString
The string defining the architecture: CPU or GPU.
Definition: MethodDL.h:190
void Init()
default initializations
Definition: MethodDL.cxx:429
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:1018
void TrainDeepNet()
train of deep neural network using the defined architecture
Definition: MethodDL.cxx:1163
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:272
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:261
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:580
UInt_t GetNumValidationSamples()
parse the validation string and return the number of event data used for validation
TString GetBatchLayoutString() const
Definition: MethodDL.h:265
void SetInputWidth(int inputWidth)
Definition: MethodDL.h:280
void ProcessOptions()
Definition: MethodDL.cxx:227
HostBufferImpl_t fXInputBuffer
Definition: MethodDL.h:199
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition: MethodDL.h:176
size_t GetInputDepth() const
Definition: MethodDL.h:247
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDL.cxx:1932
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:201
TString GetInputLayoutString() const
Definition: MethodDL.h:264
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:285
size_t GetInputHeight() const
Definition: MethodDL.h:248
TString GetArchitectureString() const
Definition: MethodDL.h:270
void ParseBatchLayout()
Parse the input layout.
Definition: MethodDL.cxx:479
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate batch normalization layer.
Definition: MethodDL.cxx:889
void ReadWeightsFromStream(std::istream &)
Definition: MethodDL.cxx:2312
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:2094
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition: MethodDL.h:191
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:89
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:181
DNN::EInitialization fWeightInitialization
The initialization method.
Definition: MethodDL.h:180
size_t GetBatchDepth() const
Definition: MethodDL.h:254
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate rnn layer.
Definition: MethodDL.cxx:930
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:196
size_t GetInputWidth() const
Definition: MethodDL.h:249
void SetInputShape(std::vector< size_t > inputShape)
Definition: MethodDL.h:281
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:262
TString fBatchLayoutString
The string defining the layout of the batch.
Definition: MethodDL.h:185
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:1090
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:668
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:828
TString fTrainingStrategyString
The string defining the training strategy.
Definition: MethodDL.h:188
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2317
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition: MethodDL.h:106
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:284
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:1051
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:286
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1802
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:260
void SetBatchSize(size_t batchSize)
Definition: MethodDL.h:283
TString GetLayoutString() const
Definition: MethodDL.h:266
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition: MethodDL.h:174
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition: MethodDL.h:102
size_t GetBatchWidth() const
Definition: MethodDL.h:256
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:2033
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:103
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDL.cxx:1680
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:1044
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:436
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:193
void SetInputHeight(int inputHeight)
Definition: MethodDL.h:279
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:526
TString fErrorStrategy
The string defining the error strategy for training.
Definition: MethodDL.h:187
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:162
TString fInputLayoutString
The string defining the layout of the input.
Definition: MethodDL.h:184
EMsgType GetMinType() const
Definition: MsgLogger.h:71
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:147
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1173
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
TXMLEngine & xmlengine()
Definition: Tools.h:268
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:353
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1234
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
Definition: Types.h:73
EAnalysisType
Definition: Types.h:127
@ kMulticlass
Definition: Types.h:130
@ kClassification
Definition: Types.h:128
@ kRegression
Definition: Types.h:129
@ kTraining
Definition: Types.h:144
void Print(Option_t *name="") const
Print the matrix as a table of elements.
TMatrixT.
Definition: TMatrixT.h:39
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Definition: TNamed.cxx:128
An array of TObjects.
Definition: TObjArray.h:37
Collectable string class.
Definition: TObjString.h:28
const TString & GetString() const
Definition: TObjString.h:46
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:877
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:891
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:550
Basic string class.
Definition: TString.h:131
Ssiz_t Length() const
Definition: TString.h:405
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1921
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1106
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:499
const char * Data() const
Definition: TString.h:364
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
@ kTrailing
Definition: TString.h:262
@ kBoth
Definition: TString.h:262
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1138
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2197
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:709
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
const Int_t n
Definition: legend1.C:16
#define I(x, y, z)
constexpr size_t block
Definition: BatchHelpers.h:29
double T(double x)
Definition: ChebyshevPol.h:34
static const std::string separator("@@@")
EInitialization
Definition: Functions.h:72
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:82
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:238
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:57
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDL.cxx:70
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Bool_t IsNaN(Double_t x)
Definition: TMath.h:882
Double_t Log(Double_t x)
Definition: TMath.h:750
All of the options that can be specified in the training string.
Definition: MethodDL.h:69
DNN::EOptimizer optimizer
Definition: MethodDL.h:75
DNN::ERegularization regularization
Definition: MethodDL.h:74
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:80
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
auto * t1
Definition: textangle.C:20
REAL epsilon
Definition: triangle.c:617
static void output(int code)
Definition: gifencode.c:226