Logo ROOT  
Reference Guide
MethodDL.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/cnn:$Id$Ndl
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * Web : http://tmva.sourceforge.net *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31#include "TObjString.h"
32
33#include "TMVA/Tools.h"
34#include "TMVA/Configurable.h"
35#include "TMVA/IMethod.h"
37#include "TMVA/MethodDL.h"
38#include "TMVA/Types.h"
40#include "TMVA/DNN/Functions.h"
42#include "TMVA/DNN/SGD.h"
43#include "TMVA/DNN/Adam.h"
44#include "TMVA/DNN/Adagrad.h"
45#include "TMVA/DNN/RMSProp.h"
46#include "TMVA/DNN/Adadelta.h"
47#include "TMVA/Timer.h"
48
49#ifdef R__HAS_TMVAGPU
51#ifdef R__HAS_CUDNN
53#endif
54#endif
55
56#include <chrono>
57
60
61using namespace TMVA::DNN::CNN;
62using namespace TMVA::DNN;
63
69
70
71namespace TMVA {
72
73
74////////////////////////////////////////////////////////////////////////////////
75TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
76{
77 key.ToUpper();
78 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
79 if (it == keyValueMap.end()) {
80 return TString("");
81 }
82 return it->second;
83}
84
85////////////////////////////////////////////////////////////////////////////////
86template <typename T>
87T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
88
89////////////////////////////////////////////////////////////////////////////////
90template <>
91int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
92{
93 TString value(fetchValueTmp(keyValueMap, key));
94 if (value == "") {
95 return defaultValue;
96 }
97 return value.Atoi();
98}
99
100////////////////////////////////////////////////////////////////////////////////
101template <>
102double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
103{
104 TString value(fetchValueTmp(keyValueMap, key));
105 if (value == "") {
106 return defaultValue;
107 }
108 return value.Atof();
109}
110
111////////////////////////////////////////////////////////////////////////////////
112template <>
113TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
114{
115 TString value(fetchValueTmp(keyValueMap, key));
116 if (value == "") {
117 return defaultValue;
118 }
119 return value;
120}
121
122////////////////////////////////////////////////////////////////////////////////
123template <>
124bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
125{
126 TString value(fetchValueTmp(keyValueMap, key));
127 if (value == "") {
128 return defaultValue;
129 }
130
131 value.ToUpper();
132 if (value == "TRUE" || value == "T" || value == "1") {
133 return true;
134 }
135
136 return false;
137}
138
139////////////////////////////////////////////////////////////////////////////////
140template <>
141std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
142 std::vector<double> defaultValue)
143{
144 TString parseString(fetchValueTmp(keyValueMap, key));
145 if (parseString == "") {
146 return defaultValue;
147 }
148
149 parseString.ToUpper();
150 std::vector<double> values;
151
152 const TString tokenDelim("+");
153 TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
154 TIter nextToken(tokenStrings);
155 TObjString *tokenString = (TObjString *)nextToken();
156 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
157 std::stringstream sstr;
158 double currentValue;
159 sstr << tokenString->GetString().Data();
160 sstr >> currentValue;
161 values.push_back(currentValue);
162 }
163 return values;
164}
165
166////////////////////////////////////////////////////////////////////////////////
// NOTE(review): the signature line (presumably `void MethodDL::DeclareOptions()`,
// doc line 167) is missing from this listing — a doc-scrape artifact. Verify
// against the original MethodDL.cxx before compiling.
// Purpose: declare every user-configurable option with its default value —
// input/batch/network layouts, loss function, weight initialization,
// random seed, validation-set size, target architecture, and the
// comma-separated training-strategy string.
168{
169 // Set default values for all option strings
170
171 DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
172
173 DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
174
175 DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
176
177 DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
178 " or cross entropy (binary classification).");
179 AddPreDefVal(TString("CROSSENTROPY"));
180 AddPreDefVal(TString("SUMOFSQUARES"));
181 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
182
183 DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
184 AddPreDefVal(TString("XAVIER"));
185 AddPreDefVal(TString("XAVIERUNIFORM"));
186 AddPreDefVal(TString("GAUSS"));
187 AddPreDefVal(TString("UNIFORM"));
188 AddPreDefVal(TString("IDENTITY"));
189 AddPreDefVal(TString("ZERO"));
190
191 DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
192
193 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
194 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
195 "Specify as 100 to use exactly 100 events. (Default: 20%)");
196
197 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
198 AddPreDefVal(TString("STANDARD")); // deprecated and not supported anymore
199 AddPreDefVal(TString("CPU"));
200 AddPreDefVal(TString("GPU"));
201 AddPreDefVal(TString("OPENCL")); // not yet implemented
202 AddPreDefVal(TString("CUDNN")); // not needed (by default GPU is now CUDNN if available)
203
204 // define training strategy separated by a separator "|"
205 DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-3,"
206 "Momentum=0.0,"
207 "ConvergenceSteps=100,"
208 "MaxEpochs=2000,"
209 "Optimizer=ADAM,"
210 "BatchSize=30,"
211 "TestRepetitions=1,"
212 "WeightDecay=0.0,"
213 "Regularization=None,"
214 "DropConfig=0.0",
215 "TrainingStrategy", "Defines the training strategies.")
216}
217
218////////////////////////////////////////////////////////////////////////////////
// NOTE(review): this listing is missing many lines (doc-scrape artifact): the
// signature (presumably `void MethodDL::ProcessOptions()`), several `if`
// headers, the ParseInputLayout/ParseBatchLayout/ParseLayoutString calls around
// doc lines 275-280, and the enum-assignment lines inside the loss-function,
// weight-initialization, regularization and optimizer branches. Verify against
// the original MethodDL.cxx.
// Purpose: validate/normalize the Architecture option, map the option strings
// to loss function / weight initialization / per-strategy training settings,
// then derive default input and batch shapes when none were specified.
220{
221
223 Log() << kINFO << "Will ignore negative events in training!" << Endl;
224 }
225
226 if (fArchitectureString == "STANDARD") {
227 Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
// NOTE(review): the next message reads "Architecture=CPU or Architecture=CPU";
// presumably one of them should be GPU — cannot change the string literal in a
// doc-only pass; flag for a follow-up fix.
228 "Please use Architecture=CPU or Architecture=CPU."
229 "See the TMVA Users' Guide for instructions if you "
230 "encounter problems."
231 << Endl;
232 Log() << kINFO << "We will use instead the CPU architecture" << Endl;
233 fArchitectureString = "CPU";
234 }
235 if (fArchitectureString == "OPENCL") {
236 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
237 "Please use Architecture=CPU or Architecture=CPU for the "
238 "time being. See the TMVA Users' Guide for instructions "
239 "if you encounter problems."
240 << Endl;
241 // use instead GPU
242 Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
243 fArchitectureString = "GPU";
244 }
245
246 // the architecture can now be set at runtime as an option
247
248
249 if (fArchitectureString == "GPU" || fArchitectureString == "CUDNN") {
250#ifdef R__HAS_TMVAGPU
251 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
252#else // case TMVA does not support GPU
253 Log() << kERROR << "CUDA backend not enabled. Please make sure "
254 "you have CUDA installed and it was successfully "
255 "detected by CMAKE by using -Dtmva-gpu=On "
256 << Endl;
257 fArchitectureString = "CPU";
258 Log() << kINFO << "Will now use instead the CPU architecture !" << Endl;
259#endif
260 }
261
262 if (fArchitectureString == "CPU") {
263#ifdef R__HAS_TMVACPU // TMVA has CPU BLAS and IMT support
264 Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support !" << Endl;
265#else // TMVA has no CPU BLAS or IMT support
266 Log() << kINFO << "Multi-core CPU backend not enabled. For better performances, make sure "
267 "you have a BLAS implementation and it was successfully "
268 "detected by CMake as well that the imt CMake flag is set."
269 << Endl;
270 Log() << kINFO << "Will use anyway the CPU architecture but with slower performance" << Endl;
271#endif
272 }
273
274 // Input Layout
277
278 // Loss function and output.
281 if (fErrorStrategy == "SUMOFSQUARES") {
282 fLossFunction = ELossFunction::kMeanSquaredError;
283 }
284 if (fErrorStrategy == "CROSSENTROPY") {
286 }
288 } else if (fAnalysisType == Types::kRegression) {
289 if (fErrorStrategy != "SUMOFSQUARES") {
290 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
291 << " neural net error function. Setting error function to "
292 << " SUMOFSQUARES now." << Endl;
293 }
294
295 fLossFunction = ELossFunction::kMeanSquaredError;
297 } else if (fAnalysisType == Types::kMulticlass) {
298 if (fErrorStrategy == "SUMOFSQUARES") {
299 fLossFunction = ELossFunction::kMeanSquaredError;
300 }
301 if (fErrorStrategy == "CROSSENTROPY") {
303 }
304 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
305 fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
306 }
308 }
309
310 // Initialization
311 // the biases will be always initialized to zero
// NOTE(review): the fWeightInitialization assignments inside these branches
// (doc lines 313/315/317/319/321/323/325) are missing from this listing.
312 if (fWeightInitializationString == "XAVIER") {
314 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
316 } else if (fWeightInitializationString == "GAUSS") {
318 } else if (fWeightInitializationString == "UNIFORM") {
320 } else if (fWeightInitializationString == "ZERO") {
322 } else if (fWeightInitializationString == "IDENTITY") {
324 } else {
326 }
327
328 // Training settings.
329
// NOTE(review): the declaration of `strategyKeyValues` (doc line 330, a parse
// of fTrainingStrategyString into per-strategy key/value maps) is missing here.
331 for (auto &block : strategyKeyValues) {
332 TTrainingSettings settings;
333
334 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
335 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
336 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
337 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
338 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
339 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
340 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
341 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
342
343 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
344 if (regularization == "L1") {
346 } else if (regularization == "L2") {
348 } else {
350 }
351
352 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
353 settings.optimizerName = optimizer;
354 if (optimizer == "SGD") {
356 } else if (optimizer == "ADAM") {
358 } else if (optimizer == "ADAGRAD") {
360 } else if (optimizer == "RMSPROP") {
362 } else if (optimizer == "ADADELTA") {
364 } else {
365 // Make Adam as default choice if the input string is
366 // incorrect.
368 settings.optimizerName = "ADAM";
369 }
370 // check for specific optimizer parameters
371 std::vector<TString> optimParamLabels = {"_beta1", "_beta2", "_eps", "_rho"};
372 //default values
373 std::map<TString, double> defaultValues = {
374 {"ADADELTA_eps", 1.E-8}, {"ADADELTA_rho", 0.95},
375 {"ADAGRAD_eps", 1.E-8},
376 {"ADAM_beta1", 0.9}, {"ADAM_beta2", 0.999}, {"ADAM_eps", 1.E-7},
377 {"RMSPROP_eps", 1.E-7}, {"RMSPROP_rho", 0.9},
378 };
379 for (auto &pN : optimParamLabels) {
380 TString optimParamName = settings.optimizerName + pN;
381 // check if optimizer has default values for this specific parameters
382 if (defaultValues.count(optimParamName) > 0) {
383 double defValue = defaultValues[optimParamName];
384 double val = fetchValueTmp(block, optimParamName, defValue);
385 // create entry in settings for this optimizer parameter
386 settings.optimizerParams[optimParamName] = val;
387 }
388 }
389
390 fTrainingSettings.push_back(settings);
391 }
392
393 // this set fInputShape[0] = batchSize
394 this->SetBatchSize(fTrainingSettings.front().batchSize);
395
396 // case inputlayout and batch layout was not given. Use default then
397 // (1, batchsize, nvariables)
398 // fInputShape[0] -> BatchSize
399 // fInputShape[1] -> InputDepth
400 // fInputShape[2] -> InputHeight
401 // fInputShape[3] -> InputWidth
402 if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
403 fInputShape[1] = 1;
404 fInputShape[2] = 1;
406 }
407 // case when batch layout is not provided (all zero)
408 // batch layout can be determined by the input layout + batch size
409 // case DNN : { 1, B, W }
410 // case CNN : { B, C, H*W}
411 // case RNN : { B, T, H*W }
412
413 if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
414 // case first layer is DENSE
415 if (fInputShape[2] == 1 && fInputShape[1] == 1) {
416 // case of (1, batchsize, input features)
417 fBatchDepth = 1;
418 fBatchHeight = fTrainingSettings.front().batchSize;
// NOTE(review): the fBatchWidth assignments (doc lines 419 and 424-425) are
// missing from this listing.
420 }
421 else { // more general cases (e.g. for CNN)
422 // case CONV or RNN
423 fBatchDepth = fTrainingSettings.front().batchSize;
426 }
427 }
428}
429
430////////////////////////////////////////////////////////////////////////////////
431/// default initializations
433{
434 // Nothing to do here
435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// Parse the input layout
// NOTE(review): the signature line (presumably `void MethodDL::ParseInputLayout()`,
// doc line 439) is missing from this listing — doc-scrape artifact.
// Parses the "|"-separated InputLayout option string into a shape vector
// {batch, depth, height, width}; slot 0 (batch size) is filled in later from
// the training settings, and shorter layouts are padded with 1's to rank 4.
440{
441 // Define the delimiter
442 const TString delim("|");
443
444 // Get the input layout string
445 TString inputLayoutString = this->GetInputLayoutString();
446
447 // Split the input layout string
448 TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
449 TIter nextInputDim(inputDimStrings);
450 TObjString *inputDimString = (TObjString *)nextInputDim();
451
452 // Go through every token and save its absolute value in the shape array
453 // The first token is the batch size for easy compatibility with cudnn
454 int subDim = 1;
455 std::vector<size_t> inputShape;
456 inputShape.reserve(inputLayoutString.Length()/2 + 2);
457 inputShape.push_back(0); // Will be set later by Trainingsettings, use 0 value now
458 for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
459 // size_t is unsigned
460 subDim = (size_t) abs(inputDimString->GetString().Atoi());
461 // Size among unused dimensions should be set to 1 for cudnn
462 //if (subDim == 0) subDim = 1;
463 inputShape.push_back(subDim);
464 }
465 // it is expected that empty Shape has at least 4 dimensions. We pad the missing one's with 1
466 // for example in case of dense layer input layouts
467 // when we will support 3D convolutions we would need to add extra 1's
468 if (inputShape.size() == 2) {
469 // case of dense layer where only width is specified
470 inputShape.insert(inputShape.begin() + 1, {1,1});
471 }
472 else if (inputShape.size() == 3) {
473 //e.g. case of RNN T,W -> T,1,W
474 inputShape.insert(inputShape.begin() + 2, 1);
475 }
476
477 this->SetInputShape(inputShape);
478}
479
480////////////////////////////////////////////////////////////////////////////////
481/// Parse the input layout
// NOTE(review): the signature line (presumably `void MethodDL::ParseBatchLayout()`,
// doc line 482) is missing from this listing — doc-scrape artifact. Also the
// doc comment says "input layout" but the body parses the *batch* layout.
// Parses the "|"-separated BatchLayout option string into batch depth, height
// and width, and stores them via the corresponding setters.
483{
484 // Define the delimiter
485 const TString delim("|");
486
487 // Get the input layout string
488 TString batchLayoutString = this->GetBatchLayoutString();
489
490 size_t batchDepth = 0;
491 size_t batchHeight = 0;
492 size_t batchWidth = 0;
493
494 // Split the input layout string
495 TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
496 TIter nextBatchDim(batchDimStrings);
497 TObjString *batchDimString = (TObjString *)nextBatchDim();
498 int idxToken = 0;
499
500 for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
501 switch (idxToken) {
502 case 0: // input depth
503 {
504 TString strDepth(batchDimString->GetString());
505 batchDepth = (size_t)strDepth.Atoi();
506 } break;
507 case 1: // input height
508 {
509 TString strHeight(batchDimString->GetString());
510 batchHeight = (size_t)strHeight.Atoi();
511 } break;
512 case 2: // input width
513 {
514 TString strWidth(batchDimString->GetString());
515 batchWidth = (size_t)strWidth.Atoi();
516 } break;
517 }
518 ++idxToken;
519 }
520
521 this->SetBatchDepth(batchDepth);
522 this->SetBatchHeight(batchHeight);
523 this->SetBatchWidth(batchWidth);
524}
525
526////////////////////////////////////////////////////////////////////////////////
527/// Create a deep net based on the layout string
528template <typename Architecture_t, typename Layer_t>
// NOTE(review): the rest of the signature (presumably
// `void MethodDL::CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
//  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets)`, doc lines
// 529-530) is missing from this listing — doc-scrape artifact.
// Splits the Layout option on ',' into per-layer specs and dispatches each to
// the matching Parse*Layer helper based on its first "|"-token; an unknown
// first token falls back to a dense layer for old-DNN compatibility.
531{
532 // Layer specification, layer details
533 const TString layerDelimiter(",");
534 const TString subDelimiter("|");
535
536 TString layoutString = this->GetLayoutString();
537
538 //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
539
540 // Split layers
541 TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
542 TIter nextLayer(layerStrings);
543 TObjString *layerString = (TObjString *)nextLayer();
544
545
546 for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
547
548 // Split layer details
549 TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
550 TIter nextToken(subStrings);
551 TObjString *token = (TObjString *)nextToken();
552
553 // Determine the type of the layer
554 TString strLayerType = token->GetString();
555
556
557 if (strLayerType == "DENSE") {
558 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
559 } else if (strLayerType == "CONV") {
560 ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
561 } else if (strLayerType == "MAXPOOL") {
562 ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
563 } else if (strLayerType == "RESHAPE") {
564 ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
565 } else if (strLayerType == "BNORM") {
566 ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
567 } else if (strLayerType == "RNN") {
568 ParseRecurrentLayer(kLayerRNN, deepNet, nets, layerString->GetString(), subDelimiter);
569 } else if (strLayerType == "LSTM") {
570 ParseRecurrentLayer(kLayerLSTM, deepNet, nets, layerString->GetString(), subDelimiter);
571 } else if (strLayerType == "GRU") {
572 ParseRecurrentLayer(kLayerGRU, deepNet, nets, layerString->GetString(), subDelimiter);
573 } else {
574 // no type of layer specified - assume is dense layer as in old DNN interface
575 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
576 }
577 }
578}
579
580////////////////////////////////////////////////////////////////////////////////
581/// Pases the layer string and creates the appropriate dense layer
582template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of the signature (presumably
// `void MethodDL::ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`,
// doc line 583) and the declaration of `activationFunction` (doc line 588)
// are missing from this listing — doc-scrape artifact.
// Parses a "DENSE|<width-or-formula>|<activation>" spec (token order is
// flexible), evaluates the width via TFormula with 'N'/'n' bound to the number
// of input variables, and appends a dense layer to the net.
584 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
585 TString delim)
586{
587 int width = 0;
589
590 // this return number of input variables for the method
591 // it can be used to deduce width of dense layer if specified as N+10
592 // where N is the number of input variables
593 const size_t inputSize = GetNvar();
594
595 // Split layer details
596 TObjArray *subStrings = layerString.Tokenize(delim);
597 TIter nextToken(subStrings);
598 TObjString *token = (TObjString *)nextToken();
599
600 // loop on the tokens
601 // order of sepcifying width and activation function is not relevant
602 // both 100|TANH and TANH|100 are valid cases
603 for (; token != nullptr; token = (TObjString *)nextToken()) {
604 // try a match with the activation function
605 TString strActFnc(token->GetString());
606 // if first token defines the layer type- skip it
607 if (strActFnc =="DENSE") continue;
608
609 if (strActFnc == "RELU") {
610 activationFunction = DNN::EActivationFunction::kRelu;
611 } else if (strActFnc == "TANH") {
612 activationFunction = DNN::EActivationFunction::kTanh;
613 } else if (strActFnc == "FTANH") {
614 activationFunction = DNN::EActivationFunction::kFastTanh;
615 } else if (strActFnc == "SYMMRELU") {
616 activationFunction = DNN::EActivationFunction::kSymmRelu;
617 } else if (strActFnc == "SOFTSIGN") {
618 activationFunction = DNN::EActivationFunction::kSoftSign;
619 } else if (strActFnc == "SIGMOID") {
620 activationFunction = DNN::EActivationFunction::kSigmoid;
621 } else if (strActFnc == "LINEAR") {
622 activationFunction = DNN::EActivationFunction::kIdentity;
623 } else if (strActFnc == "GAUSS") {
624 activationFunction = DNN::EActivationFunction::kGauss;
625 } else if (width == 0) {
626 // no match found try to parse as text showing the width
627 // support for input a formula where the variable 'x' is 'N' in the string
628 // use TFormula for the evaluation
629 TString strNumNodes = strActFnc;
630 // number of nodes
631 TString strN("x");
632 strNumNodes.ReplaceAll("N", strN);
633 strNumNodes.ReplaceAll("n", strN);
634 TFormula fml("tmp", strNumNodes);
635 width = fml.Eval(inputSize);
636 }
637 }
638 // avoid zero width. assume is last layer and give width = output width
639 // Determine the number of outputs
640 size_t outputSize = 1;
// NOTE(review): the regression branch header (doc line 641, presumably
// `if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {`) is
// missing from this listing.
642 outputSize = GetNTargets();
643 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
644 outputSize = DataInfo().GetNClasses();
645 }
646 if (width == 0) width = outputSize;
647
648 // Add the dense layer, initialize the weights and biases and copy
649 TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
650 denseLayer->Initialize();
651
652 // add same layer to fNet
653 if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
654
655 //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
656
657 // add the copy to all slave nets
658 //for (size_t i = 0; i < nets.size(); i++) {
659 // nets[i].AddDenseLayer(copyDenseLayer);
660 //}
661
662 // check compatibility of added layer
663 // for a dense layer input should be 1 x 1 x DxHxW
664}
665
666////////////////////////////////////////////////////////////////////////////////
667/// Pases the layer string and creates the appropriate convolutional layer
668template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of the signature (presumably
// `void MethodDL::ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`,
// doc line 669) and the declaration of `activationFunction` (doc line 680)
// are missing from this listing — doc-scrape artifact.
// Parses a "CONV|depth|fltH|fltW|strideR|strideC|padH|padW|activation" spec by
// token position and appends a convolutional layer to the net.
670 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
671 TString delim)
672{
673 int depth = 0;
674 int fltHeight = 0;
675 int fltWidth = 0;
676 int strideRows = 0;
677 int strideCols = 0;
678 int zeroPadHeight = 0;
679 int zeroPadWidth = 0;
681
682 // Split layer details
683 TObjArray *subStrings = layerString.Tokenize(delim);
684 TIter nextToken(subStrings);
685 TObjString *token = (TObjString *)nextToken();
686 int idxToken = 0;
687
688 for (; token != nullptr; token = (TObjString *)nextToken()) {
689 switch (idxToken) {
690 case 1: // depth
691 {
692 TString strDepth(token->GetString());
693 depth = strDepth.Atoi();
694 } break;
695 case 2: // filter height
696 {
697 TString strFltHeight(token->GetString());
698 fltHeight = strFltHeight.Atoi();
699 } break;
700 case 3: // filter width
701 {
702 TString strFltWidth(token->GetString());
703 fltWidth = strFltWidth.Atoi();
704 } break;
705 case 4: // stride in rows
706 {
707 TString strStrideRows(token->GetString());
708 strideRows = strStrideRows.Atoi();
709 } break;
710 case 5: // stride in cols
711 {
712 TString strStrideCols(token->GetString());
713 strideCols = strStrideCols.Atoi();
714 } break;
715 case 6: // zero padding height
716 {
717 TString strZeroPadHeight(token->GetString());
718 zeroPadHeight = strZeroPadHeight.Atoi();
719 } break;
720 case 7: // zero padding width
721 {
722 TString strZeroPadWidth(token->GetString());
723 zeroPadWidth = strZeroPadWidth.Atoi();
724 } break;
725 case 8: // activation function
726 {
727 TString strActFnc(token->GetString());
728 if (strActFnc == "RELU") {
729 activationFunction = DNN::EActivationFunction::kRelu;
730 } else if (strActFnc == "TANH") {
731 activationFunction = DNN::EActivationFunction::kTanh;
732 } else if (strActFnc == "SYMMRELU") {
733 activationFunction = DNN::EActivationFunction::kSymmRelu;
734 } else if (strActFnc == "SOFTSIGN") {
735 activationFunction = DNN::EActivationFunction::kSoftSign;
736 } else if (strActFnc == "SIGMOID") {
737 activationFunction = DNN::EActivationFunction::kSigmoid;
738 } else if (strActFnc == "LINEAR") {
739 activationFunction = DNN::EActivationFunction::kIdentity;
740 } else if (strActFnc == "GAUSS") {
741 activationFunction = DNN::EActivationFunction::kGauss;
742 }
743 } break;
744 }
745 ++idxToken;
746 }
747
748 // Add the convolutional layer, initialize the weights and biases and copy
749 TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
750 zeroPadHeight, zeroPadWidth, activationFunction);
751 convLayer->Initialize();
752
753 // Add same layer to fNet
754 if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
755 zeroPadHeight, zeroPadWidth, activationFunction);
756
757 //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
758
759 //// add the copy to all slave nets
760 //for (size_t i = 0; i < nets.size(); i++) {
761 // nets[i].AddConvLayer(copyConvLayer);
762 //}
763}
764
765////////////////////////////////////////////////////////////////////////////////
766/// Pases the layer string and creates the appropriate max pool layer
767template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of the signature (presumably
// `void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`,
// doc line 768) is missing from this listing — doc-scrape artifact.
// Parses a "MAXPOOL|fltH|fltW|strideR|strideC" spec by token position and
// appends a max-pooling layer to the net.
769 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
770 TString delim)
771{
772
773 int filterHeight = 0;
774 int filterWidth = 0;
775 int strideRows = 0;
776 int strideCols = 0;
777
778 // Split layer details
779 TObjArray *subStrings = layerString.Tokenize(delim);
780 TIter nextToken(subStrings);
781 TObjString *token = (TObjString *)nextToken();
782 int idxToken = 0;
783
784 for (; token != nullptr; token = (TObjString *)nextToken()) {
785 switch (idxToken) {
786 case 1: // filter height
787 {
788 TString strFrmHeight(token->GetString());
789 filterHeight = strFrmHeight.Atoi();
790 } break;
791 case 2: // filter width
792 {
793 TString strFrmWidth(token->GetString());
794 filterWidth = strFrmWidth.Atoi();
795 } break;
796 case 3: // stride in rows
797 {
798 TString strStrideRows(token->GetString());
799 strideRows = strStrideRows.Atoi();
800 } break;
801 case 4: // stride in cols
802 {
803 TString strStrideCols(token->GetString());
804 strideCols = strStrideCols.Atoi();
805 } break;
806 }
807 ++idxToken;
808 }
809
810 // Add the Max pooling layer
811 // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
812 deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
813
814 // Add the same layer to fNet
815 if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
816
817
818 //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
819
820 //// add the copy to all slave nets
821 //for (size_t i = 0; i < nets.size(); i++) {
822 // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
823 //}
824}
825
826////////////////////////////////////////////////////////////////////////////////
827/// Pases the layer string and creates the appropriate reshape layer
828template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of the signature (presumably
// `void MethodDL::ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`,
// doc line 829) is missing from this listing — doc-scrape artifact.
// Parses a "RESHAPE|depth|height|width" or "RESHAPE|FLAT" spec and appends a
// reshape (or flattening) layer to the net. A "FLAT" token anywhere jumps the
// state machine straight to the flattening case.
830 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
831 TString delim)
832{
833 int depth = 0;
834 int height = 0;
835 int width = 0;
836 bool flattening = false;
837
838 // Split layer details
839 TObjArray *subStrings = layerString.Tokenize(delim);
840 TIter nextToken(subStrings);
841 TObjString *token = (TObjString *)nextToken();
842 int idxToken = 0;
843
844 for (; token != nullptr; token = (TObjString *)nextToken()) {
845 if (token->GetString() == "FLAT") idxToken=4;
846 switch (idxToken) {
847 case 1: {
848 TString strDepth(token->GetString());
849 depth = strDepth.Atoi();
850 } break;
851 case 2: // height
852 {
853 TString strHeight(token->GetString());
854 height = strHeight.Atoi();
855 } break;
856 case 3: // width
857 {
858 TString strWidth(token->GetString());
859 width = strWidth.Atoi();
860 } break;
861 case 4: // flattening
862 {
863 TString flat(token->GetString());
864 if (flat == "FLAT") {
865 flattening = true;
866 }
867 } break;
868 }
869 ++idxToken;
870 }
871
872 // Add the reshape layer
873 // TReshapeLayer<Architecture_t> *reshapeLayer =
874 deepNet.AddReshapeLayer(depth, height, width, flattening);
875
876 // Add the same layer to fNet
877 if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
878
879 //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
880
881 //// add the copy to all slave nets
882 //for (size_t i = 0; i < nets.size(); i++) {
883 // nets[i].AddReshapeLayer(copyReshapeLayer);
884 //}
885}
886
887////////////////////////////////////////////////////////////////////////////////
888/// Pases the layer string and creates the appropriate reshape layer
889template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of the signature (presumably
// `void MethodDL::ParseBatchNormLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`,
// doc line 890) is missing from this listing — doc-scrape artifact. Also the
// doc comment above says "reshape layer" but this parses a *batch-norm* layer.
// Parses "BNORM|momentum|epsilon" (both optional) and appends a batch
// normalization layer to the net.
891 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
892 TString delim)
893{
894
895 // default values
896 double momentum = -1; //0.99;
897 double epsilon = 0.0001;
898
899 // Split layer details
900 TObjArray *subStrings = layerString.Tokenize(delim);
901 TIter nextToken(subStrings);
902 TObjString *token = (TObjString *)nextToken();
903 int idxToken = 0;
904
905 for (; token != nullptr; token = (TObjString *)nextToken()) {
906 switch (idxToken) {
907 case 1: {
908 momentum = std::atof(token->GetString().Data());
909 } break;
910 case 2: // height
911 {
912 epsilon = std::atof(token->GetString().Data());
913 } break;
914 }
915 ++idxToken;
916 }
917
918 // Add the batch norm layer
919 //
920 auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
921 layer->Initialize();
922
923 // Add the same layer to fNet
924 if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
925
926}
927
928////////////////////////////////////////////////////////////////////////////////
929/// Pases the layer string and creates the appropriate rnn layer
930template <typename Architecture_t, typename Layer_t>
// NOTE(review): the first line of the signature (presumably
// `void MethodDL::ParseRecurrentLayer(ERecurrentLayerType rnnType,
//  DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,`, doc line 931) is missing
// from this listing — doc-scrape artifact.
// Parses "RNN|stateSize|inputSize|timeSteps|rememberState|returnSequence
// [|resetGateAfter]" (also for LSTM/GRU) by token position and appends the
// corresponding recurrent layer; `resetGateAfter` applies to GRU only and is
// forced on for the cuDNN backend.
932 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
933 TString delim)
934{
935 // int depth = 0;
936 int stateSize = 0;
937 int inputSize = 0;
938 int timeSteps = 0;
939 bool rememberState = false;
940 bool returnSequence = false;
941 bool resetGateAfter = false;
942
943 // Split layer details
944 TObjArray *subStrings = layerString.Tokenize(delim);
945 TIter nextToken(subStrings);
946 TObjString *token = (TObjString *)nextToken();
947 int idxToken = 0;
948
949 for (; token != nullptr; token = (TObjString *)nextToken()) {
950 switch (idxToken) {
951 case 1: // state size
952 {
953 TString strstateSize(token->GetString());
954 stateSize = strstateSize.Atoi();
955 break;
956 }
957 case 2: // input size
958 {
959 TString strinputSize(token->GetString());
960 inputSize = strinputSize.Atoi();
961 break;
962 }
963 case 3: // time steps
964 {
965 TString strtimeSteps(token->GetString());
966 timeSteps = strtimeSteps.Atoi();
967 break;
968 }
969 case 4: // returnSequence (option stateful in Keras)
970 {
971 TString strrememberState(token->GetString());
972 rememberState = (bool) strrememberState.Atoi();
973 break;
974 }
975 case 5: // return full output sequence (1 or 0)
976 {
977 TString str(token->GetString());
978 returnSequence = (bool)str.Atoi();
979 break;
980 }
981 case 6: // resetGate after option (only for GRU)
982 {
983 TString str(token->GetString());
984 resetGateAfter = (bool)str.Atoi();
985 }
986 }
987 ++idxToken;
988 }
989
990 // Add the recurrent layer, initialize the weights and biases and copy
991 if (rnnType == kLayerRNN) {
992 auto * recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
993 recurrentLayer->Initialize();
994 // Add same layer to fNet
995 if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
996 }
997 else if (rnnType == kLayerLSTM ) {
998 auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
999 recurrentLayer->Initialize();
1000 // Add same layer to fNet
1001 if (fBuildNet)
1002 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1003 }
1004 else if (rnnType == kLayerGRU) {
1005 if (Architecture_t::IsCudnn()) resetGateAfter = true; // needed for Cudnn
1006 auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1007 recurrentLayer->Initialize();
1008 // Add same layer to fNet
1009 if (fBuildNet)
1010 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1011 }
1012 else {
1013 Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
1014 }
1015}
1016
1017////////////////////////////////////////////////////////////////////////////////
1018/// Standard constructor.
/// All configuration members are default-/zero-initialized here; the network
/// itself is built later once the option strings have been parsed.
/// fInputShape is sized to 4 entries — presumably {batch, depth, height, width};
/// confirm against MethodDL.h.
1019MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
1020 : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1021 fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1022 fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1023 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1024 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1025 fXInput()
1026{
1027 // Nothing to do here: initialization is fully handled by the initializer list
1028}
1029
1030////////////////////////////////////////////////////////////////////////////////
1031/// Constructor from a weight file.
/// Same member initialization as the standard constructor; the network state is
/// expected to be restored afterwards from theWeightFile by the framework.
1032MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
1033 : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1034 fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1035 fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1036 fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1037 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1038 fXInput()
1039{
1040 // Nothing to do here: initialization is fully handled by the initializer list
1041}
1042
1043////////////////////////////////////////////////////////////////////////////////
1044/// Destructor.
/// Nothing to release explicitly: owned resources (e.g. fNet, fYHat) live in
/// std::unique_ptr members and clean themselves up.
/// NOTE(review): the signature line (original line 1045) was a Doxygen link and
/// is missing from this listing.
1046{
1047 // Nothing to do here
1048}
1049
1050////////////////////////////////////////////////////////////////////////////////
1051/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1052auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1053{
1054 // remove empty spaces
1055 parseString.ReplaceAll(" ","");
1056 KeyValueVector_t blockKeyValues;
1057 const TString keyValueDelim("=");
1058
1059 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1060 TIter nextBlock(blockStrings);
1061 TObjString *blockString = (TObjString *)nextBlock();
1062
1063 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1064 blockKeyValues.push_back(std::map<TString, TString>());
1065 std::map<TString, TString> &currentBlock = blockKeyValues.back();
1066
1067 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1068 TIter nextToken(subStrings);
1069 TObjString *token = (TObjString *)nextToken();
1070
1071 for (; token != nullptr; token = (TObjString *)nextToken()) {
1072 TString strKeyValue(token->GetString());
1073 int delimPos = strKeyValue.First(keyValueDelim.Data());
1074 if (delimPos <= 0) continue;
1075
1076 TString strKey = TString(strKeyValue(0, delimPos));
1077 strKey.ToUpper();
1078 TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1079
1080 strKey.Strip(TString::kBoth, ' ');
1081 strValue.Strip(TString::kBoth, ' ');
1082
1083 currentBlock.insert(std::make_pair(strKey, strValue));
1084 }
1085 }
1086 return blockKeyValues;
1087}
1088
1089////////////////////////////////////////////////////////////////////////////////
1090/// What kind of analysis types the method can handle.
/// Returns kTRUE for the supported analysis types: two-class classification,
/// multiclass classification and regression; kFALSE for anything else
/// (including classification with more than two classes, which must go through
/// kMulticlass instead).
/// NOTE(review): the signature line (original line 1091) was a Doxygen link and
/// is missing from this listing.
1092{
1093 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1094 if (type == Types::kMulticlass) return kTRUE;
1095 if (type == Types::kRegression) return kTRUE;
1096
1097 return kFALSE;
1098}
1099
1100////////////////////////////////////////////////////////////////////////////////
1101/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1102/// 100 etc.
1103/// - 20% and 0.2 selects 20% of the training set as validation data.
1104/// - 100 selects 100 events as the validation data.
1105///
1106/// @return number of samples in validation set
1107///
/// NOTE(review): the signature line (original line 1108) was a Doxygen link and
/// is missing from this listing.
1109{
1110 Int_t nValidationSamples = 0;
1111 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1112
1113 // Parsing + Validation
1114 // --------------------
1115 if (fNumValidationString.EndsWith("%")) {
1116 // Relative spec. format 20%
// intValStr (the string without the trailing '%') is only used to validate the
// format; the conversion below calls Atof() on the original string, which
// stops at the first non-numeric character, so "20%" parses as 20.
1117 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1118
1119 if (intValStr.IsFloat()) {
1120 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
// fractional event counts are truncated toward zero by the double->Int_t assignment
1121 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1122 } else {
1123 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1124 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1125 }
1126 } else if (fNumValidationString.IsFloat()) {
1127 Double_t valSizeAsDouble = fNumValidationString.Atof();
1128
1129 if (valSizeAsDouble < 1.0) {
1130 // Relative spec. format 0.2
1131 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1132 } else {
1133 // Absolute spec format 100 or 100.0
1134 nValidationSamples = valSizeAsDouble;
1135 }
1136 } else {
1137 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1138 << Endl;
1139 }
1140
1141 // Value validation
1142 // ----------------
// each failed check aborts via kFATAL; they are deliberately separate to give
// a specific diagnostic for negative, zero and too-large sizes
1143 if (nValidationSamples < 0) {
1144 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1145 }
1146
1147 if (nValidationSamples == 0) {
1148 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1149 }
1150
1151 if (nValidationSamples >= (Int_t)trainingSetSize) {
1152 Log() << kFATAL << "Validation size \"" << fNumValidationString
1153 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1154 }
1155
1156 return nValidationSamples;
1157}
1158
1159
1160////////////////////////////////////////////////////////////////////////////////
1161/// Implementation of architecture specific train method
1162///
/// Template parameter Architecture_t selects the compute backend
/// (TCpu / TCuda / TCudnn). For each TTrainingSettings phase this routine:
/// validates the batch/input layout, builds the master deepNet (and, in the
/// first phase, the batch-size-1 evaluation copy fNet), creates the requested
/// optimizer, then runs the epoch loop with early stopping driven by the
/// validation loss evaluated every settings.testInterval epochs.
/// NOTE(review): this listing lost several hyperlinked lines of the original
/// file (e.g. 1164 = function signature, 1168-1169 = DeepNet_t/Layer_t aliases,
/// 1206 = the EInitialization `I` used below, 1363 = `new TAdam<...>(`);
/// consult the original MethodDL.cxx for the full text.
1163template <typename Architecture_t>
1165{
1166
1167 using Scalar_t = typename Architecture_t::Scalar_t;
1170 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1171
1172 bool debug = Log().GetMinType() == kDEBUG;
1173
1174
1175 // set the random seed for weight initialization
1176 Architecture_t::SetRandomSeed(fRandomSeed);
1177
1178 ///split training data in training and validation data
1179 // and determine the number of training and testing examples
1180
1181 size_t nValidationSamples = GetNumValidationSamples();
1182 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1183
// the validation set is simply the tail of the training event collection
1184 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1185 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1186 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1187
1188 size_t trainingPhase = 1;
1189
1190 for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1191
1192 size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1193
1194
1195 // After the processing of the options, initialize the master deep net
1196 size_t batchSize = settings.batchSize;
1197 this->SetBatchSize(batchSize);
1198 // Should be replaced by actual implementation. No support for this now.
1199 size_t inputDepth = this->GetInputDepth();
1200 size_t inputHeight = this->GetInputHeight();
1201 size_t inputWidth = this->GetInputWidth();
1202 size_t batchDepth = this->GetBatchDepth();
1203 size_t batchHeight = this->GetBatchHeight();
1204 size_t batchWidth = this->GetBatchWidth();
1205 ELossFunction J = this->GetLossFunction();
1207 ERegularization R = settings.regularization;
1208 EOptimizer O = settings.optimizer;
1209 Scalar_t weightDecay = settings.weightDecay;
1210
1211 //Batch size should be included in batch layout as well. There are two possibilities:
1212 // 1. Batch depth = batch size one will input tensors as (batch_size x d1 x d2)
1213 // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1214 // 2. Batch depth = 1, batch height = batch size batch width = dim of input features
1215 // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1216
1217 if (batchDepth != batchSize && batchDepth > 1) {
1218 Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1219 return;
1220 }
1221 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1222 Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1223 return;
1224 }
1225
1226
1227 //check also that input layout compatible with batch layout
1228 bool badLayout = false;
1229 // case batch depth == batch size
1230 if (batchDepth == batchSize)
1231 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1232 // case batch Height is batch size
1233 if (batchHeight == batchSize && batchDepth == 1)
1234 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1235 if (badLayout) {
1236 Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1237 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1238 return;
1239 }
1240
1241 // check batch size is compatible with number of events
1242 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1243 Log() << kFATAL << "Number of samples in the datasets are train: ("
1244 << nTrainingSamples << ") test: (" << nValidationSamples
1245 << "). One of these is smaller than the batch size of "
1246 << settings.batchSize << ". Please increase the batch"
1247 << " size to be at least the same size as the smallest"
1248 << " of them." << Endl;
1249 }
1250
1251 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1252
1253 // create a copy of DeepNet for evaluating but with batch size = 1
1254 // fNet is the saved network and will be with CPU or Reference architecture
1255 if (trainingPhase == 1) {
1256 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1257 batchHeight, batchWidth, J, I, R, weightDecay));
1258 fBuildNet = true;
1259 }
1260 else
1261 fBuildNet = false;
1262
1263 // Initialize the vector of slave nets
1264 std::vector<DeepNet_t> nets{};
1265 nets.reserve(nThreads);
1266 for (size_t i = 0; i < nThreads; i++) {
1267 // create copies of the master deep net
1268 nets.push_back(deepNet);
1269 }
1270
1271
1272 // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1273 CreateDeepNet(deepNet, nets);
1274
1275
1276 // set dropout probabilities
1277 // use convention to store in the layer 1.- dropout probabilities
1278 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1279 for (auto & p : dropoutVector) {
1280 p = 1.0 - p;
1281 }
1282 deepNet.SetDropoutProbabilities(dropoutVector);
1283
1284 if (trainingPhase > 1) {
1285 // copy initial weights from fNet to deepnet
// later phases continue from the best weights found in the previous phase
1286 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1287 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1288 }
1289 }
1290
1291 // when fNet is built create also input matrix that will be used to evaluate it
1292 if (fBuildNet) {
1293 //int n1 = batchHeight;
1294 //int n2 = batchWidth;
1295 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1296 //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1297 //fXInput = TensorImpl_t(1,n1,n2);
1299 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1300 fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1301 fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );
1302
1303
1304 // create pointer to output matrix used for the predictions
1305 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1306
1307 // print the created network
1308 Log() << "***** Deep Learning Network *****" << Endl;
1309 if (Log().GetMinType() <= kINFO)
1310 deepNet.Print();
1311 }
1312 Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1313
1314 // Loading the training and validation datasets
1315 TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1316 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1317 {inputDepth, inputHeight, inputWidth},
1318 {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1319 deepNet.GetOutputWidth(), nThreads);
1320
1321 TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1322 TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1323 {inputDepth, inputHeight, inputWidth},
1324 { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1325 deepNet.GetOutputWidth(), nThreads);
1326
1327
1328
1329 // do an evaluation of the network to compute initial minimum test error
1330
1331 Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1332
1333 Double_t minValError = 0.0;
1334 Log() << "Compute initial loss on the validation data " << Endl;
1335 for (auto batch : validationData) {
1336 auto inputTensor = batch.GetInput();
1337 auto outputMatrix = batch.GetOutput();
1338 auto weights = batch.GetWeights();
1339
1340 //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1341 // should we apply dropout to the loss ??
1342 minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1343 }
1344 // add Regularization term
1345 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1346 minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1347 minValError += regzTerm;
1348
1349
1350 // create a pointer to base class VOptimizer
1351 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1352
1353 // initialize the base class pointer with the corresponding derived class object.
1354 switch (O) {
1355
1356 case EOptimizer::kSGD:
1357 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1358 new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1359 break;
1360
1361 case EOptimizer::kAdam: {
1362 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1364 deepNet, settings.learningRate, settings.optimizerParams["ADAM_beta1"],
1365 settings.optimizerParams["ADAM_beta2"], settings.optimizerParams["ADAM_eps"]));
1366 break;
1367 }
1368
1369 case EOptimizer::kAdagrad:
1370 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1371 new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1372 settings.optimizerParams["ADAGRAD_eps"]));
1373 break;
1374
1375 case EOptimizer::kRMSProp:
1376 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1377 new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum,
1378 settings.optimizerParams["RMSPROP_rho"],
1379 settings.optimizerParams["RMSPROP_eps"]));
1380 break;
1381
1382 case EOptimizer::kAdadelta:
1383 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1384 new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate,
1385 settings.optimizerParams["ADADELTA_rho"],
1386 settings.optimizerParams["ADADELTA_eps"]));
1387 break;
1388 }
1389
1390
1391 // Initialize the vector of batches, one batch for one slave network
1392 std::vector<TTensorBatch<Architecture_t>> batches{};
1393
1394 bool converged = false;
1395 size_t convergenceCount = 0;
1396 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1397
1398 // start measuring
1399 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1400 tstart = std::chrono::system_clock::now();
1401
1402 // function building string with optimizer parameters values for logging
1403 auto optimParametersString = [&]() {
1404 TString optimParameters;
1405 for ( auto & element : settings.optimizerParams) {
1406 TString key = element.first;
1407 key.ReplaceAll(settings.optimizerName + "_", ""); // strip optimizerName_
1408 double value = element.second;
1409 if (!optimParameters.IsNull())
1410 optimParameters += ",";
1411 else
1412 optimParameters += " (";
1413 optimParameters += TString::Format("%s=%g", key.Data(), value);
1414 }
1415 if (!optimParameters.IsNull())
1416 optimParameters += ")";
1417 return optimParameters;
1418 };
1419
1420 Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1421 << " Optimizer " << settings.optimizerName
1422 << optimParametersString()
1423 << " Learning rate = " << settings.learningRate << " regularization " << (char)settings.regularization
1424 << " minimum error = " << minValError << Endl;
1425 if (!fInteractive) {
1426 std::string separator(62, '-');
1427 Log() << separator << Endl;
1428 Log() << std::setw(10) << "Epoch"
1429 << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err." << std::setw(12)
1430 << "t(s)/epoch" << std::setw(12) << "t(s)/Loss" << std::setw(12) << "nEvents/s" << std::setw(12)
1431 << "Conv. Steps" << Endl;
1432 Log() << separator << Endl;
1433 }
1434
1435 // set up generator for shuffling the batches
1436 // if seed is zero we have always a different order in the batches
1437 size_t shuffleSeed = 0;
1438 if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1439 RandomGenerator<TRandom3> rng(shuffleSeed);
1440
1441 // print weights before
1442 if (fBuildNet && debug) {
1443 Log() << "Initial Deep Net Weights " << Endl;
1444 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1445 for (size_t l = 0; l < weights_tensor.size(); ++l)
1446 weights_tensor[l].Print();
1447 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1448 bias_tensor[0].Print();
1449 }
1450
1451 Log() << " Start epoch iteration ..." << Endl;
1452 bool debugFirstEpoch = false;
1453 bool computeLossInTraining = true; // compute loss in training or at test time
1454 size_t nTrainEpochs = 0;
1455 while (!converged) {
1456 nTrainEpochs++;
1457 trainingData.Shuffle(rng);
1458
1459 // execute all epochs
1460 //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1461
1462 Double_t trainingError = 0;
// one full pass over the training data: forward, (optional) loss, backward, optimizer step
1463 for (size_t i = 0; i < batchesInEpoch; ++i ) {
1464 // Clean and load new batches, one batch for one slave net
1465 //batches.clear();
1466 //batches.reserve(nThreads);
1467 //for (size_t j = 0; j < nThreads; j++) {
1468 // batches.push_back(trainingData.GetTensorBatch());
1469 //}
1470 if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1471
1472 auto my_batch = trainingData.GetTensorBatch();
1473
1474 if (debugFirstEpoch)
1475 std::cout << "got batch data - doing forward \n";
1476
1477#ifdef DEBUG
1478
1479 Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1480 typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1481 typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1482 Architecture_t::PrintTensor(tOut,"label tensor",true) ;
1483 Architecture_t::PrintTensor(tW,"weight tensor",true) ;
1484#endif
1485
1486 deepNet.Forward(my_batch.GetInput(), true);
1487 // compute also loss
1488 if (computeLossInTraining) {
1489 auto outputMatrix = my_batch.GetOutput();
1490 auto weights = my_batch.GetWeights();
1491 trainingError += deepNet.Loss(outputMatrix, weights, false);
1492 }
1493
1494 if (debugFirstEpoch)
1495 std::cout << "- doing backward \n";
1496
1497#ifdef DEBUG
1498 size_t nlayers = deepNet.GetLayers().size();
1499 for (size_t l = 0; l < nlayers; ++l) {
1500 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1501 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1502 TString::Format("initial weights layer %d", l).Data());
1503
1504 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1505 TString::Format("output tensor layer %d", l).Data());
1506 }
1507#endif
1508
1509 //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1510
1511 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1512
1513 if (debugFirstEpoch)
1514 std::cout << "- doing optimizer update \n";
1515
1516 // increment optimizer step that is used in some algorithms (e.g. ADAM)
1517 optimizer->IncrementGlobalStep();
1518 optimizer->Step();
1519
1520#ifdef DEBUG
1521 std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1522 for (size_t l = 0; l < nlayers; ++l) {
1523 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1524 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1525 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1526 }
1527 }
1528#endif
1529
1530 }
1531
1532 if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
1533 //}
1534 debugFirstEpoch = false;
// validation, bookkeeping and convergence checks run only every testInterval epochs
1535 if ((nTrainEpochs % settings.testInterval) == 0) {
1536
1537 std::chrono::time_point<std::chrono::system_clock> t1,t2;
1538
1539 t1 = std::chrono::system_clock::now();
1540
1541 // Compute validation error.
1542
1543
1544 Double_t valError = 0.0;
1545 bool inTraining = false;
1546 for (auto batch : validationData) {
1547 auto inputTensor = batch.GetInput();
1548 auto outputMatrix = batch.GetOutput();
1549 auto weights = batch.GetWeights();
1550 // should we apply dropout to the loss ??
1551 valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1552 }
1553 // normalize loss to number of batches and add regularization term
1554 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1555 valError /= (Double_t)(nValidationSamples / settings.batchSize);
1556 valError += regTerm;
1557
1558 //Log the loss value
1559 fTrainHistory.AddValue("valError",nTrainEpochs,valError);
1560
1561 t2 = std::chrono::system_clock::now();
1562
1563 // checking for convergence
// convergenceCount accumulates epochs without improvement (in testInterval steps)
1564 if (valError < minValError) {
1565 convergenceCount = 0;
1566 } else {
1567 convergenceCount += settings.testInterval;
1568 }
1569
1570 // copy configuration when reached a minimum error
1571 if (valError < minValError ) {
1572 // Copy weights from deepNet to fNet
1573 Log() << std::setw(10) << nTrainEpochs
1574 << " Minimum Test error found - save the configuration " << Endl;
1575 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1576 fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1577 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1578 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1579 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1580 // }
1581 }
1582 // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1583 // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1584
1585 minValError = valError;
1586 }
1587 else if ( minValError <= 0. )
1588 minValError = valError;
1589
1590 if (!computeLossInTraining) {
1591 trainingError = 0.0;
1592 // Compute training error.
1593 for (auto batch : trainingData) {
1594 auto inputTensor = batch.GetInput();
1595 auto outputMatrix = batch.GetOutput();
1596 auto weights = batch.GetWeights();
1597 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1598 }
1599 }
1600 // normalize loss to number of batches and add regularization term
1601 trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1602 trainingError += regTerm;
1603
1604 //Log the loss value
1605 fTrainHistory.AddValue("trainingError",nTrainEpochs,trainingError);
1606
1607 // stop measuring
1608 tend = std::chrono::system_clock::now();
1609
1610 // Compute numerical throughput.
1611 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1612 std::chrono::duration<double> elapsed1 = t1-tstart;
1613 // std::chrono::duration<double> elapsed2 = t2-tstart;
1614 // time to compute training and test errors
1615 std::chrono::duration<double> elapsed_testing = tend-t1;
1616
1617 double seconds = elapsed_seconds.count();
1618 // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1619 // nGFlops *= deepnet.GetNFlops() * 1e-9;
1620 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1621
// stop when no improvement for convergenceSteps epochs or the epoch budget is exhausted
1622 converged =
1623 convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1624
1625
1626 Log() << std::setw(10) << nTrainEpochs << " | "
1627 << std::setw(12) << trainingError
1628 << std::setw(12) << valError
1629 << std::setw(12) << seconds / settings.testInterval
1630 << std::setw(12) << elapsed_testing.count()
1631 << std::setw(12) << 1. / eventTime
1632 << std::setw(12) << convergenceCount
1633 << Endl;
1634
1635 if (converged) {
1636 Log() << Endl;
1637 }
1638 tstart = std::chrono::system_clock::now();
1639 }
1640
1641 // if (stepCount % 10 == 0 || converged) {
1642 if (converged && debug) {
1643 Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
1644 << Endl;
1645 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1646 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1647 for (size_t l = 0; l < weights_tensor.size(); ++l)
1648 weights_tensor[l].Print();
1649 bias_tensor[0].Print();
1650 }
1651
1652 }
1653
1654 trainingPhase++;
1655 } // end loop on training Phase
1656}
1657
1658////////////////////////////////////////////////////////////////////////////////
/// Train the deep neural network, dispatching to the architecture-specific
/// TrainDeepNet<Architecture_t>() according to fArchitectureString:
/// "GPU" -> TCudnn (if cuDNN is available) or TCuda; "CPU" -> TCpu.
/// Any other architecture string aborts with kFATAL, as does interactive mode
/// (not implemented).
/// NOTE(review): the signature line (original line 1659) was a Doxygen link and
/// is missing from this listing.
1660{
1661 if (fInteractive) {
1662 Log() << kFATAL << "Not implemented yet" << Endl;
1663 return;
1664 }
1665
1666 // using for training same scalar type defined for the prediction
1667 if (this->GetArchitectureString() == "GPU") {
1668#ifdef R__HAS_TMVAGPU
1669 Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1670#ifdef R__HAS_CUDNN
1671 TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1672#else
1673 TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1674#endif
1675#else
1676 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1677 "you have CUDA installed and it was successfully "
1678 "detected by CMAKE."
1679 << Endl;
1680 return;
1681#endif
1682 } else if (this->GetArchitectureString() == "CPU") {
1683#ifdef R__HAS_TMVACPU
1684 // note that number of threads used for BLAS might be different
1685 // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1686 Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
1687 << gConfig().GetNCpu() << Endl << Endl;
1688#else
1689 Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
1690 << Endl;
1691#endif
1692 TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1693 return;
1694 }
1695 else {
1696 Log() << kFATAL << this->GetArchitectureString() <<
1697 " is not a supported architecture for TMVA::MethodDL"
1698 << Endl;
1699 }
1700
1701}
1702
1703////////////////////////////////////////////////////////////////////////////////
/// Fill the input tensor fXInput from the current event, converting from the
/// event's variable ordering to the tensor's memory layout. Requires fNet to be
/// built with batch size 1.
/// NOTE(review): the signature line (original line 1704) was a Doxygen link and
/// is missing from this listing.
1705{
1706 // fill the input tensor fXInput from the current Event data
1707 // with the correct shape depending on the model used
1708 // The input tensor is used for network prediction after training
1709 // using a single event. The network batch size must be equal to 1.
1710 // The architecture specified at compile time in ArchitectureImpl_t
1711 // is used. This should be the CPU architecture
1712
1713 if (!fNet || fNet->GetDepth() == 0) {
1714 Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
1715 }
1716 if (fNet->GetBatchSize() != 1) {
1717 Log() << kFATAL << "FillINputTensor::Network batch size must be equal to 1 when doing single event predicition" << Endl;
1718 }
1719
1720 // get current event
1721 const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1722 size_t nVariables = GetEvent()->GetNVariables();
1723
1724 // for column layout the tensor memory layout is HWC while for row-wise it is CHW
1725 if (fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
1726 R__ASSERT(fXInput.GetShape().size() < 4);
1727 size_t nc, nhw = 0;
1728 if (fXInput.GetShape().size() == 2) {
// 2D case: shape is (batch, features); the batch dimension must be 1
1729 nc = fXInput.GetShape()[0];
1730 if (nc != 1) {
1731 ArchitectureImpl_t::PrintTensor(fXInput);
1732 Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1" << Endl;
1733 }
1734 nhw = fXInput.GetShape()[1];
1735 } else {
1736 nc = fXInput.GetCSize();
1737 nhw = fXInput.GetWSize();
1738 }
1739 if (nVariables != nc * nhw) {
1740 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1741 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw << Endl;
1742 }
1743 for (size_t j = 0; j < nc; j++) {
1744 for (size_t k = 0; k < nhw; k++) {
1745 // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
1746 fXInputBuffer[k * nc + j] = inputValues[j * nhw + k]; // for column layout !!!
1747 }
1748 }
1749 } else {
1750 // row-wise layout
1751 assert(fXInput.GetShape().size() >= 4);
1752 size_t nc = fXInput.GetCSize();
1753 size_t nh = fXInput.GetHSize();
1754 size_t nw = fXInput.GetWSize();
1755 size_t n = nc * nh * nw;
1756 if (nVariables != n) {
1757 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1758 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
1759 << Endl;
1760 }
1761 for (size_t j = 0; j < n; j++) {
1762 // in this case TMVA event has same order as input tensor
1763 fXInputBuffer[j] = inputValues[j]; // row-wise layout: plain element-by-element copy
1764 }
1765 }
1766 // copy buffer in input
// transfer the host staging buffer to the (possibly device-resident) tensor
1767 fXInput.GetDeviceBuffer().CopyFrom(fXInputBuffer);
1768 return;
1769}
1770
1771////////////////////////////////////////////////////////////////////////////////
1772Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1773{
1774
1776
1777 // perform the prediction
1778 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1779
1780 // return value
1781 double mvaValue = (*fYHat)(0, 0);
1782
1783 // for debugging
1784#ifdef DEBUG_MVAVALUE
1785 using Tensor_t = std::vector<MatrixImpl_t>;
1786 TMatrixF xInput(n1,n2, inputValues.data() );
1787 std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1788 xInput.Print();
1789 std::cout << "Output of DeepNet " << mvaValue << std::endl;
1790 auto & deepnet = *fNet;
1791 std::cout << "Loop on layers " << std::endl;
1792 for (int l = 0; l < deepnet.GetDepth(); ++l) {
1793 std::cout << "Layer " << l;
1794 const auto * layer = deepnet.GetLayerAt(l);
1795 const Tensor_t & layer_output = layer->GetOutput();
1796 layer->Print();
1797 std::cout << "DNN output " << layer_output.size() << std::endl;
1798 for (size_t i = 0; i < layer_output.size(); ++i) {
1799#ifdef R__HAS_TMVAGPU
1800 //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1801 TMatrixD m = layer_output[i];
1802#else
1803 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1804#endif
1805 m.Print();
1806 }
1807 const Tensor_t & layer_weights = layer->GetWeights();
1808 std::cout << "DNN weights " << layer_weights.size() << std::endl;
1809 if (layer_weights.size() > 0) {
1810 int i = 0;
1811#ifdef R__HAS_TMVAGPU
1812 TMatrixD m = layer_weights[i];
1813// TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1814#else
1815 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1816#endif
1817 m.Print();
1818 }
1819 }
1820#endif
1821
1822 return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1823}
1824////////////////////////////////////////////////////////////////////////////////
1825/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1826////////////////////////////////////////////////////////////////////////////////
1827template <typename Architecture_t>
1828std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1829{
1830
1831 // Check whether the model is setup
1832 if (!fNet || fNet->GetDepth() == 0) {
1833 Log() << kFATAL << "The network has not been trained and fNet is not built"
1834 << Endl;
1835 }
1836
1837 // rebuild the networks
1838 this->SetBatchSize(batchSize);
1839 size_t inputDepth = this->GetInputDepth();
1840 size_t inputHeight = this->GetInputHeight();
1841 size_t inputWidth = this->GetInputWidth();
1842 size_t batchDepth = this->GetBatchDepth();
1843 size_t batchHeight = this->GetBatchHeight();
1844 size_t batchWidth = this->GetBatchWidth();
1845 ELossFunction J = fNet->GetLossFunction();
1846 EInitialization I = fNet->GetInitialization();
1847 ERegularization R = fNet->GetRegularization();
1848 Double_t weightDecay = fNet->GetWeightDecay();
1849
1850 using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1851 using Matrix_t = typename Architecture_t::Matrix_t;
1852 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1853
1854 // create the deep neural network
1855 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1856 std::vector<DeepNet_t> nets{};
1857 fBuildNet = false;
1858 CreateDeepNet(deepNet,nets);
1859
1860 // copy weights from the saved fNet to the built DeepNet
1861 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1862 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1863 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1864 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
1865 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
1866 // }
1867 }
1868
1869 size_t n1 = deepNet.GetBatchHeight();
1870 size_t n2 = deepNet.GetBatchWidth();
1871 size_t n0 = deepNet.GetBatchSize();
1872 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1873 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1874 n1 = deepNet.GetBatchSize();
1875 n0 = 1;
1876 }
1877 //this->SetBatchDepth(n0);
1878 Long64_t nEvents = lastEvt - firstEvt;
1879 TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1880 TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1881
1882
1883 // Tensor_t xInput;
1884 // for (size_t i = 0; i < n0; ++i)
1885 // xInput.emplace_back(Matrix_t(n1,n2));
1886
1887 // create pointer to output matrix used for the predictions
1888 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1889
1890 // use timer
1891 Timer timer( nEvents, GetName(), kTRUE );
1892
1893 if (logProgress)
1894 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1895 << "Evaluation of " << GetMethodName() << " on "
1896 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1897 << " sample (" << nEvents << " events)" << Endl;
1898
1899
1900 // eventg loop
1901 std::vector<double> mvaValues(nEvents);
1902
1903
1904 for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1905
1906 Long64_t ievt_end = ievt + batchSize;
1907 // case of batch prediction for
1908 if (ievt_end <= lastEvt) {
1909
1910 if (ievt == firstEvt) {
1911 Data()->SetCurrentEvent(ievt);
1912 size_t nVariables = GetEvent()->GetNVariables();
1913
1914 if (n1 == batchSize && n0 == 1) {
1915 if (n2 != nVariables) {
1916 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1917 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1918 << Endl;
1919 }
1920 } else {
1921 if (n1*n2 != nVariables || n0 != batchSize) {
1922 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1923 << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1924 << Endl;
1925 }
1926 }
1927 }
1928
1929 auto batch = testData.GetTensorBatch();
1930 auto inputTensor = batch.GetInput();
1931
1932 auto xInput = batch.GetInput();
1933 // make the prediction
1934 deepNet.Prediction(yHat, xInput, fOutputFunction);
1935 for (size_t i = 0; i < batchSize; ++i) {
1936 double value = yHat(i,0);
1937 mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1938 }
1939 }
1940 else {
1941 // case of remaining events: compute prediction by single event !
1942 for (Long64_t i = ievt; i < lastEvt; ++i) {
1943 Data()->SetCurrentEvent(i);
1944 mvaValues[i] = GetMvaValue();
1945 }
1946 }
1947 }
1948
1949 if (logProgress) {
1950 Log() << kINFO
1951 << "Elapsed time for evaluation of " << nEvents << " events: "
1952 << timer.GetElapsedTime() << " " << Endl;
1953 }
1954
1955 return mvaValues;
1956}
1957
1958//////////////////////////////////////////////////////////////////////////
1959/// Get the regression output values for a single event
1960//////////////////////////////////////////////////////////////////////////
1961const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1962{
1963
1964 FillInputTensor ();
1965
1966 // perform the network prediction
1967 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1968
1969 size_t nTargets = DataInfo().GetNTargets();
1970 R__ASSERT(nTargets == fYHat->GetNcols());
1971
1972 std::vector<Float_t> output(nTargets);
1973 for (size_t i = 0; i < nTargets; i++)
1974 output[i] = (*fYHat)(0, i);
1975
1976 // ned to transform back output values
1977 if (fRegressionReturnVal == NULL)
1978 fRegressionReturnVal = new std::vector<Float_t>(nTargets);
1979 R__ASSERT(fRegressionReturnVal->size() == nTargets);
1980
1981 // N.B. one should cache here temporary event class
1982 Event *evT = new Event(*GetEvent());
1983 for (size_t i = 0; i < nTargets; ++i) {
1984 evT->SetTarget(i, output[i]);
1985 }
1986 const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
1987 for (size_t i = 0; i < nTargets; ++i) {
1988 (*fRegressionReturnVal)[i] = evT2->GetTarget(i);
1989 }
1990 delete evT;
1991 return *fRegressionReturnVal;
1992}
1993//////////////////////////////////////////////////////////////////////////
1994/// Get the multi-class output values for a single event
1995//////////////////////////////////////////////////////////////////////////
1996const std::vector<Float_t> &TMVA::MethodDL::GetMulticlassValues()
1997{
1998
1999 FillInputTensor();
2000
2001 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
2002
2003 size_t nClasses = DataInfo().GetNClasses();
2004 R__ASSERT(nClasses == fYHat->GetNcols());
2005
2006 if (fMulticlassReturnVal == NULL) {
2007 fMulticlassReturnVal = new std::vector<Float_t>(nClasses);
2008 }
2009 R__ASSERT(fMulticlassReturnVal->size() == nClasses);
2010
2011 for (size_t i = 0; i < nClasses; i++) {
2012 (*fMulticlassReturnVal)[i] = (*fYHat)(0, i);
2013 }
2014 return *fMulticlassReturnVal;
2015}
2016
2017////////////////////////////////////////////////////////////////////////////////
2018/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
2019/// Here we will evaluate using a default batch size and the same architecture used for
2020/// Training
2021////////////////////////////////////////////////////////////////////////////////
2022std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
2023{
2024
2025 Long64_t nEvents = Data()->GetNEvents();
2026 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2027 if (firstEvt < 0) firstEvt = 0;
2028 nEvents = lastEvt-firstEvt;
2029
2030 // use same batch size as for training (from first strategy)
2031 size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
2032 size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
2033 if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
2034
2035 // using for training same scalar type defined for the prediction
2036 if (this->GetArchitectureString() == "GPU") {
2037#ifdef R__HAS_TMVAGPU
2038 Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
2039#ifdef R__HAS_CUDNN
2040 return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2041#else
2042 return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2043#endif
2044
2045#endif
2046 }
2047 Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
2048 return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
2049}
2050////////////////////////////////////////////////////////////////////////////////
2051void MethodDL::AddWeightsXMLTo(void * parent) const
2052{
2053 // Create the parent XML node with name "Weights"
2054 auto & xmlEngine = gTools().xmlengine();
2055 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2056
2057 /*! Get all necessary information, in order to be able to reconstruct the net
2058 * if we read the same XML file. */
2059
2060 // Deep Net specific info
2061 Int_t depth = fNet->GetDepth();
2062
2063 Int_t inputDepth = fNet->GetInputDepth();
2064 Int_t inputHeight = fNet->GetInputHeight();
2065 Int_t inputWidth = fNet->GetInputWidth();
2066
2067 Int_t batchSize = fNet->GetBatchSize();
2068
2069 Int_t batchDepth = fNet->GetBatchDepth();
2070 Int_t batchHeight = fNet->GetBatchHeight();
2071 Int_t batchWidth = fNet->GetBatchWidth();
2072
2073 char lossFunction = static_cast<char>(fNet->GetLossFunction());
2074 char initialization = static_cast<char>(fNet->GetInitialization());
2075 char regularization = static_cast<char>(fNet->GetRegularization());
2076
2077 Double_t weightDecay = fNet->GetWeightDecay();
2078
2079 // Method specific info (not sure these are needed)
2080 char outputFunction = static_cast<char>(this->GetOutputFunction());
2081 //char lossFunction = static_cast<char>(this->GetLossFunction());
2082
2083 // Add attributes to the parent node
2084 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2085
2086 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2087 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2088 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2089
2090 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2091 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2092 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2093 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2094
2095 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2096 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2097 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2098 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2099
2100 gTools().AddAttr(nn, "WeightDecay", weightDecay);
2101
2102
2103 for (Int_t i = 0; i < depth; i++)
2104 {
2105 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2106 }
2107
2108
2109}
2110
2111////////////////////////////////////////////////////////////////////////////////
2113{
2114
2115 auto netXML = gTools().GetChild(rootXML, "Weights");
2116 if (!netXML){
2117 netXML = rootXML;
2118 }
2119
2120 size_t netDepth;
2121 gTools().ReadAttr(netXML, "NetDepth", netDepth);
2122
2123 size_t inputDepth, inputHeight, inputWidth;
2124 gTools().ReadAttr(netXML, "InputDepth", inputDepth);
2125 gTools().ReadAttr(netXML, "InputHeight", inputHeight);
2126 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
2127
2128 size_t batchSize, batchDepth, batchHeight, batchWidth;
2129 gTools().ReadAttr(netXML, "BatchSize", batchSize);
2130 // use always batchsize = 1
2131 //batchSize = 1;
2132 gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
2133 gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
2134 gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
2135
2136 char lossFunctionChar;
2137 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
2138 char initializationChar;
2139 gTools().ReadAttr(netXML, "Initialization", initializationChar);
2140 char regularizationChar;
2141 gTools().ReadAttr(netXML, "Regularization", regularizationChar);
2142 char outputFunctionChar;
2143 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
2144 double weightDecay;
2145 gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
2146
2147 // create the net
2148
2149 // DeepNetCpu_t is defined in MethodDL.h
2150 this->SetInputDepth(inputDepth);
2151 this->SetInputHeight(inputHeight);
2152 this->SetInputWidth(inputWidth);
2153 this->SetBatchDepth(batchDepth);
2154 this->SetBatchHeight(batchHeight);
2155 this->SetBatchWidth(batchWidth);
2156
2157
2158
2159 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2160 batchHeight, batchWidth,
2161 static_cast<ELossFunction>(lossFunctionChar),
2162 static_cast<EInitialization>(initializationChar),
2163 static_cast<ERegularization>(regularizationChar),
2164 weightDecay));
2165
2166 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
2167
2168
2169 //size_t previousWidth = inputWidth;
2170 auto layerXML = gTools().xmlengine().GetChild(netXML);
2171
2172 // loop on the layer and add them to the network
2173 for (size_t i = 0; i < netDepth; i++) {
2174
2175 TString layerName = gTools().xmlengine().GetNodeName(layerXML);
2176
2177 // case of dense layer
2178 if (layerName == "DenseLayer") {
2179
2180 // read width and activation function and then we can create the layer
2181 size_t width = 0;
2182 gTools().ReadAttr(layerXML, "Width", width);
2183
2184 // Read activation function.
2185 TString funcString;
2186 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2187 EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
2188
2189
2190 fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
2191
2192 }
2193 // Convolutional Layer
2194 else if (layerName == "ConvLayer") {
2195
2196 // read width and activation function and then we can create the layer
2197 size_t depth = 0;
2198 gTools().ReadAttr(layerXML, "Depth", depth);
2199 size_t fltHeight, fltWidth = 0;
2200 size_t strideRows, strideCols = 0;
2201 size_t padHeight, padWidth = 0;
2202 gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
2203 gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
2204 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2205 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2206 gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
2207 gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
2208
2209 // Read activation function.
2210 TString funcString;
2211 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2212 EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
2213
2214
2215 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2216 padHeight, padWidth, actFunction);
2217
2218 }
2219
2220 // MaxPool Layer
2221 else if (layerName == "MaxPoolLayer") {
2222
2223 // read maxpool layer info
2224 size_t filterHeight, filterWidth = 0;
2225 size_t strideRows, strideCols = 0;
2226 gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
2227 gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2228 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2229 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2230
2231 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2232 }
2233 // Reshape Layer
2234 else if (layerName == "ReshapeLayer") {
2235
2236 // read reshape layer info
2237 size_t depth, height, width = 0;
2238 gTools().ReadAttr(layerXML, "Depth", depth);
2239 gTools().ReadAttr(layerXML, "Height", height);
2240 gTools().ReadAttr(layerXML, "Width", width);
2241 int flattening = 0;
2242 gTools().ReadAttr(layerXML, "Flattening",flattening );
2243
2244 fNet->AddReshapeLayer(depth, height, width, flattening);
2245
2246 }
2247 // RNN Layer
2248 else if (layerName == "RNNLayer") {
2249
2250 // read RNN layer info
2251 size_t stateSize,inputSize, timeSteps = 0;
2252 int rememberState= 0;
2253 int returnSequence = 0;
2254 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2255 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2256 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2257 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2258 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2259
2260 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2261
2262 }
2263 // LSTM Layer
2264 else if (layerName == "LSTMLayer") {
2265
2266 // read RNN layer info
2267 size_t stateSize,inputSize, timeSteps = 0;
2268 int rememberState, returnSequence = 0;
2269 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2270 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2271 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2272 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2273 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2274
2275 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2276
2277 }
2278 // GRU Layer
2279 else if (layerName == "GRULayer") {
2280
2281 // read RNN layer info
2282 size_t stateSize,inputSize, timeSteps = 0;
2283 int rememberState, returnSequence, resetGateAfter = 0;
2284 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2285 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2286 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2287 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2288 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2289 gTools().ReadAttr(layerXML, "ResetGateAfter", resetGateAfter);
2290
2291 if (!resetGateAfter && ArchitectureImpl_t::IsCudnn())
2292 Warning("ReadWeightsFromXML",
2293 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2294
2295 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2296 }
2297 // BatchNorm Layer
2298 else if (layerName == "BatchNormLayer") {
2299 // use some dammy value which will be overwrittem in BatchNormLayer::ReadWeightsFromXML
2300 fNet->AddBatchNormLayer(0., 0.0);
2301 }
2302 // read weights and biases
2303 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2304
2305 // read next layer
2306 layerXML = gTools().GetNextChild(layerXML);
2307 }
2308
2309 fBuildNet = false;
2310 // create now the input and output matrices
2311 //int n1 = batchHeight;
2312 //int n2 = batchWidth;
2313 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2314 //if (fXInput.size() > 0) fXInput.clear();
2315 //fXInput.emplace_back(MatrixImpl_t(n1,n2));
2317 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
2318 // make here a ColumnMajor tensor
2321
2322 // create pointer to output matrix used for the predictions
2323 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2324
2325
2326}
2327
2328
2329////////////////////////////////////////////////////////////////////////////////
void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
{
   // Reading weights from a plain text stream is not supported by MethodDL;
   // weights are persisted via XML only (see ReadWeightsFromXML).
}
2333
2334////////////////////////////////////////////////////////////////////////////////
2336{
2337 // TODO
2338 return NULL;
2339}
2340
2341////////////////////////////////////////////////////////////////////////////////
2343{
2344 // TODO
2345}
2346
2347} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define e(i)
Definition: RSha256.hxx:103
const Bool_t kFALSE
Definition: RtypesCore.h:101
unsigned int UInt_t
Definition: RtypesCore.h:46
double Double_t
Definition: RtypesCore.h:59
long long Long64_t
Definition: RtypesCore.h:80
const Bool_t kTRUE
Definition: RtypesCore.h:100
#define ClassImp(name)
Definition: Rtypes.h:375
#define R__ASSERT(e)
Definition: TError.h:118
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t width
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition: TString.cxx:2468
The Formula class.
Definition: TFormula.h:87
Double_t Eval(Double_t x) const
Sets first variable (e.g. x) and evaluate formula.
Definition: TFormula.cxx:3445
void Print(Option_t *option="") const override
Dump this line with its attributes.
Definition: TLine.cxx:415
UInt_t GetNCpu()
Definition: Config.h:70
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Definition: Configurable.h:168
MsgLogger & Log() const
Definition: Configurable.h:122
Adadelta Optimizer class.
Definition: Adadelta.h:45
Adagrad Optimizer class.
Definition: Adagrad.h:45
Adam Optimizer class.
Definition: Adam.h:45
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition: Cpu.h:108
static bool IsCudnn()
Definition: Cpu.h:131
Generic Deep Neural Network class.
Definition: DeepNet.h:73
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition: DeepNet.h:825
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:608
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:740
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:567
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:485
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:439
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:773
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:524
Generic layer class.
Definition: DenseLayer.h:59
RMSProp Optimizer class.
Definition: RMSProp.h:45
Stochastic Batch Gradient Descent Optimizer class.
Definition: SGD.h:46
Generic General Layer class.
Definition: GeneralLayer.h:51
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:395
Class that contains all the data information.
Definition: DataSetInfo.h:62
UInt_t GetNClasses() const
Definition: DataSetInfo.h:155
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:206
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:88
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:367
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:316
UInt_t GetClass() const
Definition: Event.h:86
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
const char * GetName() const
Definition: MethodBase.h:334
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:686
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
Definition: MethodBase.h:346
const TString & GetMethodName() const
Definition: MethodBase.h:331
const Event * GetEvent() const
Definition: MethodBase.h:751
DataSetInfo & DataInfo() const
Definition: MethodBase.h:410
UInt_t GetNVariables() const
Definition: MethodBase.h:345
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:595
UInt_t GetNvar() const
Definition: MethodBase.h:344
TrainingHistory fTrainHistory
Definition: MethodBase.h:425
DataSet * Data() const
Definition: MethodBase.h:409
IPythonInteractive * fInteractive
temporary dataset used when evaluating on a different data (used by MethodCategory::GetMvaValues)
Definition: MethodBase.h:448
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition: MethodDL.h:108
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition: MethodDL.h:183
void GetHelpMessage() const
Definition: MethodDL.cxx:2342
DNN::ELossFunction fLossFunction
The loss function.
Definition: MethodDL.h:190
std::vector< size_t > fInputShape
Contains the batch size (no.
Definition: MethodDL.h:178
TString fLayoutString
The string defining the layout of the deep net.
Definition: MethodDL.h:194
void SetInputDepth(int inputDepth)
Setters.
Definition: MethodDL.h:286
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:208
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1659
size_t GetBatchHeight() const
Definition: MethodDL.h:263
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class Here we will evaluate...
Definition: MethodDL.cxx:2022
TString fWeightInitializationString
The string defining the weight initialization method.
Definition: MethodDL.h:197
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:768
TensorImpl_t fXInput
Definition: MethodDL.h:206
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:186
virtual const std::vector< Float_t > & GetMulticlassValues()
TString fArchitectureString
The string defining the architecture: CPU or GPU.
Definition: MethodDL.h:198
void Init()
default initializations
Definition: MethodDL.cxx:432
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:1019
void TrainDeepNet()
train of deep neural network using the defined architecture
Definition: MethodDL.cxx:1164
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:280
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:269
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:583
UInt_t GetNumValidationSamples()
parce the validation string and return the number of event data used for validation
TString GetBatchLayoutString() const
Definition: MethodDL.h:273
void SetInputWidth(int inputWidth)
Definition: MethodDL.h:288
void ProcessOptions()
Definition: MethodDL.cxx:219
HostBufferImpl_t fXInputBuffer
Definition: MethodDL.h:207
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition: MethodDL.h:184
size_t GetInputDepth() const
Definition: MethodDL.h:255
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:209
TString GetInputLayoutString() const
Definition: MethodDL.h:272
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:293
size_t GetInputHeight() const
Definition: MethodDL.h:256
TString GetArchitectureString() const
Definition: MethodDL.h:278
void ParseBatchLayout()
Parse the input layout.
Definition: MethodDL.cxx:482
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:890
void ReadWeightsFromStream(std::istream &)
Definition: MethodDL.cxx:2330
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:2112
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition: MethodDL.h:199
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:93
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:189
DNN::EInitialization fWeightInitialization
The initialization method.
Definition: MethodDL.h:188
size_t GetBatchDepth() const
Definition: MethodDL.h:262
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate rnn layer.
Definition: MethodDL.cxx:931
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:204
size_t GetInputWidth() const
Definition: MethodDL.h:257
void SetInputShape(std::vector< size_t > inputShape)
Definition: MethodDL.h:289
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:270
TString fBatchLayoutString
The string defining the layout of the batch.
Definition: MethodDL.h:193
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:1091
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:669
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:829
virtual const std::vector< Float_t > & GetRegressionValues()
TString fTrainingStrategyString
The string defining the training strategy.
Definition: MethodDL.h:196
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2335
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition: MethodDL.h:110
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:292
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:1052
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:294
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1828
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:268
void SetBatchSize(size_t batchSize)
Definition: MethodDL.h:291
TString GetLayoutString() const
Definition: MethodDL.h:274
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition: MethodDL.h:182
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition: MethodDL.h:106
size_t GetBatchWidth() const
Definition: MethodDL.h:264
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:2051
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:107
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:1045
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
Definition: MethodDL.cxx:1772
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:439
void FillInputTensor()
Get the input event tensor for evaluation Internal function to fill the fXInput tensor with the corre...
Definition: MethodDL.cxx:1704
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:201
void SetInputHeight(int inputHeight)
Definition: MethodDL.h:287
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling the ProcesOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:529
TString fErrorStrategy
The string defining the error strategy for training.
Definition: MethodDL.h:195
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:167
TString fInputLayoutString
The string defining the layout of the input.
Definition: MethodDL.h:192
EMsgType GetMinType() const
Definition: MsgLogger.h:69
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:146
TXMLEngine & xmlengine()
Definition: Tools.h:262
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition: Tools.cxx:1150
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:347
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1223
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition: Tools.cxx:1162
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
Definition: Types.h:71
EAnalysisType
Definition: Types.h:126
@ kMulticlass
Definition: Types.h:129
@ kClassification
Definition: Types.h:127
@ kRegression
Definition: Types.h:128
@ kTraining
Definition: Types.h:143
@ kDEBUG
Definition: Types.h:56
@ kHEADER
Definition: Types.h:63
@ kERROR
Definition: Types.h:60
@ kINFO
Definition: Types.h:58
@ kWARNING
Definition: Types.h:59
@ kFATAL
Definition: Types.h:61
void Print(Option_t *option="") const override
Dump this marker with its attributes.
Definition: TMarker.cxx:334
void Print(Option_t *name="") const override
Print the matrix as a table of elements.
TMatrixT.
Definition: TMatrixT.h:39
void Print(Option_t *option="") const override
Print TNamed name and title.
Definition: TNamed.cxx:128
An array of TObjects.
Definition: TObjArray.h:31
Collectable string class.
Definition: TObjString.h:28
const TString & GetString() const
Definition: TObjString.h:46
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:955
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:969
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:630
Basic string class.
Definition: TString.h:136
Ssiz_t Length() const
Definition: TString.h:410
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1967
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1152
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1837
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:532
const char * Data() const
Definition: TString.h:369
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:693
@ kTrailing
Definition: TString.h:267
@ kBoth
Definition: TString.h:267
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1184
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2243
Bool_t IsNull() const
Definition: TString.h:407
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2357
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:715
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
RVec< PromoteType< T > > abs(const RVec< T > &v)
Definition: RVec.hxx:1778
const Int_t n
Definition: legend1.C:16
#define I(x, y, z)
double T(double x)
Definition: ChebyshevPol.h:34
static const std::string separator("@@@")
EInitialization
Definition: Functions.h:72
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:82
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:238
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:57
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDL.cxx:75
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:148
Bool_t IsNaN(Double_t x)
Definition: TMath.h:890
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition: TMath.h:754
All of the options that can be specified in the training string.
Definition: MethodDL.h:72
std::map< TString, double > optimizerParams
Definition: MethodDL.h:84
DNN::EOptimizer optimizer
Definition: MethodDL.h:78
DNN::ERegularization regularization
Definition: MethodDL.h:77
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:83
TMarker m
Definition: textangle.C:8
TLine l
Definition: textangle.C:4
auto * t1
Definition: textangle.C:20
double epsilon
Definition: triangle.c:618
static void output()