Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodDL.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/cnn:$Id$
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (see tmva/doc/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31#include "TObjString.h"
32
33#include "TMVA/Tools.h"
34#include "TMVA/Configurable.h"
35#include "TMVA/IMethod.h"
37#include "TMVA/MethodDL.h"
38#include "TMVA/Types.h"
40#include "TMVA/DNN/Functions.h"
42#include "TMVA/DNN/SGD.h"
43#include "TMVA/DNN/Adam.h"
44#include "TMVA/DNN/Adagrad.h"
45#include "TMVA/DNN/RMSProp.h"
46#include "TMVA/DNN/Adadelta.h"
47#include "TMVA/Timer.h"
48
49#ifdef R__HAS_TMVAGPU
51#ifdef R__HAS_CUDNN
53#endif
54#endif
55
56#include <chrono>
57
59
60using namespace TMVA::DNN::CNN;
61using namespace TMVA::DNN;
62
68
69
70namespace TMVA {
71
72
73////////////////////////////////////////////////////////////////////////////////
74TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
75{
76 key.ToUpper();
77 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
78 if (it == keyValueMap.end()) {
79 return TString("");
80 }
81 return it->second;
82}
83
84////////////////////////////////////////////////////////////////////////////////
// Generic overload: fetch the string stored under `key` and convert it to T,
// returning `defaultValue` when the key is absent. Only the explicit
// specializations below (int, double, TString, bool, vector<double>) are
// defined; other instantiations would fail to link.
85template <typename T>
86T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
87
88////////////////////////////////////////////////////////////////////////////////
89template <>
90int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
91{
93 if (value == "") {
94 return defaultValue;
95 }
96 return value.Atoi();
97}
98
99////////////////////////////////////////////////////////////////////////////////
100template <>
101double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
102{
104 if (value == "") {
105 return defaultValue;
106 }
107 return value.Atof();
108}
109
110////////////////////////////////////////////////////////////////////////////////
111template <>
112TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
113{
115 if (value == "") {
116 return defaultValue;
117 }
118 return value;
119}
120
121////////////////////////////////////////////////////////////////////////////////
122template <>
123bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
124{
126 if (value == "") {
127 return defaultValue;
128 }
129
130 value.ToUpper();
131 if (value == "TRUE" || value == "T" || value == "1") {
132 return true;
133 }
134
135 return false;
136}
137
138////////////////////////////////////////////////////////////////////////////////
139template <>
140std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
141 std::vector<double> defaultValue)
142{
144 if (parseString == "") {
145 return defaultValue;
146 }
147
148 parseString.ToUpper();
149 std::vector<double> values;
150
151 const TString tokenDelim("+");
155 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
156 std::stringstream sstr;
157 double currentValue;
158 sstr << tokenString->GetString().Data();
160 values.push_back(currentValue);
161 }
162 return values;
163}
164
165////////////////////////////////////////////////////////////////////////////////
167{
168 // Set default values for all option strings
169
170 DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
171
172 DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
173
174 DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
175
176 DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
177 " or cross entropy (binary classification).");
178 AddPreDefVal(TString("CROSSENTROPY"));
179 AddPreDefVal(TString("SUMOFSQUARES"));
180 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
181
182 DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
183 AddPreDefVal(TString("XAVIER"));
184 AddPreDefVal(TString("XAVIERUNIFORM"));
185 AddPreDefVal(TString("GAUSS"));
186 AddPreDefVal(TString("UNIFORM"));
187 AddPreDefVal(TString("IDENTITY"));
188 AddPreDefVal(TString("ZERO"));
189
190 DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
191
192 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
193 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
194 "Specify as 100 to use exactly 100 events. (Default: 20%)");
195
196 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
197 AddPreDefVal(TString("STANDARD")); // deprecated and not supported anymore
198 AddPreDefVal(TString("CPU"));
199 AddPreDefVal(TString("GPU"));
200 AddPreDefVal(TString("OPENCL")); // not yet implemented
201 AddPreDefVal(TString("CUDNN")); // not needed (by default GPU is now CUDNN if available)
202
203 // define training strategy separated by a separator "|"
204 DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-3,"
205 "Momentum=0.0,"
206 "ConvergenceSteps=100,"
207 "MaxEpochs=2000,"
208 "Optimizer=ADAM,"
209 "BatchSize=30,"
210 "TestRepetitions=1,"
211 "WeightDecay=0.0,"
212 "Regularization=None,"
213 "DropConfig=0.0",
214 "TrainingStrategy", "Defines the training strategies.");
215}
216
217////////////////////////////////////////////////////////////////////////////////
219{
220
222 Log() << kINFO << "Will ignore negative events in training!" << Endl;
223 }
224
225 if (fArchitectureString == "STANDARD") {
226 Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
227 "Please use Architecture=CPU or Architecture=CPU."
228 "See the TMVA Users' Guide for instructions if you "
229 "encounter problems."
230 << Endl;
231 Log() << kINFO << "We will use instead the CPU architecture" << Endl;
232 fArchitectureString = "CPU";
233 }
234 if (fArchitectureString == "OPENCL") {
235 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
236 "Please use Architecture=CPU or Architecture=CPU for the "
237 "time being. See the TMVA Users' Guide for instructions "
238 "if you encounter problems."
239 << Endl;
240 // use instead GPU
241 Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
242 fArchitectureString = "GPU";
243 }
244
245 // the architecture can now be set at runtime as an option
246
247
248 if (fArchitectureString == "GPU" || fArchitectureString == "CUDNN") {
249#ifdef R__HAS_TMVAGPU
250 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
251#else // case TMVA does not support GPU
252 Log() << kERROR << "CUDA backend not enabled. Please make sure "
253 "you have CUDA installed and it was successfully "
254 "detected by CMAKE by using -Dtmva-gpu=On "
255 << Endl;
256 fArchitectureString = "CPU";
257 Log() << kINFO << "Will now use instead the CPU architecture !" << Endl;
258#endif
259 }
260
261 if (fArchitectureString == "CPU") {
262#ifdef R__HAS_TMVACPU // TMVA has CPU BLAS and IMT support
263 Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support !" << Endl;
264#else // TMVA has no CPU BLAS or IMT support
265 Log() << kINFO << "Multi-core CPU backend not enabled. For better performances, make sure "
266 "you have a BLAS implementation and it was successfully "
267 "detected by CMake as well that the imt CMake flag is set."
268 << Endl;
269 Log() << kINFO << "Will use anyway the CPU architecture but with slower performance" << Endl;
270#endif
271 }
272
273 // Input Layout
276
277 // Loss function and output.
278 fOutputFunction = EOutputFunction::kSigmoid;
280 if (fErrorStrategy == "SUMOFSQUARES") {
281 fLossFunction = ELossFunction::kMeanSquaredError;
282 }
283 if (fErrorStrategy == "CROSSENTROPY") {
284 fLossFunction = ELossFunction::kCrossEntropy;
285 }
286 fOutputFunction = EOutputFunction::kSigmoid;
287 } else if (fAnalysisType == Types::kRegression) {
288 if (fErrorStrategy != "SUMOFSQUARES") {
289 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
290 << " neural net error function. Setting error function to "
291 << " SUMOFSQUARES now." << Endl;
292 }
293
294 fLossFunction = ELossFunction::kMeanSquaredError;
295 fOutputFunction = EOutputFunction::kIdentity;
296 } else if (fAnalysisType == Types::kMulticlass) {
297 if (fErrorStrategy == "SUMOFSQUARES") {
298 fLossFunction = ELossFunction::kMeanSquaredError;
299 }
300 if (fErrorStrategy == "CROSSENTROPY") {
301 fLossFunction = ELossFunction::kCrossEntropy;
302 }
303 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
304 fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
305 }
306 fOutputFunction = EOutputFunction::kSoftmax;
307 }
308
309 // Initialization
310 // the biases will be always initialized to zero
311 if (fWeightInitializationString == "XAVIER") {
313 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
315 } else if (fWeightInitializationString == "GAUSS") {
317 } else if (fWeightInitializationString == "UNIFORM") {
319 } else if (fWeightInitializationString == "ZERO") {
321 } else if (fWeightInitializationString == "IDENTITY") {
323 } else {
325 }
326
327 // Training settings.
328
330 for (auto &block : strategyKeyValues) {
332
333 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
334 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
335 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
336 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
337 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
338 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
339 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
340 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
341
342 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
343 if (regularization == "L1") {
344 settings.regularization = DNN::ERegularization::kL1;
345 } else if (regularization == "L2") {
346 settings.regularization = DNN::ERegularization::kL2;
347 } else {
348 settings.regularization = DNN::ERegularization::kNone;
349 }
350
351 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
352 settings.optimizerName = optimizer;
353 if (optimizer == "SGD") {
355 } else if (optimizer == "ADAM") {
357 } else if (optimizer == "ADAGRAD") {
359 } else if (optimizer == "RMSPROP") {
361 } else if (optimizer == "ADADELTA") {
363 } else {
364 // Make Adam as default choice if the input string is
365 // incorrect.
367 settings.optimizerName = "ADAM";
368 }
369 // check for specific optimizer parameters
370 std::vector<TString> optimParamLabels = {"_beta1", "_beta2", "_eps", "_rho"};
371 //default values
372 std::map<TString, double> defaultValues = {
373 {"ADADELTA_eps", 1.E-8}, {"ADADELTA_rho", 0.95},
374 {"ADAGRAD_eps", 1.E-8},
375 {"ADAM_beta1", 0.9}, {"ADAM_beta2", 0.999}, {"ADAM_eps", 1.E-7},
376 {"RMSPROP_eps", 1.E-7}, {"RMSPROP_rho", 0.9},
377 };
378 for (auto &pN : optimParamLabels) {
379 TString optimParamName = settings.optimizerName + pN;
380 // check if optimizer has default values for this specific parameters
381 if (defaultValues.count(optimParamName) > 0) {
384 // create entry in settings for this optimizer parameter
385 settings.optimizerParams[optimParamName] = val;
386 }
387 }
388
389 fTrainingSettings.push_back(settings);
390 }
391
392 // this set fInputShape[0] = batchSize
393 this->SetBatchSize(fTrainingSettings.front().batchSize);
394
395 // case inputlayout and batch layout was not given. Use default then
396 // (1, batchsize, nvariables)
397 // fInputShape[0] -> BatchSize
398 // fInputShape[1] -> InputDepth
399 // fInputShape[2] -> InputHeight
400 // fInputShape[3] -> InputWidth
401 if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
402 fInputShape[1] = 1;
403 fInputShape[2] = 1;
405 }
406 // case when batch layout is not provided (all zero)
407 // batch layout can be determined by the input layout + batch size
408 // case DNN : { 1, B, W }
409 // case CNN : { B, C, H*W}
410 // case RNN : { B, T, H*W }
411
412 if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
413 // case first layer is DENSE
414 if (fInputShape[2] == 1 && fInputShape[1] == 1) {
415 // case of (1, batchsize, input features)
416 fBatchDepth = 1;
417 fBatchHeight = fTrainingSettings.front().batchSize;
419 }
420 else { // more general cases (e.g. for CNN)
421 // case CONV or RNN
422 fBatchDepth = fTrainingSettings.front().batchSize;
425 }
426 }
427}
428
429////////////////////////////////////////////////////////////////////////////////
430/// default initializations
432{
433 // Nothing to do here
434}
435
436////////////////////////////////////////////////////////////////////////////////
437/// Parse the input layout
439{
440 // Define the delimiter
441 const TString delim("|");
442
443 // Get the input layout string
445
446 // Split the input layout string
450
451 // Go through every token and save its absolute value in the shape array
452 // The first token is the batch size for easy compatibility with cudnn
453 int subDim = 1;
454 std::vector<size_t> inputShape;
455 inputShape.reserve(inputLayoutString.Length()/2 + 2);
456 inputShape.push_back(0); // Will be set later by Trainingsettings, use 0 value now
457 for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
458 // size_t is unsigned
459 subDim = (size_t) abs(inputDimString->GetString().Atoi());
460 // Size among unused dimensions should be set to 1 for cudnn
461 //if (subDim == 0) subDim = 1;
462 inputShape.push_back(subDim);
463 }
464 // it is expected that empty Shape has at least 4 dimensions. We pad the missing one's with 1
465 // for example in case of dense layer input layouts
466 // when we will support 3D convolutions we would need to add extra 1's
467 if (inputShape.size() == 2) {
468 // case of dense layer where only width is specified
469 inputShape = {inputShape[0], 1, 1, inputShape[1]};
470 }
471 else if (inputShape.size() == 3) {
472 //e.g. case of RNN T,W -> T,1,W
473 inputShape = {inputShape[0], inputShape[1], 1, inputShape[2]};
474 }
475
476 this->SetInputShape(inputShape);
477}
478
479////////////////////////////////////////////////////////////////////////////////
480/// Parse the batch layout
482{
483 // Define the delimiter
484 const TString delim("|");
485
486 // Get the input layout string
488
489 size_t batchDepth = 0;
490 size_t batchHeight = 0;
491 size_t batchWidth = 0;
492
493 // Split the input layout string
497 int idxToken = 0;
498
499 for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
500 switch (idxToken) {
501 case 0: // input depth
502 {
503 TString strDepth(batchDimString->GetString());
504 batchDepth = (size_t)strDepth.Atoi();
505 } break;
506 case 1: // input height
507 {
508 TString strHeight(batchDimString->GetString());
509 batchHeight = (size_t)strHeight.Atoi();
510 } break;
511 case 2: // input width
512 {
513 TString strWidth(batchDimString->GetString());
514 batchWidth = (size_t)strWidth.Atoi();
515 } break;
516 }
517 ++idxToken;
518 }
519
520 this->SetBatchDepth(batchDepth);
521 this->SetBatchHeight(batchHeight);
522 this->SetBatchWidth(batchWidth);
523}
524
525////////////////////////////////////////////////////////////////////////////////
526/// Create a deep net based on the layout string
527template <typename Architecture_t, typename Layer_t>
530{
531 // Layer specification, layer details
532 const TString layerDelimiter(",");
533 const TString subDelimiter("|");
534
536
537 //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
538
539 // Split layers
543
544
545 for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
546
547 // Split layer details
548 TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
550 TObjString *token = (TObjString *)nextToken();
551
552 // Determine the type of the layer
553 TString strLayerType = token->GetString();
554
555
556 if (strLayerType == "DENSE") {
558 } else if (strLayerType == "CONV") {
560 } else if (strLayerType == "MAXPOOL") {
562 } else if (strLayerType == "RESHAPE") {
564 } else if (strLayerType == "BNORM") {
566 } else if (strLayerType == "RNN") {
568 } else if (strLayerType == "LSTM") {
570 } else if (strLayerType == "GRU") {
572 } else {
573 // no type of layer specified - assume is dense layer as in old DNN interface
575 }
576 }
577}
578
579////////////////////////////////////////////////////////////////////////////////
580/// Parses the layer string and creates the appropriate dense layer
581template <typename Architecture_t, typename Layer_t>
585{
586 int width = 0;
587 EActivationFunction activationFunction = EActivationFunction::kTanh;
588
589 // this return number of input variables for the method
590 // it can be used to deduce width of dense layer if specified as N+10
591 // where N is the number of input variables
592 const size_t inputSize = GetNvar();
593
594 // Split layer details
597 TObjString *token = (TObjString *)nextToken();
598
599 // loop on the tokens
600 // order of sepcifying width and activation function is not relevant
601 // both 100|TANH and TANH|100 are valid cases
602 for (; token != nullptr; token = (TObjString *)nextToken()) {
603 // try a match with the activation function
604 TString strActFnc(token->GetString());
605 // if first token defines the layer type- skip it
606 if (strActFnc =="DENSE") continue;
607
608 if (strActFnc == "RELU") {
609 activationFunction = DNN::EActivationFunction::kRelu;
610 } else if (strActFnc == "TANH") {
611 activationFunction = DNN::EActivationFunction::kTanh;
612 } else if (strActFnc == "FTANH") {
613 activationFunction = DNN::EActivationFunction::kFastTanh;
614 } else if (strActFnc == "SYMMRELU") {
615 activationFunction = DNN::EActivationFunction::kSymmRelu;
616 } else if (strActFnc == "SOFTSIGN") {
617 activationFunction = DNN::EActivationFunction::kSoftSign;
618 } else if (strActFnc == "SIGMOID") {
619 activationFunction = DNN::EActivationFunction::kSigmoid;
620 } else if (strActFnc == "LINEAR") {
621 activationFunction = DNN::EActivationFunction::kIdentity;
622 } else if (strActFnc == "GAUSS") {
623 activationFunction = DNN::EActivationFunction::kGauss;
624 } else if (width == 0) {
625 // no match found try to parse as text showing the width
626 // support for input a formula where the variable 'x' is 'N' in the string
627 // use TFormula for the evaluation
629 // number of nodes
630 TString strN("x");
631 strNumNodes.ReplaceAll("N", strN);
632 strNumNodes.ReplaceAll("n", strN);
633 TFormula fml("tmp", strNumNodes);
634 width = fml.Eval(inputSize);
635 }
636 }
637 // avoid zero width. assume is last layer and give width = output width
638 // Determine the number of outputs
639 size_t outputSize = 1;
641 outputSize = GetNTargets();
642 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
643 outputSize = DataInfo().GetNClasses();
644 }
645 if (width == 0) width = outputSize;
646
647 // Add the dense layer, initialize the weights and biases and copy
648 TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
649 denseLayer->Initialize();
650
651 // add same layer to fNet
652 if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
653
654 //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
655
656 // add the copy to all slave nets
657 //for (size_t i = 0; i < nets.size(); i++) {
658 // nets[i].AddDenseLayer(copyDenseLayer);
659 //}
660
661 // check compatibility of added layer
662 // for a dense layer input should be 1 x 1 x DxHxW
663}
664
665////////////////////////////////////////////////////////////////////////////////
666/// Parses the layer string and creates the appropriate convolutional layer
667template <typename Architecture_t, typename Layer_t>
671{
672 int depth = 0;
673 int fltHeight = 0;
674 int fltWidth = 0;
675 int strideRows = 0;
676 int strideCols = 0;
677 int zeroPadHeight = 0;
678 int zeroPadWidth = 0;
679 EActivationFunction activationFunction = EActivationFunction::kTanh;
680
681 // Split layer details
684 TObjString *token = (TObjString *)nextToken();
685 int idxToken = 0;
686
687 for (; token != nullptr; token = (TObjString *)nextToken()) {
688 switch (idxToken) {
689 case 1: // depth
690 {
691 TString strDepth(token->GetString());
692 depth = strDepth.Atoi();
693 } break;
694 case 2: // filter height
695 {
697 fltHeight = strFltHeight.Atoi();
698 } break;
699 case 3: // filter width
700 {
701 TString strFltWidth(token->GetString());
702 fltWidth = strFltWidth.Atoi();
703 } break;
704 case 4: // stride in rows
705 {
707 strideRows = strStrideRows.Atoi();
708 } break;
709 case 5: // stride in cols
710 {
712 strideCols = strStrideCols.Atoi();
713 } break;
714 case 6: // zero padding height
715 {
718 } break;
719 case 7: // zero padding width
720 {
723 } break;
724 case 8: // activation function
725 {
726 TString strActFnc(token->GetString());
727 if (strActFnc == "RELU") {
728 activationFunction = DNN::EActivationFunction::kRelu;
729 } else if (strActFnc == "TANH") {
730 activationFunction = DNN::EActivationFunction::kTanh;
731 } else if (strActFnc == "SYMMRELU") {
732 activationFunction = DNN::EActivationFunction::kSymmRelu;
733 } else if (strActFnc == "SOFTSIGN") {
734 activationFunction = DNN::EActivationFunction::kSoftSign;
735 } else if (strActFnc == "SIGMOID") {
736 activationFunction = DNN::EActivationFunction::kSigmoid;
737 } else if (strActFnc == "LINEAR") {
738 activationFunction = DNN::EActivationFunction::kIdentity;
739 } else if (strActFnc == "GAUSS") {
740 activationFunction = DNN::EActivationFunction::kGauss;
741 }
742 } break;
743 }
744 ++idxToken;
745 }
746
747 // Add the convolutional layer, initialize the weights and biases and copy
748 TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
749 zeroPadHeight, zeroPadWidth, activationFunction);
750 convLayer->Initialize();
751
752 // Add same layer to fNet
753 if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
754 zeroPadHeight, zeroPadWidth, activationFunction);
755
756 //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
757
758 //// add the copy to all slave nets
759 //for (size_t i = 0; i < nets.size(); i++) {
760 // nets[i].AddConvLayer(copyConvLayer);
761 //}
762}
763
764////////////////////////////////////////////////////////////////////////////////
765/// Parses the layer string and creates the appropriate max pool layer
766template <typename Architecture_t, typename Layer_t>
770{
771
772 int filterHeight = 0;
773 int filterWidth = 0;
774 int strideRows = 0;
775 int strideCols = 0;
776
777 // Split layer details
780 TObjString *token = (TObjString *)nextToken();
781 int idxToken = 0;
782
783 for (; token != nullptr; token = (TObjString *)nextToken()) {
784 switch (idxToken) {
785 case 1: // filter height
786 {
788 filterHeight = strFrmHeight.Atoi();
789 } break;
790 case 2: // filter width
791 {
792 TString strFrmWidth(token->GetString());
793 filterWidth = strFrmWidth.Atoi();
794 } break;
795 case 3: // stride in rows
796 {
798 strideRows = strStrideRows.Atoi();
799 } break;
800 case 4: // stride in cols
801 {
803 strideCols = strStrideCols.Atoi();
804 } break;
805 }
806 ++idxToken;
807 }
808
809 // Add the Max pooling layer
810 // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
811 deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
812
813 // Add the same layer to fNet
814 if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
815
816
817 //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
818
819 //// add the copy to all slave nets
820 //for (size_t i = 0; i < nets.size(); i++) {
821 // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
822 //}
823}
824
825////////////////////////////////////////////////////////////////////////////////
826/// Parses the layer string and creates the appropriate reshape layer
827template <typename Architecture_t, typename Layer_t>
831{
832 int depth = 0;
833 int height = 0;
834 int width = 0;
835 bool flattening = false;
836
837 // Split layer details
840 TObjString *token = (TObjString *)nextToken();
841 int idxToken = 0;
842
843 for (; token != nullptr; token = (TObjString *)nextToken()) {
844 if (token->GetString() == "FLAT") idxToken=4;
845 switch (idxToken) {
846 case 1: {
847 TString strDepth(token->GetString());
848 depth = strDepth.Atoi();
849 } break;
850 case 2: // height
851 {
852 TString strHeight(token->GetString());
853 height = strHeight.Atoi();
854 } break;
855 case 3: // width
856 {
857 TString strWidth(token->GetString());
858 width = strWidth.Atoi();
859 } break;
860 case 4: // flattening
861 {
862 TString flat(token->GetString());
863 if (flat == "FLAT") {
864 flattening = true;
865 }
866 } break;
867 }
868 ++idxToken;
869 }
870
871 // Add the reshape layer
872 // TReshapeLayer<Architecture_t> *reshapeLayer =
873 deepNet.AddReshapeLayer(depth, height, width, flattening);
874
875 // Add the same layer to fNet
876 if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
877
878 //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
879
880 //// add the copy to all slave nets
881 //for (size_t i = 0; i < nets.size(); i++) {
882 // nets[i].AddReshapeLayer(copyReshapeLayer);
883 //}
884}
885
886////////////////////////////////////////////////////////////////////////////////
887/// Parses the layer string and creates the appropriate batch normalization layer
888template <typename Architecture_t, typename Layer_t>
892{
893
894 // default values
895 double momentum = -1; //0.99;
896 double epsilon = 0.0001;
897
898 // Split layer details
901 TObjString *token = (TObjString *)nextToken();
902 int idxToken = 0;
903
904 for (; token != nullptr; token = (TObjString *)nextToken()) {
905 switch (idxToken) {
906 case 1: {
907 momentum = std::atof(token->GetString().Data());
908 } break;
909 case 2: // height
910 {
911 epsilon = std::atof(token->GetString().Data());
912 } break;
913 }
914 ++idxToken;
915 }
916
917 // Add the batch norm layer
918 //
919 auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
920 layer->Initialize();
921
922 // Add the same layer to fNet
923 if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
924
925}
926
927////////////////////////////////////////////////////////////////////////////////
928/// Pases the layer string and creates the appropriate rnn layer
929template <typename Architecture_t, typename Layer_t>
933{
934 // int depth = 0;
935 int stateSize = 0;
936 int inputSize = 0;
937 int timeSteps = 0;
938 bool rememberState = false;
939 bool returnSequence = false;
940 bool resetGateAfter = false;
941
942 // Split layer details
945 TObjString *token = (TObjString *)nextToken();
946 int idxToken = 0;
947
948 for (; token != nullptr; token = (TObjString *)nextToken()) {
949 switch (idxToken) {
950 case 1: // state size
951 {
953 stateSize = strstateSize.Atoi();
954 break;
955 }
956 case 2: // input size
957 {
959 inputSize = strinputSize.Atoi();
960 break;
961 }
962 case 3: // time steps
963 {
965 timeSteps = strtimeSteps.Atoi();
966 break;
967 }
968 case 4: // returnSequence (option stateful in Keras)
969 {
972 break;
973 }
974 case 5: // return full output sequence (1 or 0)
975 {
976 TString str(token->GetString());
977 returnSequence = (bool)str.Atoi();
978 break;
979 }
980 case 6: // resetGate after option (only for GRU)
981 {
982 TString str(token->GetString());
983 resetGateAfter = (bool)str.Atoi();
984 }
985 }
986 ++idxToken;
987 }
988
989 // Add the recurrent layer, initialize the weights and biases and copy
990 if (rnnType == kLayerRNN) {
991 auto * recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
992 recurrentLayer->Initialize();
993 // Add same layer to fNet
994 if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
995 }
996 else if (rnnType == kLayerLSTM ) {
997 auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
998 recurrentLayer->Initialize();
999 // Add same layer to fNet
1000 if (fBuildNet)
1001 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1002 }
1003 else if (rnnType == kLayerGRU) {
1004 if (Architecture_t::IsCudnn()) resetGateAfter = true; // needed for Cudnn
1005 auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1006 recurrentLayer->Initialize();
1007 // Add same layer to fNet
1008 if (fBuildNet)
1009 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1010 }
1011 else {
1012 Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
1013 }
1014}
1015
1016////////////////////////////////////////////////////////////////////////////////
1017/// Standard constructor.
1019 : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1020 fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1021 fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1022 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1023 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1024 fXInput()
1025{
1026 // Nothing to do here
1027}
1028
1029////////////////////////////////////////////////////////////////////////////////
1030/// Constructor from a weight file.
1032 : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1033 fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1034 fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1035 fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1036 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1037 fXInput()
1038{
1039 // Nothing to do here
1040}
1041
1042////////////////////////////////////////////////////////////////////////////////
1043/// Destructor.
1045{
1046 // Nothing to do here
1047}
1048
1049////////////////////////////////////////////////////////////////////////////////
1050/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
// NOTE(review): several lines are missing from this extracted listing (the
// function signature, the declaration of the result vector `blockKeyValues`,
// the block tokenization that yields `blockString`/`nextBlock`/`tokenDelim`,
// the token iterator `nextToken`, and the lines that extract `strKey` and
// `strValue` from `strKeyValue`). The visible logic splits the option string
// into blocks, then each block into KEY=VALUE tokens, upper-casing the key.
1052{
1053 // remove all spaces from the option string so tokens can be matched exactly
1054 parseString.ReplaceAll(" ","");
1056 const TString keyValueDelim("=");
1057
1061
// outer loop: one map of key/value pairs per block
1062 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1063 blockKeyValues.push_back(std::map<TString, TString>());
1064 std::map<TString, TString> &currentBlock = blockKeyValues.back();
1065
1066 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1068 TObjString *token = (TObjString *)nextToken();
1069
// inner loop: split each token at '=' into a key/value pair
1070 for (; token != nullptr; token = (TObjString *)nextToken()) {
1071 TString strKeyValue(token->GetString());
1072 int delimPos = strKeyValue.First(keyValueDelim.Data());
// skip tokens without '=' (or where '=' is the first character)
1073 if (delimPos <= 0) continue;
1074
1076 strKey.ToUpper();
1078
// NOTE(review): TString::Strip returns a stripped TSubString rather than
// modifying the string in place, so these two calls appear to discard
// their result (harmless here since all spaces were removed above) —
// confirm intent.
1079 strKey.Strip(TString::kBoth, ' ');
1080 strValue.Strip(TString::kBoth, ' ');
1081
1082 currentBlock.insert(std::make_pair(strKey, strValue));
1083 }
1084 }
1085 return blockKeyValues;
1086}
1087
1088////////////////////////////////////////////////////////////////////////////////
1089/// What kind of analysis type can handle the CNN
// NOTE(review): the function signature line (internal line 1090) is missing
// from this extracted listing; from the body, the parameters are the analysis
// `type` and `numberClasses`. Supported analyses: two-class classification,
// multiclass classification, and regression; everything else is rejected.
1091{
1092 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1093 if (type == Types::kMulticlass) return kTRUE;
1094 if (type == Types::kRegression) return kTRUE;
1095
1096 return kFALSE;
1097}
1098
1099////////////////////////////////////////////////////////////////////////////////
1100/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1101/// 100 etc.
1102/// - 20% and 0.2 selects 20% of the training set as validation data.
1103/// - 100 selects 100 events as the validation data.
1104///
1105/// @return number of samples in validation set
1106///
// NOTE(review): this extracted listing is missing the function signature
// (internal line 1107), the declaration of `nValidationSamples` (1109), the
// assignment for the absolute-count branch (1133), and the condition of the
// "larger than training set" check (1150) — confirm against the original
// source.
1108{
1110 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1111
1112 // Parsing + Validation
1113 // --------------------
1114 if (fNumValidationString.EndsWith("%")) {
1115 // Relative spec. format 20%
1116 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1117
1118 if (intValStr.IsFloat()) {
// interpret "20%" as the fraction 0.20 of the training set
1119 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1120 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1121 } else {
1122 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1123 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1124 }
1125 } else if (fNumValidationString.IsFloat()) {
1126 Double_t valSizeAsDouble = fNumValidationString.Atof();
1127
1128 if (valSizeAsDouble < 1.0) {
1129 // Relative spec. format 0.2
1130 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1131 } else {
1132 // Absolute spec format 100 or 100.0
1134 }
1135 } else {
1136 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1137 << Endl;
1138 }
1139
1140 // Value validation
1141 // ----------------
// reject nonsensical results: negative, zero, or >= the full training set
1142 if (nValidationSamples < 0) {
1143 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1144 }
1145
1146 if (nValidationSamples == 0) {
1147 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1148 }
1149
1151 Log() << kFATAL << "Validation size \"" << fNumValidationString
1152 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1153 }
1154
1155 return nValidationSamples;
1156}
1157
1158
1159////////////////////////////////////////////////////////////////////////////////
1160/// Implementation of architecture specific train method
1161///
1162template <typename Architecture_t>
// NOTE(review): this extracted listing is missing a number of source lines
// (visible as gaps in the embedded line numbers). The gaps appear to include:
// the function signature, the type aliases and the declarations of
// nTrainingSamples/nValidationSamples, the loop over the training strategies
// that defines `settings`, EInitialization `I`, the CreateDeepNet call, the
// training/validation data-loader constructions, `trainingError` and the
// rng/shuffleSeed setup, plus several logging statements — confirm against
// the original MethodDL.cxx. The visible code is annotated as-is.
1164{
1165
1166 using Scalar_t = typename Architecture_t::Scalar_t;
1170
1171 bool debug = Log().GetMinType() == kDEBUG;
1172
1173
1174 // set the random seed for weight initialization
1175 Architecture_t::SetRandomSeed(fRandomSeed);
1176
1177 ///split training data in training and validation data
1178 // and determine the number of training and testing examples
1179
1182
1183 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1184 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1185 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1186
1187 size_t trainingPhase = 1;
1188
1190
1191 size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1192
1193
1194 // After the processing of the options, initialize the master deep net
1195 size_t batchSize = settings.batchSize;
1196 this->SetBatchSize(batchSize);
1197 // Should be replaced by actual implementation. No support for this now.
1198 size_t inputDepth = this->GetInputDepth();
1199 size_t inputHeight = this->GetInputHeight();
1200 size_t inputWidth = this->GetInputWidth();
1201 size_t batchDepth = this->GetBatchDepth();
1202 size_t batchHeight = this->GetBatchHeight();
1203 size_t batchWidth = this->GetBatchWidth();
1204 ELossFunction J = this->GetLossFunction();
1206 ERegularization R = settings.regularization;
1207 EOptimizer O = settings.optimizer;
1208 Scalar_t weightDecay = settings.weightDecay;
1209
1210 //Batch size should be included in batch layout as well. There are two possibilities:
1211 // 1. Batch depth = batch size one will input tensors as (batch_size x d1 x d2)
1212 // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1213 // 2. Batch depth = 1, batch height = batch size, batch width = dim of input features
1214 // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1215
// sanity check 1: batch depth must match batch size (unless depth == 1)
1216 if (batchDepth != batchSize && batchDepth > 1) {
1217 Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1218 return;
1219 }
// sanity check 2: with depth == 1, the batch height carries the batch size
1220 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1221 Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1222 return;
1223 }
1224
1225
1226 //check also that input layout compatible with batch layout
1227 bool badLayout = false;
1228 // case batch depth == batch size
1229 if (batchDepth == batchSize)
1230 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1231 // case batch Height is batch size
1232 if (batchHeight == batchSize && batchDepth == 1)
1233 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1234 if (badLayout) {
1235 Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1236 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1237 return;
1238 }
1239
1240 // check batch size is compatible with number of events
1241 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1242 Log() << kFATAL << "Number of samples in the datasets are train: ("
1243 << nTrainingSamples << ") test: (" << nValidationSamples
1244 << "). One of these is smaller than the batch size of "
1245 << settings.batchSize << ". Please increase the batch"
1246 << " size to be at least the same size as the smallest"
1247 << " of them." << Endl;
1248 }
1249
// master network used for training with the selected architecture
1250 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1251
1252 // create a copy of DeepNet for evaluating but with batch size = 1
1253 // fNet is the saved network and will be with CPU or Reference architecture
1254 if (trainingPhase == 1) {
1255 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1257 fBuildNet = true;
1258 }
1259 else
1260 fBuildNet = false;
1261
1262 // Initialize the vector of slave nets
1263 std::vector<DeepNet_t> nets{};
1264 nets.reserve(nThreads);
1265 for (size_t i = 0; i < nThreads; i++) {
1266 // create copies of the master deep net
1267 nets.push_back(deepNet);
1268 }
1269
1270
1271 // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1273
1274
1275 // set dropout probabilities
1276 // use convention to store in the layer 1.- dropout probabilities
1277 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1278 for (auto & p : dropoutVector) {
1279 p = 1.0 - p;
1280 }
1281 deepNet.SetDropoutProbabilities(dropoutVector);
1282
// for phases after the first one, resume from the best weights found so far
1283 if (trainingPhase > 1) {
1284 // copy initial weights from fNet to deepnet
1285 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1286 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1287 }
1288 }
1289
1290 // when fNet is built create also input matrix that will be used to evaluate it
1291 if (fBuildNet) {
1292 //int n1 = batchHeight;
1293 //int n2 = batchWidth;
1294 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1295 //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1296 //fXInput = TensorImpl_t(1,n1,n2);
1298 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1299 fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1300 fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );
1301
1302
1303 // create pointer to output matrix used for the predictions
1304 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1305
1306 // print the created network
1307 Log() << "***** Deep Learning Network *****" << Endl;
1308 if (Log().GetMinType() <= kINFO)
1309 deepNet.Print();
1310 }
1311 Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1312
1313 // Loading the training and validation datasets
1316 {inputDepth, inputHeight, inputWidth},
1317 {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1318 deepNet.GetOutputWidth(), nThreads);
1319
1322 {inputDepth, inputHeight, inputWidth},
1323 { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1324 deepNet.GetOutputWidth(), nThreads);
1325
1326
1327
1328 // do an evaluation of the network to compute initial minimum test error
1329
1331
1332 Double_t minValError = 0.0;
1333 Log() << "Compute initial loss on the validation data " << Endl;
1334 for (auto batch : validationData) {
1335 auto inputTensor = batch.GetInput();
1336 auto outputMatrix = batch.GetOutput();
1337 auto weights = batch.GetWeights();
1338
1339 //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1340 // should we apply dropout to the loss ??
1341 minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1342 }
1343 // add Regularization term
1344 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1347
1348
1349 // create a pointer to base class VOptimizer
1350 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1351
1352 // initialize the base class pointer with the corresponding derived class object.
// per-optimizer hyper-parameters are read from settings.optimizerParams
1353 switch (O) {
1354
1355 case EOptimizer::kSGD:
1356 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1358 break;
1359
1360 case EOptimizer::kAdam: {
1361 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1363 deepNet, settings.learningRate, settings.optimizerParams["ADAM_beta1"],
1364 settings.optimizerParams["ADAM_beta2"], settings.optimizerParams["ADAM_eps"]));
1365 break;
1366 }
1367
1368 case EOptimizer::kAdagrad:
1369 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1371 settings.optimizerParams["ADAGRAD_eps"]));
1372 break;
1373
1374 case EOptimizer::kRMSProp:
1375 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1377 settings.optimizerParams["RMSPROP_rho"],
1378 settings.optimizerParams["RMSPROP_eps"]));
1379 break;
1380
1381 case EOptimizer::kAdadelta:
1382 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1384 settings.optimizerParams["ADADELTA_rho"],
1385 settings.optimizerParams["ADADELTA_eps"]));
1386 break;
1387 }
1388
1389
1390 // Initialize the vector of batches, one batch for one slave network
1391 std::vector<TTensorBatch<Architecture_t>> batches{};
1392
1393 bool converged = false;
1394 size_t convergenceCount = 0;
1395 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1396
1397 // start measuring
1398 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1399 tstart = std::chrono::system_clock::now();
1400
1401 // function building string with optimizer parameters values for logging
1402 auto optimParametersString = [&]() {
1404 for ( auto & element : settings.optimizerParams) {
1405 TString key = element.first;
1406 key.ReplaceAll(settings.optimizerName + "_", ""); // strip optimizerName_
1407 double value = element.second;
1408 if (!optimParameters.IsNull())
1409 optimParameters += ",";
1410 else
1411 optimParameters += " (";
1412 optimParameters += TString::Format("%s=%g", key.Data(), value);
1413 }
1414 if (!optimParameters.IsNull())
1415 optimParameters += ")";
1416 return optimParameters;
1417 };
1418
1419 Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1420 << " Optimizer " << settings.optimizerName
1422 << " Learning rate = " << settings.learningRate << " regularization " << (char)settings.regularization
1423 << " minimum error = " << minValError << Endl;
1424 if (!fInteractive) {
1425 std::string separator(62, '-');
1426 Log() << separator << Endl;
1427 Log() << std::setw(10) << "Epoch"
1428 << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err." << std::setw(12)
1429 << "t(s)/epoch" << std::setw(12) << "t(s)/Loss" << std::setw(12) << "nEvents/s" << std::setw(12)
1430 << "Conv. Steps" << Endl;
1431 Log() << separator << Endl;
1432 }
1433
1434 // set up generator for shuffling the batches
1435 // if seed is zero we have always a different order in the batches
1436 size_t shuffleSeed = 0;
1439
1440 // print weights before
1441 if (fBuildNet && debug) {
1442 Log() << "Initial Deep Net Weights " << Endl;
1443 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1444 for (size_t l = 0; l < weights_tensor.size(); ++l)
1446 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1447 bias_tensor[0].Print();
1448 }
1449
1450 Log() << " Start epoch iteration ..." << Endl;
1451 bool debugFirstEpoch = false;
1452 bool computeLossInTraining = true; // compute loss in training or at test time
1453 size_t nTrainEpochs = 0;
// main epoch loop: runs until convergence or the maximum number of epochs
1454 while (!converged) {
1455 nTrainEpochs++;
1456 trainingData.Shuffle(rng);
1457
1458 // execute all epochs
1459 //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1460
1462 for (size_t i = 0; i < batchesInEpoch; ++i ) {
1463 // Clean and load new batches, one batch for one slave net
1464 //batches.clear();
1465 //batches.reserve(nThreads);
1466 //for (size_t j = 0; j < nThreads; j++) {
1467 // batches.push_back(trainingData.GetTensorBatch());
1468 //}
1469 if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1470
1471 auto my_batch = trainingData.GetTensorBatch();
1472
1473 if (debugFirstEpoch)
1474 std::cout << "got batch data - doing forward \n";
1475
1476#ifdef DEBUG
1477
1478 Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1479 typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1480 typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1481 Architecture_t::PrintTensor(tOut,"label tensor",true) ;
1482 Architecture_t::PrintTensor(tW,"weight tensor",true) ;
1483#endif
1484
// forward pass (training mode), then accumulate the training loss
1485 deepNet.Forward(my_batch.GetInput(), true);
1486 // compute also loss
1488 auto outputMatrix = my_batch.GetOutput();
1489 auto weights = my_batch.GetWeights();
1490 trainingError += deepNet.Loss(outputMatrix, weights, false);
1491 }
1492
1493 if (debugFirstEpoch)
1494 std::cout << "- doing backward \n";
1495
1496#ifdef DEBUG
1497 size_t nlayers = deepNet.GetLayers().size();
1498 for (size_t l = 0; l < nlayers; ++l) {
1499 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1500 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1501 TString::Format("initial weights layer %d", l).Data());
1502
1503 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1504 TString::Format("output tensor layer %d", l).Data());
1505 }
1506#endif
1507
1508 //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1509
// backward pass computes the gradients used by the optimizer below
1510 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1511
1512 if (debugFirstEpoch)
1513 std::cout << "- doing optimizer update \n";
1514
1515 // increment optimizer step that is used in some algorithms (e.g. ADAM)
1516 optimizer->IncrementGlobalStep();
1517 optimizer->Step();
1518
1519#ifdef DEBUG
1520 std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1521 for (size_t l = 0; l < nlayers; ++l) {
1522 if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1523 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1524 Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1525 }
1526 }
1527#endif
1528
1529 }
1530
1531 if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
1532 //}
1533 debugFirstEpoch = false;
// validation + bookkeeping only every testInterval epochs
1534 if ((nTrainEpochs % settings.testInterval) == 0) {
1535
1536 std::chrono::time_point<std::chrono::system_clock> t1,t2;
1537
1538 t1 = std::chrono::system_clock::now();
1539
1540 // Compute validation error.
1541
1542
1543 Double_t valError = 0.0;
1544 bool inTraining = false;
1545 for (auto batch : validationData) {
1546 auto inputTensor = batch.GetInput();
1547 auto outputMatrix = batch.GetOutput();
1548 auto weights = batch.GetWeights();
1549 // should we apply dropout to the loss ??
1550 valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1551 }
1552 // normalize loss to number of batches and add regularization term
1553 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1554 valError /= (Double_t)(nValidationSamples / settings.batchSize);
1555 valError += regTerm;
1556
1557 //Log the loss value
1559
1560 t2 = std::chrono::system_clock::now();
1561
1562 // checking for convergence
1563 if (valError < minValError) {
1564 convergenceCount = 0;
1565 } else {
// count epochs (in units of testInterval) without improvement
1566 convergenceCount += settings.testInterval;
1567 }
1568
1569 // copy configuration when reached a minimum error
1570 if (valError < minValError ) {
1571 // Copy weights from deepNet to fNet
1572 Log() << std::setw(10) << nTrainEpochs
1573 << " Minimum Test error found - save the configuration " << Endl;
1574 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1575 fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1576 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1577 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1578 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1579 // }
1580 }
1581 // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1582 // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1583
1585 }
// NOTE(review): the body of this else-if (internal line 1587) is missing
// from the extracted listing — confirm against the original source.
1586 else if ( minValError <= 0. )
1588
1589 if (!computeLossInTraining) {
1590 trainingError = 0.0;
1591 // Compute training error.
1592 for (auto batch : trainingData) {
1593 auto inputTensor = batch.GetInput();
1594 auto outputMatrix = batch.GetOutput();
1595 auto weights = batch.GetWeights();
1596 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1597 }
1598 }
1599 // normalize loss to number of batches and add regularization term
1602
1603 //Log the loss value
1605
1606 // stop measuring
1607 tend = std::chrono::system_clock::now();
1608
1609 // Compute numerical throughput.
1610 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1611 std::chrono::duration<double> elapsed1 = t1-tstart;
1612 // std::chrono::duration<double> elapsed2 = t2-tstart;
1613 // time to compute training and test errors
1614 std::chrono::duration<double> elapsed_testing = tend-t1;
1615
1616 double seconds = elapsed_seconds.count();
1617 // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1618 // nGFlops *= deepnet.GetNFlops() * 1e-9;
1619 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1620
// stop when no improvement for convergenceSteps epochs or max epochs hit
1621 converged =
1622 convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1623
1624
1625 Log() << std::setw(10) << nTrainEpochs << " | "
1626 << std::setw(12) << trainingError
1627 << std::setw(12) << valError
1628 << std::setw(12) << seconds / settings.testInterval
1629 << std::setw(12) << elapsed_testing.count()
1630 << std::setw(12) << 1. / eventTime
1631 << std::setw(12) << convergenceCount
1632 << Endl;
1633
1634 if (converged) {
1635 Log() << Endl;
1636 }
1637 tstart = std::chrono::system_clock::now();
1638 }
1639
1640 // if (stepCount % 10 == 0 || converged) {
1641 if (converged && debug) {
1642 Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
1643 << Endl;
1644 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1645 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1646 for (size_t l = 0; l < weights_tensor.size(); ++l)
1648 bias_tensor[0].Print();
1649 }
1650
1651 }
1652
1653 trainingPhase++;
1654 } // end loop on training Phase
1655}
1656
1657////////////////////////////////////////////////////////////////////////////////
// NOTE(review): the function signature line (internal line 1658) is missing
// from this extracted listing, as are the lines at internal 1670/1672/1691 —
// presumably the architecture-specific TrainDeepNet<...>() invocations for
// the cuDNN/CUDA and CPU backends; confirm against the original source.
// This method dispatches training to the backend selected by the
// Architecture option ("GPU" or "CPU"), failing fatally otherwise.
1659{
// interactive (GUI-driven) training is not supported for MethodDL
1660 if (fInteractive) {
1661 Log() << kFATAL << "Not implemented yet" << Endl;
1662 return;
1663 }
1664
1665 // using for training same scalar type defined for the prediction
1666 if (this->GetArchitectureString() == "GPU") {
1667#ifdef R__HAS_TMVAGPU
1668 Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1669#ifdef R__HAS_CUDNN
1671#else
1673#endif
1675#else
// GPU requested but the build has no CUDA support: abort
1675 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1676 "you have CUDA installed and it was successfully "
1677 "detected by CMAKE."
1678 << Endl;
1679 return;
1680#endif
1681 } else if (this->GetArchitectureString() == "CPU") {
1682#ifdef R__HAS_TMVACPU
1683 // note that number of threads used for BLAS might be different
1684 // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1685 Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
1686 << gConfig().GetNCpu() << Endl << Endl;
1687#else
1688 Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
1689 << Endl;
1690#endif
1692 return;
1693 }
1694 else {
1695 Log() << kFATAL << this->GetArchitectureString() <<
1696 " is not a supported architecture for TMVA::MethodDL"
1697 << Endl;
1698 }
1699
1700}
1701
1702////////////////////////////////////////////////////////////////////////////////
// NOTE(review): the function signature line (internal line 1703) is missing
// from this extracted listing — presumably FillInputTensor(), cf. the call
// in GetRegressionValues(); confirm against the original source.
1704{
1705 // fill the input tensor fXInput from the current Event data
1706 // with the correct shape depending on the model used
1707 // The input tensor is used for network prediction after training
1708 // using a single event. The network batch size must be equal to 1.
1709 // The architecture specified at compile time in ArchitectureImpl_t
1710 // is used. This should be the CPU architecture
1711
1712 if (!fNet || fNet->GetDepth() == 0) {
1713 Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
1714 }
1715 if (fNet->GetBatchSize() != 1) {
1716 Log() << kFATAL << "FillINputTensor::Network batch size must be equal to 1 when doing single event predicition" << Endl;
1717 }
1718
1719 // get current event
1720 const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1721 size_t nVariables = GetEvent()->GetNVariables();
1722
1723 // for Columnlayout tensor memory layout is HWC while for rowwise is CHW
1724 if (fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
1725 R__ASSERT(fXInput.GetShape().size() < 4);
1726 size_t nc, nhw = 0;
// 2D shape: (batch, features); batch dimension must be 1
1727 if (fXInput.GetShape().size() == 2) {
1728 nc = fXInput.GetShape()[0];
1729 if (nc != 1) {
1730 ArchitectureImpl_t::PrintTensor(fXInput);
1731 Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1" << Endl;
1732 }
1733 nhw = fXInput.GetShape()[1];
1734 } else {
1735 nc = fXInput.GetCSize();
1736 nhw = fXInput.GetWSize();
1737 }
// the event must provide exactly nc*nhw values
1738 if (nVariables != nc * nhw) {
1739 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1740 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw << Endl;
1741 }
1742 for (size_t j = 0; j < nc; j++) {
1743 for (size_t k = 0; k < nhw; k++) {
1744 // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
1745 fXInputBuffer[k * nc + j] = inputValues[j * nhw + k]; // for column layout !!!
1746 }
1747 }
1748 } else {
1749 // row-wise layout
1750 assert(fXInput.GetShape().size() >= 4);
1751 size_t nc = fXInput.GetCSize();
1752 size_t nh = fXInput.GetHSize();
1753 size_t nw = fXInput.GetWSize();
1754 size_t n = nc * nh * nw;
1755 if (nVariables != n) {
1756 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1757 << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
1758 << Endl;
1759 }
// row-major case: event values can be copied straight through
1760 for (size_t j = 0; j < n; j++) {
1761 // in this case TMVA event has same order as input tensor
1762 fXInputBuffer[j] = inputValues[j]; // for column layout !!!
1763 }
1764 }
1765 // copy the staged host buffer into the (possibly device-side) input tensor
1766 fXInput.GetDeviceBuffer().CopyFrom(fXInputBuffer);
1767 return;
1768}
1769
1770////////////////////////////////////////////////////////////////////////////////
/// Compute the MVA output for the current event by running the saved
/// single-event network (fNet). Error arguments are unused.
1771Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1772{
1773
// NOTE(review): a line is missing here in the extracted listing (internal
// line 1774) — presumably the call that fills fXInput from the current
// event (cf. FillInputTensor() as used in GetRegressionValues()); confirm
// against the original source.
1775
1776 // perform the prediction
1777 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1778
1779 // return value
1780 double mvaValue = (*fYHat)(0, 0);
1781
1782 // for debugging
1783#ifdef DEBUG_MVAVALUE
1784 using Tensor_t = std::vector<MatrixImpl_t>;
1785 TMatrixF xInput(n1,n2, inputValues.data() );
1786 std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1787 xInput.Print();
1788 std::cout << "Output of DeepNet " << mvaValue << std::endl;
1789 auto & deepnet = *fNet;
1790 std::cout << "Loop on layers " << std::endl;
1791 for (int l = 0; l < deepnet.GetDepth(); ++l) {
1792 std::cout << "Layer " << l;
1793 const auto * layer = deepnet.GetLayerAt(l);
1794 const Tensor_t & layer_output = layer->GetOutput();
1795 layer->Print();
1796 std::cout << "DNN output " << layer_output.size() << std::endl;
1797 for (size_t i = 0; i < layer_output.size(); ++i) {
1798#ifdef R__HAS_TMVAGPU
1799 //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1800 TMatrixD m = layer_output[i];
1801#else
1802 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1803#endif
1804 m.Print();
1805 }
1806 const Tensor_t & layer_weights = layer->GetWeights();
1807 std::cout << "DNN weights " << layer_weights.size() << std::endl;
1808 if (layer_weights.size() > 0) {
1809 int i = 0;
1810#ifdef R__HAS_TMVAGPU
1812// TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1813#else
1814 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1815#endif
1816 m.Print();
1817 }
1818 }
1819#endif
1820
// guard against NaN network output by returning the -999 sentinel
1821 return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1822}
1823////////////////////////////////////////////////////////////////////////////////
1824/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1825////////////////////////////////////////////////////////////////////////////////
1826template <typename Architecture_t>
// NOTE(review): this extracted listing is missing the function signature
// (internal line 1827) — from the body the parameters appear to include
// firstEvt, lastEvt, batchSize and logProgress — as well as a few interior
// lines (1851, 1857, 1910), presumably the TensorDataLoader_t alias, the
// CreateDeepNet call, and a GetEvent() fetch; confirm against the original
// source. The method rebuilds a batched copy of the trained network, copies
// the saved weights into it, and evaluates full batches, falling back to
// single-event GetMvaValue() for the remainder.
1828{
1829
1830 // Check whether the model is setup
1831 if (!fNet || fNet->GetDepth() == 0) {
1832 Log() << kFATAL << "The network has not been trained and fNet is not built"
1833 << Endl;
1834 }
1835
1836 // rebuild the networks
1837 this->SetBatchSize(batchSize);
1838 size_t inputDepth = this->GetInputDepth();
1839 size_t inputHeight = this->GetInputHeight();
1840 size_t inputWidth = this->GetInputWidth();
1841 size_t batchDepth = this->GetBatchDepth();
1842 size_t batchHeight = this->GetBatchHeight();
1843 size_t batchWidth = this->GetBatchWidth();
1844 ELossFunction J = fNet->GetLossFunction();
1845 EInitialization I = fNet->GetInitialization();
1846 ERegularization R = fNet->GetRegularization();
1847 Double_t weightDecay = fNet->GetWeightDecay();
1848
1849 using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1850 using Matrix_t = typename Architecture_t::Matrix_t;
1852
1853 // create the deep neural network
1854 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1855 std::vector<DeepNet_t> nets{};
1856 fBuildNet = false;
1858
1859 // copy weights from the saved fNet to the built DeepNet
1860 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1861 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1862 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1863 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
1864 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
1865 // }
1866 }
1867
1868 size_t n1 = deepNet.GetBatchHeight();
1869 size_t n2 = deepNet.GetBatchWidth();
1870 size_t n0 = deepNet.GetBatchSize();
1871 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1872 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1873 n1 = deepNet.GetBatchSize();
1874 n0 = 1;
1875 }
1876 //this->SetBatchDepth(n0);
1877 Long64_t nEvents = lastEvt - firstEvt;
1878 TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1879 TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1880
1881
1882 // Tensor_t xInput;
1883 // for (size_t i = 0; i < n0; ++i)
1884 // xInput.emplace_back(Matrix_t(n1,n2));
1885
1886 // create pointer to output matrix used for the predictions
1887 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1888
1889 // use timer
1890 Timer timer( nEvents, GetName(), kTRUE );
1891
1892 if (logProgress)
1893 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1894 << "Evaluation of " << GetMethodName() << " on "
1895 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1896 << " sample (" << nEvents << " events)" << Endl;
1897
1898
1899 // event loop
1900 std::vector<double> mvaValues(nEvents);
1901
1902
1903 for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1904
1905 Long64_t ievt_end = ievt + batchSize;
1906 // case of a complete batch fitting in the remaining events
1907 if (ievt_end <= lastEvt) {
1908
// on the first batch, verify the event size matches the network input
1909 if (ievt == firstEvt) {
1911 size_t nVariables = GetEvent()->GetNVariables();
1912
1913 if (n1 == batchSize && n0 == 1) {
1914 if (n2 != nVariables) {
1915 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1916 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1917 << Endl;
1918 }
1919 } else {
1920 if (n1*n2 != nVariables || n0 != batchSize) {
1921 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1922 << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1923 << Endl;
1924 }
1925 }
1926 }
1927
1928 auto batch = testData.GetTensorBatch();
1929 auto inputTensor = batch.GetInput();
1930
1931 auto xInput = batch.GetInput();
1932 // make the prediction
1933 deepNet.Prediction(yHat, xInput, fOutputFunction);
1934 for (size_t i = 0; i < batchSize; ++i) {
1935 double value = yHat(i,0);
// map NaN outputs to the -999 sentinel, as in GetMvaValue()
1936 mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1937 }
1938 }
1939 else {
1940 // case of remaining events: compute prediction by single event !
1941 for (Long64_t i = ievt; i < lastEvt; ++i) {
1942 Data()->SetCurrentEvent(i);
1943 mvaValues[i] = GetMvaValue();
1944 }
1945 }
1946 }
1947
1948 if (logProgress) {
1949 Log() << kINFO
1950 << "Elapsed time for evaluation of " << nEvents << " events: "
1951 << timer.GetElapsedTime() << " " << Endl;
1952 }
1953
1954 return mvaValues;
1955}
1956
1957//////////////////////////////////////////////////////////////////////////
1958/// Get the regression output values for a single event
1959//////////////////////////////////////////////////////////////////////////
1960const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1961{
1962
1963 FillInputTensor ();
1964
1965 // perform the network prediction
1966 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1967
1968 size_t nTargets = DataInfo().GetNTargets();
1969 R__ASSERT(nTargets == fYHat->GetNcols());
1970
1971 std::vector<Float_t> output(nTargets);
1972 for (size_t i = 0; i < nTargets; i++)
1973 output[i] = (*fYHat)(0, i);
1974
1975 // ned to transform back output values
1976 if (fRegressionReturnVal == NULL)
1977 fRegressionReturnVal = new std::vector<Float_t>(nTargets);
1978 R__ASSERT(fRegressionReturnVal->size() == nTargets);
1979
1980 // N.B. one should cache here temporary event class
1981 Event *evT = new Event(*GetEvent());
1982 for (size_t i = 0; i < nTargets; ++i) {
1983 evT->SetTarget(i, output[i]);
1984 }
1985 const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
1986 for (size_t i = 0; i < nTargets; ++i) {
1987 (*fRegressionReturnVal)[i] = evT2->GetTarget(i);
1988 }
1989 delete evT;
1990 return *fRegressionReturnVal;
1991}
1992//////////////////////////////////////////////////////////////////////////
1993/// Get the multi-class output values for a single event
1994//////////////////////////////////////////////////////////////////////////
1995const std::vector<Float_t> &TMVA::MethodDL::GetMulticlassValues()
1996{
1997
1998 FillInputTensor();
1999
2000 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
2001
2002 size_t nClasses = DataInfo().GetNClasses();
2003 R__ASSERT(nClasses == fYHat->GetNcols());
2004
2005 if (fMulticlassReturnVal == NULL) {
2006 fMulticlassReturnVal = new std::vector<Float_t>(nClasses);
2007 }
2008 R__ASSERT(fMulticlassReturnVal->size() == nClasses);
2009
2010 for (size_t i = 0; i < nClasses; i++) {
2011 (*fMulticlassReturnVal)[i] = (*fYHat)(0, i);
2012 }
2013 return *fMulticlassReturnVal;
2014}
2015
2016////////////////////////////////////////////////////////////////////////////////
2017/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
2018/// Here we will evaluate using a default batch size and the same architecture used for
2019/// Training
2020////////////////////////////////////////////////////////////////////////////////
2022{
2023
2024 Long64_t nEvents = Data()->GetNEvents();
2025 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2026 if (firstEvt < 0) firstEvt = 0;
2027 nEvents = lastEvt-firstEvt;
2028
2029 // use same batch size as for training (from first strategy)
2030 size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
2031 size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
2032 if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
2033
2034 // using for training same scalar type defined for the prediction
2035 if (this->GetArchitectureString() == "GPU") {
2036#ifdef R__HAS_TMVAGPU
2037 Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
2038#ifdef R__HAS_CUDNN
2040#else
2042#endif
2043
2044#endif
2045 }
2046 Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
2048}
2049////////////////////////////////////////////////////////////////////////////////
2050void MethodDL::AddWeightsXMLTo(void * parent) const
2051{
2052 // Create the parent XML node with name "Weights"
2053 auto & xmlEngine = gTools().xmlengine();
2054 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2055
2056 /*! Get all necessary information, in order to be able to reconstruct the net
2057 * if we read the same XML file. */
2058
2059 // Deep Net specific info
2060 Int_t depth = fNet->GetDepth();
2061
2062 Int_t inputDepth = fNet->GetInputDepth();
2063 Int_t inputHeight = fNet->GetInputHeight();
2064 Int_t inputWidth = fNet->GetInputWidth();
2065
2066 Int_t batchSize = fNet->GetBatchSize();
2067
2068 Int_t batchDepth = fNet->GetBatchDepth();
2069 Int_t batchHeight = fNet->GetBatchHeight();
2070 Int_t batchWidth = fNet->GetBatchWidth();
2071
2072 char lossFunction = static_cast<char>(fNet->GetLossFunction());
2073 char initialization = static_cast<char>(fNet->GetInitialization());
2074 char regularization = static_cast<char>(fNet->GetRegularization());
2075
2076 Double_t weightDecay = fNet->GetWeightDecay();
2077
2078 // Method specific info (not sure these are needed)
2079 char outputFunction = static_cast<char>(this->GetOutputFunction());
2080 //char lossFunction = static_cast<char>(this->GetLossFunction());
2081
2082 // Add attributes to the parent node
2083 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2084
2085 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2086 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2087 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2088
2089 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2090 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2091 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2092 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2093
2094 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2095 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2096 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2097 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2098
2099 gTools().AddAttr(nn, "WeightDecay", weightDecay);
2100
2101
2102 for (Int_t i = 0; i < depth; i++)
2103 {
2104 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2105 }
2106
2107
2108}
2109
2110////////////////////////////////////////////////////////////////////////////////
2112{
2113
2114 auto netXML = gTools().GetChild(rootXML, "Weights");
2115 if (!netXML){
2116 netXML = rootXML;
2117 }
2118
2119 size_t netDepth;
2120 gTools().ReadAttr(netXML, "NetDepth", netDepth);
2121
2122 size_t inputDepth, inputHeight, inputWidth;
2123 gTools().ReadAttr(netXML, "InputDepth", inputDepth);
2124 gTools().ReadAttr(netXML, "InputHeight", inputHeight);
2125 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
2126
2127 size_t batchSize, batchDepth, batchHeight, batchWidth;
2128 gTools().ReadAttr(netXML, "BatchSize", batchSize);
2129 // use always batchsize = 1
2130 //batchSize = 1;
2131 gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
2132 gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
2133 gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
2134
2135 char lossFunctionChar;
2136 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
2137 char initializationChar;
2138 gTools().ReadAttr(netXML, "Initialization", initializationChar);
2139 char regularizationChar;
2140 gTools().ReadAttr(netXML, "Regularization", regularizationChar);
2141 char outputFunctionChar;
2142 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
2143 double weightDecay;
2144 gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
2145
2146 // create the net
2147
2148 // DeepNetCpu_t is defined in MethodDL.h
2149 this->SetInputDepth(inputDepth);
2150 this->SetInputHeight(inputHeight);
2151 this->SetInputWidth(inputWidth);
2152 this->SetBatchDepth(batchDepth);
2153 this->SetBatchHeight(batchHeight);
2154 this->SetBatchWidth(batchWidth);
2155
2156
2157
2158 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2160 static_cast<ELossFunction>(lossFunctionChar),
2161 static_cast<EInitialization>(initializationChar),
2162 static_cast<ERegularization>(regularizationChar),
2163 weightDecay));
2164
2166
2167
2168 //size_t previousWidth = inputWidth;
2170
2171 // loop on the layer and add them to the network
2172 for (size_t i = 0; i < netDepth; i++) {
2173
2175
2176 // case of dense layer
2177 if (layerName == "DenseLayer") {
2178
2179 // read width and activation function and then we can create the layer
2180 size_t width = 0;
2181 gTools().ReadAttr(layerXML, "Width", width);
2182
2183 // Read activation function.
2185 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2186 EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
2187
2188
2189 fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
2190
2191 }
2192 // Convolutional Layer
2193 else if (layerName == "ConvLayer") {
2194
2195 // read width and activation function and then we can create the layer
2196 size_t depth = 0;
2197 gTools().ReadAttr(layerXML, "Depth", depth);
2198 size_t fltHeight, fltWidth = 0;
2199 size_t strideRows, strideCols = 0;
2200 size_t padHeight, padWidth = 0;
2201 gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
2202 gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
2203 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2204 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2205 gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
2206 gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
2207
2208 // Read activation function.
2210 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2212
2213
2214 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2216
2217 }
2218
2219 // MaxPool Layer
2220 else if (layerName == "MaxPoolLayer") {
2221
2222 // read maxpool layer info
2223 size_t filterHeight, filterWidth = 0;
2224 size_t strideRows, strideCols = 0;
2225 gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
2226 gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2227 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2228 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2229
2230 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2231 }
2232 // Reshape Layer
2233 else if (layerName == "ReshapeLayer") {
2234
2235 // read reshape layer info
2236 size_t depth, height, width = 0;
2237 gTools().ReadAttr(layerXML, "Depth", depth);
2238 gTools().ReadAttr(layerXML, "Height", height);
2239 gTools().ReadAttr(layerXML, "Width", width);
2240 int flattening = 0;
2241 gTools().ReadAttr(layerXML, "Flattening",flattening );
2242
2243 fNet->AddReshapeLayer(depth, height, width, flattening);
2244
2245 }
2246 // RNN Layer
2247 else if (layerName == "RNNLayer") {
2248
2249 // read RNN layer info
2250 size_t stateSize,inputSize, timeSteps = 0;
2251 int rememberState= 0;
2252 int returnSequence = 0;
2253 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2254 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2255 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2256 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2257 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2258
2259 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2260
2261 }
2262 // LSTM Layer
2263 else if (layerName == "LSTMLayer") {
2264
2265 // read RNN layer info
2266 size_t stateSize,inputSize, timeSteps = 0;
2268 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2269 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2270 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2271 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2272 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2273
2274 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2275
2276 }
2277 // GRU Layer
2278 else if (layerName == "GRULayer") {
2279
2280 // read RNN layer info
2281 size_t stateSize,inputSize, timeSteps = 0;
2283 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2284 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2285 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2286 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2287 gTools().ReadAttr(layerXML, "ReturnSequence", returnSequence);
2288 gTools().ReadAttr(layerXML, "ResetGateAfter", resetGateAfter);
2289
2291 Warning("ReadWeightsFromXML",
2292 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2293
2294 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2295 }
2296 // BatchNorm Layer
2297 else if (layerName == "BatchNormLayer") {
2298 // use some dammy value which will be overwrittem in BatchNormLayer::ReadWeightsFromXML
2299 fNet->AddBatchNormLayer(0., 0.0);
2300 }
2301 // read weights and biases
2302 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2303
2304 // read next layer
2306 }
2307
2308 fBuildNet = false;
2309 // create now the input and output matrices
2310 //int n1 = batchHeight;
2311 //int n2 = batchWidth;
2312 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2313 //if (fXInput.size() > 0) fXInput.clear();
2314 //fXInput.emplace_back(MatrixImpl_t(n1,n2));
2316 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
2317 // make here a ColumnMajor tensor
2318 fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth(),TMVA::Experimental::MemoryLayout::ColumnMajor );
2320
2321 // create pointer to output matrix used for the predictions
2322 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2323
2324
2325}
2326
2327
2328////////////////////////////////////////////////////////////////////////////////
2329void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
2330{
2331}
2332
2333////////////////////////////////////////////////////////////////////////////////
2335{
2336 // TODO
2337 return NULL;
2338}
2339
2340////////////////////////////////////////////////////////////////////////////////
2342{
2343 // TODO
2344}
2345
2346} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define e(i)
Definition RSha256.hxx:103
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int)
Definition RtypesCore.h:60
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t width
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2495
const_iterator end() const
The Formula class.
Definition TFormula.h:89
UInt_t GetNCpu()
Definition Config.h:70
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
MsgLogger & Log() const
Adadelta Optimizer class.
Definition Adadelta.h:45
Adagrad Optimizer class.
Definition Adagrad.h:45
Adam Optimizer class.
Definition Adam.h:45
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition Cpu.h:108
static bool IsCudnn()
Definition Cpu.h:131
Generic Deep Neural Network class.
Definition DeepNet.h:73
RMSProp Optimizer class.
Definition RMSProp.h:45
Stochastic Batch Gradient Descent Optimizer class.
Definition SGD.h:46
Generic General Layer class.
Class that contains all the data information.
Definition DataSetInfo.h:62
UInt_t GetNClasses() const
Types::ETreeType GetCurrentType() const
Definition DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:206
void SetCurrentEvent(Long64_t ievt) const
Definition DataSet.h:88
Virtual base Class for all MVA method.
Definition MethodBase.h:111
const char * GetName() const override
Definition MethodBase.h:334
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition MethodBase.h:686
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
Definition MethodBase.h:346
const TString & GetMethodName() const
Definition MethodBase.h:331
const Event * GetEvent() const
Definition MethodBase.h:751
DataSetInfo & DataInfo() const
Definition MethodBase.h:410
UInt_t GetNVariables() const
Definition MethodBase.h:345
Types::EAnalysisType fAnalysisType
Definition MethodBase.h:595
UInt_t GetNvar() const
Definition MethodBase.h:344
TrainingHistory fTrainHistory
Definition MethodBase.h:425
DataSet * Data() const
Definition MethodBase.h:409
IPythonInteractive * fInteractive
temporary dataset used when evaluating on a different data (used by MethodCategory::GetMvaValues)
Definition MethodBase.h:448
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition MethodDL.h:183
DNN::ELossFunction fLossFunction
The loss function.
Definition MethodDL.h:190
std::vector< size_t > fInputShape
Contains the batch size (no.
Definition MethodDL.h:178
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
Check the type of analysis the deep learning network can do.
TString fLayoutString
The string defining the layout of the deep net.
Definition MethodDL.h:194
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) override
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class Here we will evaluate...
void SetInputDepth(int inputDepth)
Setters.
Definition MethodDL.h:286
std::unique_ptr< MatrixImpl_t > fYHat
Definition MethodDL.h:208
size_t GetBatchHeight() const
Definition MethodDL.h:263
void ReadWeightsFromXML(void *wghtnode) override
TString fWeightInitializationString
The string defining the weight initialization method.
Definition MethodDL.h:197
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate max pool layer.
Definition MethodDL.cxx:767
TensorImpl_t fXInput
Definition MethodDL.h:206
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition MethodDL.h:186
TString fArchitectureString
The string defining the architecture: CPU or GPU.
Definition MethodDL.h:198
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
void Train() override
Methods for training the deep learning network.
void TrainDeepNet()
train of deep neural network using the defined architecture
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition MethodDL.h:280
DNN::EOutputFunction GetOutputFunction() const
Definition MethodDL.h:269
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate dense layer.
Definition MethodDL.cxx:582
UInt_t GetNumValidationSamples()
parce the validation string and return the number of event data used for validation
TString GetBatchLayoutString() const
Definition MethodDL.h:273
void SetInputWidth(int inputWidth)
Definition MethodDL.h:288
HostBufferImpl_t fXInputBuffer
Definition MethodDL.h:207
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition MethodDL.h:184
size_t GetInputDepth() const
Definition MethodDL.h:255
std::unique_ptr< DeepNetImpl_t > fNet
Definition MethodDL.h:209
TString GetInputLayoutString() const
Definition MethodDL.h:272
void SetBatchHeight(size_t batchHeight)
Definition MethodDL.h:293
void GetHelpMessage() const override
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition MethodDL.h:93
size_t GetInputHeight() const
Definition MethodDL.h:256
TString GetArchitectureString() const
Definition MethodDL.h:278
void ParseBatchLayout()
Parse the input layout.
Definition MethodDL.cxx:481
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition MethodDL.cxx:889
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
const std::vector< Float_t > & GetRegressionValues() override
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition MethodDL.h:199
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition MethodDL.h:108
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition MethodDL.h:189
DNN::EInitialization fWeightInitialization
The initialization method.
Definition MethodDL.h:188
void AddWeightsXMLTo(void *parent) const override
size_t GetBatchDepth() const
Definition MethodDL.h:262
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate rnn layer.
Definition MethodDL.cxx:930
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition MethodDL.h:204
size_t GetInputWidth() const
Definition MethodDL.h:257
void SetInputShape(std::vector< size_t > inputShape)
Definition MethodDL.h:289
DNN::ELossFunction GetLossFunction() const
Definition MethodDL.h:270
TString fBatchLayoutString
The string defining the layout of the batch.
Definition MethodDL.h:193
void DeclareOptions() override
The option handling methods.
Definition MethodDL.cxx:166
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate convolutional layer.
Definition MethodDL.cxx:668
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition MethodDL.cxx:828
TString fTrainingStrategyString
The string defining the training strategy.
Definition MethodDL.h:196
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition MethodDL.h:110
void SetBatchDepth(size_t batchDepth)
Definition MethodDL.h:292
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
void SetBatchWidth(size_t batchWidth)
Definition MethodDL.h:294
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
void ReadWeightsFromStream(std::istream &) override
DNN::EInitialization GetWeightInitialization() const
Definition MethodDL.h:268
void SetBatchSize(size_t batchSize)
Definition MethodDL.h:291
TString GetLayoutString() const
Definition MethodDL.h:274
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition MethodDL.h:182
void ProcessOptions() override
Definition MethodDL.cxx:218
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition MethodDL.h:106
void Init() override
default initializations
Definition MethodDL.cxx:431
size_t GetBatchWidth() const
Definition MethodDL.h:264
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition MethodDL.h:107
const std::vector< Float_t > & GetMulticlassValues() override
virtual ~MethodDL()
Virtual Destructor.
void ParseInputLayout()
Parse the input layout.
Definition MethodDL.cxx:438
void FillInputTensor()
Get the input event tensor for evaluation Internal function to fill the fXInput tensor with the corre...
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition MethodDL.h:201
const Ranking * CreateRanking() override
void SetInputHeight(int inputHeight)
Definition MethodDL.h:287
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling the ProcesOptions(), all of the options are parsed, so using the parsed options,...
Definition MethodDL.cxx:528
TString fErrorStrategy
The string defining the error strategy for training.
Definition MethodDL.h:195
TString fInputLayoutString
The string defining the layout of the input.
Definition MethodDL.h:192
EMsgType GetMinType() const
Definition MsgLogger.h:69
Ranking for variables in method (implementation)
Definition Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
TXMLEngine & xmlengine()
Definition Tools.h:262
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1150
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
TString StringFromInt(Long_t i)
string tools
Definition Tools.cxx:1223
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1162
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kMulticlass
Definition Types.h:129
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
@ kFATAL
Definition Types.h:61
void Print(Option_t *option="") const override
Dump this marker with its attributes.
Definition TMarker.cxx:338
TMatrixT.
Definition TMatrixT.h:40
void Print(Option_t *option="") const override
Print TNamed name and title.
Definition TNamed.cxx:127
An array of TObjects.
Definition TObjArray.h:31
Collectable string class.
Definition TObjString.h:28
const TString & GetString() const
Definition TObjString.h:46
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition TObject.cxx:1057
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition TObject.cxx:1071
Basic string class.
Definition TString.h:138
const char * Data() const
Definition TString.h:384
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition TString.h:712
@ kTrailing
Definition TString.h:284
@ kBoth
Definition TString.h:284
void ToUpper()
Change string to upper case.
Definition TString.cxx:1202
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
const Int_t n
Definition legend1.C:16
#define I(x, y, z)
EOptimizer
Enum representing the optimizer used for training.
Definition Functions.h:82
EOutputFunction
Enum that represents output functions.
Definition Functions.h:46
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition Functions.h:238
ERegularization
Enum representing the regularization type applied for a given layer.
Definition Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition Functions.h:57
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition DataLoader.h:39
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition MethodDL.cxx:74
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Bool_t IsNaN(Double_t x)
Definition TMath.h:903
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition TMath.h:767
All of the options that can be specified in the training string.
Definition MethodDL.h:72
TMarker m
Definition textangle.C:8
TLine l
Definition textangle.C:4
auto * t1
Definition textangle.C:20
static void output()