Logo ROOT  
Reference Guide
MethodDL.cxx
Go to the documentation of this file.
// @(#)root/tmva/tmva/cnn:$Id$
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * Web : http://tmva.sourceforge.net *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31
32#include "TMVA/Tools.h"
33#include "TMVA/Configurable.h"
34#include "TMVA/IMethod.h"
36#include "TMVA/MethodDL.h"
37#include "TMVA/Types.h"
39#include "TMVA/DNN/Functions.h"
41#include "TMVA/DNN/SGD.h"
42#include "TMVA/DNN/Adam.h"
43#include "TMVA/DNN/Adagrad.h"
44#include "TMVA/DNN/RMSProp.h"
45#include "TMVA/DNN/Adadelta.h"
46#include "TMVA/Timer.h"
47
48#include "TStopwatch.h"
49
50#include <chrono>
51
54
55using namespace TMVA::DNN::CNN;
56using namespace TMVA::DNN;
57
63
64
65namespace TMVA {
66
67
68////////////////////////////////////////////////////////////////////////////////
69TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
70{
71 key.ToUpper();
72 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
73 if (it == keyValueMap.end()) {
74 return TString("");
75 }
76 return it->second;
77}
78
79////////////////////////////////////////////////////////////////////////////////
80template <typename T>
81T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
82
83////////////////////////////////////////////////////////////////////////////////
84template <>
85int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
86{
87 TString value(fetchValueTmp(keyValueMap, key));
88 if (value == "") {
89 return defaultValue;
90 }
91 return value.Atoi();
92}
93
94////////////////////////////////////////////////////////////////////////////////
95template <>
96double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
97{
98 TString value(fetchValueTmp(keyValueMap, key));
99 if (value == "") {
100 return defaultValue;
101 }
102 return value.Atof();
103}
104
105////////////////////////////////////////////////////////////////////////////////
106template <>
107TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
108{
109 TString value(fetchValueTmp(keyValueMap, key));
110 if (value == "") {
111 return defaultValue;
112 }
113 return value;
114}
115
116////////////////////////////////////////////////////////////////////////////////
117template <>
118bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
119{
120 TString value(fetchValueTmp(keyValueMap, key));
121 if (value == "") {
122 return defaultValue;
123 }
124
125 value.ToUpper();
126 if (value == "TRUE" || value == "T" || value == "1") {
127 return true;
128 }
129
130 return false;
131}
132
133////////////////////////////////////////////////////////////////////////////////
134template <>
135std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
136 std::vector<double> defaultValue)
137{
138 TString parseString(fetchValueTmp(keyValueMap, key));
139 if (parseString == "") {
140 return defaultValue;
141 }
142
143 parseString.ToUpper();
144 std::vector<double> values;
145
146 const TString tokenDelim("+");
147 TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
148 TIter nextToken(tokenStrings);
149 TObjString *tokenString = (TObjString *)nextToken();
150 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
151 std::stringstream sstr;
152 double currentValue;
153 sstr << tokenString->GetString().Data();
154 sstr >> currentValue;
155 values.push_back(currentValue);
156 }
157 return values;
158}
159
160////////////////////////////////////////////////////////////////////////////////
162{
163 // Set default values for all option strings
164
165 DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
166
167 DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
168
169 DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
170
171 DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
172 " or cross entropy (binary classification).");
173 AddPreDefVal(TString("CROSSENTROPY"));
174 AddPreDefVal(TString("SUMOFSQUARES"));
175 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
176
177 DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
178 AddPreDefVal(TString("XAVIER"));
179 AddPreDefVal(TString("XAVIERUNIFORM"));
180 AddPreDefVal(TString("GAUSS"));
181 AddPreDefVal(TString("UNIFORM"));
182 AddPreDefVal(TString("IDENTITY"));
183 AddPreDefVal(TString("ZERO"));
184
185 DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
186
187 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
188 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
189 "Specify as 100 to use exactly 100 events. (Default: 20%)");
190
191 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
192 AddPreDefVal(TString("STANDARD"));
193 AddPreDefVal(TString("CPU"));
194 AddPreDefVal(TString("GPU"));
195 AddPreDefVal(TString("OPENCL"));
196 AddPreDefVal(TString("CUDNN"));
197
198 // define training stratgey separated by a separator "|"
199 DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
200 "Momentum=0.3,"
201 "Repetitions=3,"
202 "ConvergenceSteps=50,"
203 "BatchSize=30,"
204 "TestRepetitions=7,"
205 "WeightDecay=0.0,"
206 "Regularization=None,"
207 "DropConfig=0.0,"
208 "DropRepetitions=5"
209 "|"
210 "LearningRate=1e-4,"
211 "Momentum=0.3,"
212 "Repetitions=3,"
213 "ConvergenceSteps=50,"
214 "MaxEpochs=2000,"
215 "BatchSize=20,"
216 "TestRepetitions=7,"
217 "WeightDecay=0.001,"
218 "Regularization=L2,"
219 "DropConfig=0.0+0.5+0.5,"
220 "DropRepetitions=5,"
221 "Multithreading=True",
222 "TrainingStrategy", "Defines the training strategies.");
223}
224
225////////////////////////////////////////////////////////////////////////////////
227{
228
230 Log() << kINFO << "Will ignore negative events in training!" << Endl;
231 }
232
233 if (fArchitectureString == "STANDARD") {
234 Log() << kINFO << "The STANDARD architecture has been deprecated. "
235 "Please use Architecture=CPU or Architecture=CPU."
236 "See the TMVA Users' Guide for instructions if you "
237 "encounter problems."
238 << Endl;
239 }
240 if (fArchitectureString == "OPENCL") {
241 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
242 "Please use Architecture=CPU or Architecture=CPU for the "
243 "time being. See the TMVA Users' Guide for instructions "
244 "if you encounter problems."
245 << Endl;
246 }
247
248 // the architecture can now be set at runtime as an option
249
250
251 if (fArchitectureString == "GPU") {
252#ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
253 Log() << kERROR << "CUDA backend not enabled. Please make sure "
254 "you have CUDA installed and it was successfully "
255 "detected by CMAKE by using -Dtmva-gpu=On "
256 << Endl;
257#ifdef R__HAS_TMVACPU
258 fArchitectureString = "CPU";
259 Log() << kINFO << "Will now use the CPU architecture !" << Endl;
260#else
261 fArchitectureString = "Standard";
262 Log() << kINFO << "Will now use the Standard architecture !" << Endl;
263#endif
264#else
265 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
266#endif
267 }
268 else if (fArchitectureString == "CUDNN") {
269#ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
270 Log() << kERROR << "CUDA+CUDNN backend not enabled. Please make sure "
271 "you have CUDNN and CUDA installed and that the GPU capability/CUDA "
272 "was successfully detected by CMAKE by using -Dtmva-gpu=On"
273 << Endl;
274#ifdef R__HAS_TMVACPU
275 fArchitectureString = "CPU";
276 Log() << kINFO << "Will now use the CPU architecture !" << Endl;
277#else
278 fArchitectureString = "Standard";
279 Log() << kINFO << "Will now use the Standard architecture !" << Endl;
280#endif
281#else
282 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
283#endif
284 }
285
286 else if (fArchitectureString == "CPU") {
287#ifndef R__HAS_TMVACPU // TMVA has no CPU support
288 Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
289 "you have a BLAS implementation and it was successfully "
290 "detected by CMake as well that the imt CMake flag is set."
291 << Endl;
292#ifdef R__HAS_TMVAGPU
293 fArchitectureString = "GPU";
294 Log() << kINFO << "Will now use the GPU architecture !" << Endl;
295#else
296 fArchitectureString = "STANDARD";
297 Log() << kINFO << "Will now use the Standard architecture !" << Endl;
298#endif
299#else
300 Log() << kINFO << "Will now use the CPU architecture !" << Endl;
301#endif
302 }
303
304 else {
305 Log() << kINFO << "Will use the deprecated STANDARD architecture !" << Endl;
306 fArchitectureString = "STANDARD";
307 }
308
309 // Input Layout
312
313 // Loss function and output.
316 if (fErrorStrategy == "SUMOFSQUARES") {
317 fLossFunction = ELossFunction::kMeanSquaredError;
318 }
319 if (fErrorStrategy == "CROSSENTROPY") {
321 }
323 } else if (fAnalysisType == Types::kRegression) {
324 if (fErrorStrategy != "SUMOFSQUARES") {
325 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
326 << " neural net error function. Setting error function to "
327 << " SUMOFSQUARES now." << Endl;
328 }
329
330 fLossFunction = ELossFunction::kMeanSquaredError;
332 } else if (fAnalysisType == Types::kMulticlass) {
333 if (fErrorStrategy == "SUMOFSQUARES") {
334 fLossFunction = ELossFunction::kMeanSquaredError;
335 }
336 if (fErrorStrategy == "CROSSENTROPY") {
338 }
339 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
340 fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
341 }
343 }
344
345 // Initialization
346 // the biases will be always initialized to zero
347 if (fWeightInitializationString == "XAVIER") {
349 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
351 } else if (fWeightInitializationString == "GAUSS") {
353 } else if (fWeightInitializationString == "UNIFORM") {
355 } else if (fWeightInitializationString == "ZERO") {
357 } else if (fWeightInitializationString == "IDENTITY") {
359 } else {
361 }
362
363 // Training settings.
364
366 for (auto &block : strategyKeyValues) {
367 TTrainingSettings settings;
368
369 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
370 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
371 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
372 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
373 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
374 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
375 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
376 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
377
378 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
379 if (regularization == "L1") {
381 } else if (regularization == "L2") {
383 } else {
385 }
386
387 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
388 settings.optimizerName = optimizer;
389 if (optimizer == "SGD") {
391 } else if (optimizer == "ADAM") {
393 } else if (optimizer == "ADAGRAD") {
395 } else if (optimizer == "RMSPROP") {
397 } else if (optimizer == "ADADELTA") {
399 } else {
400 // Make Adam as default choice if the input string is
401 // incorrect.
403 settings.optimizerName = "ADAM";
404 }
405
406
407 TString strMultithreading = fetchValueTmp(block, "Multithreading", TString("True"));
408
409 if (strMultithreading.BeginsWith("T")) {
410 settings.multithreading = true;
411 } else {
412 settings.multithreading = false;
413 }
414
415 fTrainingSettings.push_back(settings);
416 }
417
418 this->SetBatchSize(fTrainingSettings.front().batchSize);
419
420 // case inputlayout and batch layout was not given. Use default then
421 // (1, batchsize, nvariables)
422 // fInputShape[0] -> BatchSize
423 // fInputShape[1] -> InputDepth
424 // fInputShape[2] -> InputHeight
425 // fInputShape[3] -> InputWidth
426 if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
427 fInputShape[1] = 1;
428 fInputShape[2] = 1;
430 }
431 if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
432 if (fInputShape[2] == 1 && fInputShape[1] == 1) {
433 // case of (1, batchsize, input features)
434 fBatchDepth = 1;
435 fBatchHeight = fTrainingSettings.front().batchSize;
437 }
438 else { // more general cases (e.g. for CNN)
439 fBatchDepth = fTrainingSettings.front().batchSize;
442 }
443 }
444}
445
446////////////////////////////////////////////////////////////////////////////////
447/// default initializations
449{
450 // Nothing to do here
451}
452
453////////////////////////////////////////////////////////////////////////////////
454/// Parse the input layout
456{
457 // Define the delimiter
458 const TString delim("|");
459
460 // Get the input layout string
461 TString inputLayoutString = this->GetInputLayoutString();
462
463 // Split the input layout string
464 TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
465 TIter nextInputDim(inputDimStrings);
466 TObjString *inputDimString = (TObjString *)nextInputDim();
467
468 // Go through every token and save its absolute value in the shape array
469 // The first token is the batch size for easy compatibility with cudnn
470 int subDim = 1;
471 std::vector<size_t> inputShape;
472 inputShape.reserve(inputLayoutString.Length()/2 + 2);
473 inputShape.push_back(30); // Will be set by Trainingsettings, use default now
474 for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
475 // size_t is unsigned
476 subDim = (size_t) abs(inputDimString->GetString().Atoi());
477 // Size among unused dimensions should be set to 1 for cudnn
478 //if (subDim == 0) subDim = 1;
479 inputShape.push_back(subDim);
480 }
481
482 this->SetInputShape(inputShape);
483}
484
485////////////////////////////////////////////////////////////////////////////////
486/// Parse the input layout
488{
489 // Define the delimiter
490 const TString delim("|");
491
492 // Get the input layout string
493 TString batchLayoutString = this->GetBatchLayoutString();
494
495 size_t batchDepth = 0;
496 size_t batchHeight = 0;
497 size_t batchWidth = 0;
498
499 // Split the input layout string
500 TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
501 TIter nextBatchDim(batchDimStrings);
502 TObjString *batchDimString = (TObjString *)nextBatchDim();
503 int idxToken = 0;
504
505 for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
506 switch (idxToken) {
507 case 0: // input depth
508 {
509 TString strDepth(batchDimString->GetString());
510 batchDepth = (size_t)strDepth.Atoi();
511 } break;
512 case 1: // input height
513 {
514 TString strHeight(batchDimString->GetString());
515 batchHeight = (size_t)strHeight.Atoi();
516 } break;
517 case 2: // input width
518 {
519 TString strWidth(batchDimString->GetString());
520 batchWidth = (size_t)strWidth.Atoi();
521 } break;
522 }
523 ++idxToken;
524 }
525
526 this->SetBatchDepth(batchDepth);
527 this->SetBatchHeight(batchHeight);
528 this->SetBatchWidth(batchWidth);
529}
530
531////////////////////////////////////////////////////////////////////////////////
532/// Create a deep net based on the layout string
533template <typename Architecture_t, typename Layer_t>
536{
537 // Layer specification, layer details
538 const TString layerDelimiter(",");
539 const TString subDelimiter("|");
540
541 TString layoutString = this->GetLayoutString();
542
543 //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
544
545 // Split layers
546 TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
547 TIter nextLayer(layerStrings);
548 TObjString *layerString = (TObjString *)nextLayer();
549
550
551 for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
552
553 // Split layer details
554 TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
555 TIter nextToken(subStrings);
556 TObjString *token = (TObjString *)nextToken();
557
558 // Determine the type of the layer
559 TString strLayerType = token->GetString();
560
561
562 if (strLayerType == "DENSE") {
563 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
564 } else if (strLayerType == "CONV") {
565 ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
566 } else if (strLayerType == "MAXPOOL") {
567 ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
568 } else if (strLayerType == "RESHAPE") {
569 ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
570 } else if (strLayerType == "BNORM") {
571 ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
572 } else if (strLayerType == "RNN") {
573 ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter);
574 // } else if (strLayerType == "LSTM") {
575 // Log() << kError << "LSTM Layer is not yet fully implemented" << Endl;
576 // //ParseLstmLayer(deepNet, nets, layerString->GetString(), subDelimiter);
577 // break;
578 } else {
579 // no type of layer specified - assume is dense layer as in old DNN interface
580 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
581 }
582 }
583}
584
585////////////////////////////////////////////////////////////////////////////////
586/// Pases the layer string and creates the appropriate dense layer
587template <typename Architecture_t, typename Layer_t>
589 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
590 TString delim)
591{
592 int width = 0;
594
595 // this return number of input variables for the method
596 // it can be used to deduce width of dense layer if specified as N+10
597 // where N is the number of input variables
598 const size_t inputSize = GetNvar();
599
600 // Split layer details
601 TObjArray *subStrings = layerString.Tokenize(delim);
602 TIter nextToken(subStrings);
603 TObjString *token = (TObjString *)nextToken();
604 int idxToken = 0;
605
606 // loop on the tokens
607 // order of sepcifying width and activation function is not relevant
608 // both 100|TANH and TANH|100 are valid cases
609 for (; token != nullptr; token = (TObjString *)nextToken()) {
610 idxToken++;
611 // try a match with the activation function
612 TString strActFnc(token->GetString());
613 // if first token defines the layer type- skip it
614 if (strActFnc =="DENSE") continue;
615
616 if (strActFnc == "RELU") {
617 activationFunction = DNN::EActivationFunction::kRelu;
618 } else if (strActFnc == "TANH") {
619 activationFunction = DNN::EActivationFunction::kTanh;
620 } else if (strActFnc == "SYMMRELU") {
621 activationFunction = DNN::EActivationFunction::kSymmRelu;
622 } else if (strActFnc == "SOFTSIGN") {
623 activationFunction = DNN::EActivationFunction::kSoftSign;
624 } else if (strActFnc == "SIGMOID") {
625 activationFunction = DNN::EActivationFunction::kSigmoid;
626 } else if (strActFnc == "LINEAR") {
627 activationFunction = DNN::EActivationFunction::kIdentity;
628 } else if (strActFnc == "GAUSS") {
629 activationFunction = DNN::EActivationFunction::kGauss;
630 } else if (width == 0) {
631 // no match found try to parse as text showing the width
632 // support for input a formula where the variable 'x' is 'N' in the string
633 // use TFormula for the evaluation
634 TString strNumNodes = strActFnc;
635 // number of nodes
636 TString strN("x");
637 strNumNodes.ReplaceAll("N", strN);
638 strNumNodes.ReplaceAll("n", strN);
639 TFormula fml("tmp", strNumNodes);
640 width = fml.Eval(inputSize);
641 }
642
643 }
644 // avoid zero width. assume is 1
645 if (width == 0) width = 1;
646
647 // Add the dense layer, initialize the weights and biases and copy
648 TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
649 denseLayer->Initialize();
650
651 // add same layer to fNet
652 if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
653
654 //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
655
656 // add the copy to all slave nets
657 //for (size_t i = 0; i < nets.size(); i++) {
658 // nets[i].AddDenseLayer(copyDenseLayer);
659 //}
660
661 // check compatibility of added layer
662 // for a dense layer input should be 1 x 1 x DxHxW
663}
664
665////////////////////////////////////////////////////////////////////////////////
666/// Pases the layer string and creates the appropriate convolutional layer
667template <typename Architecture_t, typename Layer_t>
669 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
670 TString delim)
671{
672 int depth = 0;
673 int fltHeight = 0;
674 int fltWidth = 0;
675 int strideRows = 0;
676 int strideCols = 0;
677 int zeroPadHeight = 0;
678 int zeroPadWidth = 0;
680
681 // Split layer details
682 TObjArray *subStrings = layerString.Tokenize(delim);
683 TIter nextToken(subStrings);
684 TObjString *token = (TObjString *)nextToken();
685 int idxToken = 0;
686
687 for (; token != nullptr; token = (TObjString *)nextToken()) {
688 switch (idxToken) {
689 case 1: // depth
690 {
691 TString strDepth(token->GetString());
692 depth = strDepth.Atoi();
693 } break;
694 case 2: // filter height
695 {
696 TString strFltHeight(token->GetString());
697 fltHeight = strFltHeight.Atoi();
698 } break;
699 case 3: // filter width
700 {
701 TString strFltWidth(token->GetString());
702 fltWidth = strFltWidth.Atoi();
703 } break;
704 case 4: // stride in rows
705 {
706 TString strStrideRows(token->GetString());
707 strideRows = strStrideRows.Atoi();
708 } break;
709 case 5: // stride in cols
710 {
711 TString strStrideCols(token->GetString());
712 strideCols = strStrideCols.Atoi();
713 } break;
714 case 6: // zero padding height
715 {
716 TString strZeroPadHeight(token->GetString());
717 zeroPadHeight = strZeroPadHeight.Atoi();
718 } break;
719 case 7: // zero padding width
720 {
721 TString strZeroPadWidth(token->GetString());
722 zeroPadWidth = strZeroPadWidth.Atoi();
723 } break;
724 case 8: // activation function
725 {
726 TString strActFnc(token->GetString());
727 if (strActFnc == "RELU") {
728 activationFunction = DNN::EActivationFunction::kRelu;
729 } else if (strActFnc == "TANH") {
730 activationFunction = DNN::EActivationFunction::kTanh;
731 } else if (strActFnc == "SYMMRELU") {
732 activationFunction = DNN::EActivationFunction::kSymmRelu;
733 } else if (strActFnc == "SOFTSIGN") {
734 activationFunction = DNN::EActivationFunction::kSoftSign;
735 } else if (strActFnc == "SIGMOID") {
736 activationFunction = DNN::EActivationFunction::kSigmoid;
737 } else if (strActFnc == "LINEAR") {
738 activationFunction = DNN::EActivationFunction::kIdentity;
739 } else if (strActFnc == "GAUSS") {
740 activationFunction = DNN::EActivationFunction::kGauss;
741 }
742 } break;
743 }
744 ++idxToken;
745 }
746
747 // Add the convolutional layer, initialize the weights and biases and copy
748 TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
749 zeroPadHeight, zeroPadWidth, activationFunction);
750 convLayer->Initialize();
751
752 // Add same layer to fNet
753 if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
754 zeroPadHeight, zeroPadWidth, activationFunction);
755
756 //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
757
758 //// add the copy to all slave nets
759 //for (size_t i = 0; i < nets.size(); i++) {
760 // nets[i].AddConvLayer(copyConvLayer);
761 //}
762}
763
764////////////////////////////////////////////////////////////////////////////////
765/// Pases the layer string and creates the appropriate max pool layer
766template <typename Architecture_t, typename Layer_t>
768 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
769 TString delim)
770{
771
772 int filterHeight = 0;
773 int filterWidth = 0;
774 int strideRows = 0;
775 int strideCols = 0;
776
777 // Split layer details
778 TObjArray *subStrings = layerString.Tokenize(delim);
779 TIter nextToken(subStrings);
780 TObjString *token = (TObjString *)nextToken();
781 int idxToken = 0;
782
783 for (; token != nullptr; token = (TObjString *)nextToken()) {
784 switch (idxToken) {
785 case 1: // filter height
786 {
787 TString strFrmHeight(token->GetString());
788 filterHeight = strFrmHeight.Atoi();
789 } break;
790 case 2: // filter width
791 {
792 TString strFrmWidth(token->GetString());
793 filterWidth = strFrmWidth.Atoi();
794 } break;
795 case 3: // stride in rows
796 {
797 TString strStrideRows(token->GetString());
798 strideRows = strStrideRows.Atoi();
799 } break;
800 case 4: // stride in cols
801 {
802 TString strStrideCols(token->GetString());
803 strideCols = strStrideCols.Atoi();
804 } break;
805 }
806 ++idxToken;
807 }
808
809 // Add the Max pooling layer
810 // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
811 deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
812
813 // Add the same layer to fNet
814 if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
815
816
817 //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
818
819 //// add the copy to all slave nets
820 //for (size_t i = 0; i < nets.size(); i++) {
821 // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
822 //}
823}
824
825////////////////////////////////////////////////////////////////////////////////
826/// Pases the layer string and creates the appropriate reshape layer
827template <typename Architecture_t, typename Layer_t>
829 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
830 TString delim)
831{
832 int depth = 0;
833 int height = 0;
834 int width = 0;
835 bool flattening = false;
836
837 // Split layer details
838 TObjArray *subStrings = layerString.Tokenize(delim);
839 TIter nextToken(subStrings);
840 TObjString *token = (TObjString *)nextToken();
841 int idxToken = 0;
842
843 for (; token != nullptr; token = (TObjString *)nextToken()) {
844 if (token->GetString() == "FLAT") idxToken=4;
845 switch (idxToken) {
846 case 1: {
847 TString strDepth(token->GetString());
848 depth = strDepth.Atoi();
849 } break;
850 case 2: // height
851 {
852 TString strHeight(token->GetString());
853 height = strHeight.Atoi();
854 } break;
855 case 3: // width
856 {
857 TString strWidth(token->GetString());
858 width = strWidth.Atoi();
859 } break;
860 case 4: // flattening
861 {
862 TString flat(token->GetString());
863 if (flat == "FLAT") {
864 flattening = true;
865 }
866 } break;
867 }
868 ++idxToken;
869 }
870
871 // Add the reshape layer
872 // TReshapeLayer<Architecture_t> *reshapeLayer =
873 deepNet.AddReshapeLayer(depth, height, width, flattening);
874
875 // Add the same layer to fNet
876 if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
877
878 //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
879
880 //// add the copy to all slave nets
881 //for (size_t i = 0; i < nets.size(); i++) {
882 // nets[i].AddReshapeLayer(copyReshapeLayer);
883 //}
884}
885
886////////////////////////////////////////////////////////////////////////////////
887/// Pases the layer string and creates the appropriate reshape layer
888template <typename Architecture_t, typename Layer_t>
890 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
891 TString delim)
892{
893
894 // default values
895 double momentum = -1; //0.99;
896 double epsilon = 0.0001;
897
898 // Split layer details
899 TObjArray *subStrings = layerString.Tokenize(delim);
900 TIter nextToken(subStrings);
901 TObjString *token = (TObjString *)nextToken();
902 int idxToken = 0;
903
904 for (; token != nullptr; token = (TObjString *)nextToken()) {
905 switch (idxToken) {
906 case 1: {
907 momentum = std::atof(token->GetString().Data());
908 } break;
909 case 2: // height
910 {
911 epsilon = std::atof(token->GetString().Data());
912 } break;
913 }
914 ++idxToken;
915 }
916
917 // Add the batch norm layer
918 //
919 auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
920 layer->Initialize();
921
922 // Add the same layer to fNet
923 if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
924
925}
926
927////////////////////////////////////////////////////////////////////////////////
928/// Pases the layer string and creates the appropriate rnn layer
929template <typename Architecture_t, typename Layer_t>
931 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
932 TString delim)
933{
934 // int depth = 0;
935 int stateSize = 0;
936 int inputSize = 0;
937 int timeSteps = 0;
938 bool rememberState = false;
939
940 // Split layer details
941 TObjArray *subStrings = layerString.Tokenize(delim);
942 TIter nextToken(subStrings);
943 TObjString *token = (TObjString *)nextToken();
944 int idxToken = 0;
945
946 for (; token != nullptr; token = (TObjString *)nextToken()) {
947 switch (idxToken) {
948 case 1: // state size
949 {
950 TString strstateSize(token->GetString());
951 stateSize = strstateSize.Atoi();
952 } break;
953 case 2: // input size
954 {
955 TString strinputSize(token->GetString());
956 inputSize = strinputSize.Atoi();
957 } break;
958 case 3: // time steps
959 {
960 TString strtimeSteps(token->GetString());
961 timeSteps = strtimeSteps.Atoi();
962 }
963 case 4: // remember state (1 or 0)
964 {
965 TString strrememberState(token->GetString());
966 rememberState = (bool) strrememberState.Atoi();
967 } break;
968 }
969 ++idxToken;
970 }
971
972 // Add the recurrent layer, initialize the weights and biases and copy
973 TBasicRNNLayer<Architecture_t> *basicRNNLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize,
974 timeSteps, rememberState);
975 basicRNNLayer->Initialize();
976
977 // Add same layer to fNet
978 if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
979
980 //TBasicRNNLayer<Architecture_t> *copyRNNLayer = new TBasicRNNLayer<Architecture_t>(*basicRNNLayer);
981
982 //// add the copy to all slave nets
983 //for (size_t i = 0; i < nets.size(); i++) {
984 // nets[i].AddBasicRNNLayer(copyRNNLayer);
985 //}
986}
987
988////////////////////////////////////////////////////////////////////////////////
989/// Parses the layer string and creates the appropriate lstm layer
// NOTE(review): this parser is currently a stub -- the switch statement below
// has no cases, so every token of the layer description is consumed but
// ignored, and no LSTM layer is added to the network.
990template <typename Architecture_t, typename Layer_t>
992                            std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
993                            TString delim)
994{
995   // Split layer details
996   TObjArray *subStrings = layerString.Tokenize(delim);
997   TIter nextToken(subStrings);
998   TObjString *token = (TObjString *)nextToken();
999   int idxToken = 0;
1000
1001   for (; token != nullptr; token = (TObjString *)nextToken()) {
1002      switch (idxToken) { // no cases implemented yet: tokens are unused
1003      }
1004      ++idxToken;
1005   }
1006}
1007
1008////////////////////////////////////////////////////////////////////////////////
1009/// Standard constructor.
/// Registers the method with MethodBase under Types::kDL and default-initializes
/// all configuration members; fInputShape is created with 4 zeroed entries.
/// The option strings (layout, training strategy, ...) are presumably parsed
/// later during option processing -- not visible in this chunk.
/// NOTE(review): keep the initializer list in member-declaration order
/// (MethodDL.h, not visible here) to avoid -Wreorder warnings.
1010MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
1011   : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1012     fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1013     fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1014     fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1015     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1016     fXInput()
1017{
1018   // Nothing to do here: all members are set up by the initializer list.
1019}
1020
1021////////////////////////////////////////////////////////////////////////////////
1022/// Constructor from a weight file.
/// Same default member initialization as the standard constructor; the actual
/// network state is presumably restored later from the weight file by the
/// framework's ReadWeights machinery (not visible in this chunk).
1023MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
1024   : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1025     fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1026     fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1027     fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1028     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1029     fXInput()
1030{
1031   // Nothing to do here: all members are set up by the initializer list.
1032}
1033
1034////////////////////////////////////////////////////////////////////////////////
1035/// Destructor.
/// Nothing to release explicitly: the owned resources (e.g. fNet, fYHat) are
/// held in std::unique_ptr members and are freed automatically.
1037{
1038   // Nothing to do here
1039}
1040
1041////////////////////////////////////////////////////////////////////////////////
1042/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1043auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1044{
1045 // remove empty spaces
1046 parseString.ReplaceAll(" ","");
1047 KeyValueVector_t blockKeyValues;
1048 const TString keyValueDelim("=");
1049
1050 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1051 TIter nextBlock(blockStrings);
1052 TObjString *blockString = (TObjString *)nextBlock();
1053
1054 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1055 blockKeyValues.push_back(std::map<TString, TString>());
1056 std::map<TString, TString> &currentBlock = blockKeyValues.back();
1057
1058 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1059 TIter nextToken(subStrings);
1060 TObjString *token = (TObjString *)nextToken();
1061
1062 for (; token != nullptr; token = (TObjString *)nextToken()) {
1063 TString strKeyValue(token->GetString());
1064 int delimPos = strKeyValue.First(keyValueDelim.Data());
1065 if (delimPos <= 0) continue;
1066
1067 TString strKey = TString(strKeyValue(0, delimPos));
1068 strKey.ToUpper();
1069 TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1070
1071 strKey.Strip(TString::kBoth, ' ');
1072 strValue.Strip(TString::kBoth, ' ');
1073
1074 currentBlock.insert(std::make_pair(strKey, strValue));
1075 }
1076 }
1077 return blockKeyValues;
1078}
1079
1080////////////////////////////////////////////////////////////////////////////////
1081/// What kind of analysis type can handle the CNN
/// Supported: two-class classification, multiclass classification and
/// regression; any other combination is rejected.
1083{
1084   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1085   if (type == Types::kMulticlass) return kTRUE;
1086   if (type == Types::kRegression) return kTRUE;
1087
1088   return kFALSE;
1089}
1090
1091////////////////////////////////////////////////////////////////////////////////
1092/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1093/// 100 etc.
1094/// - 20% and 0.2 selects 20% of the training set as validation data.
1095/// - 100 selects 100 events as the validation data.
1096///
1097/// @return number of samples in validation set
1098///
/// Emits kFATAL (aborts) if the option cannot be parsed, or if the resulting
/// size is negative, zero, or >= the training set size.
1100{
1101   Int_t nValidationSamples = 0;
1102   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1103
1104   // Parsing + Validation
1105   // --------------------
1106   if (fNumValidationString.EndsWith("%")) {
1107      // Relative spec. format 20%
1108      TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1109
1110      if (intValStr.IsFloat()) {
         // NOTE(review): Atof() is called on the original string ("20%");
         // the conversion stops at the '%' so the numeric part is parsed.
1111         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1112         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1113      } else {
1114         Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1115               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1116      }
1117   } else if (fNumValidationString.IsFloat()) {
1118      Double_t valSizeAsDouble = fNumValidationString.Atof();
1119
1120      if (valSizeAsDouble < 1.0) {
1121         // Relative spec. format 0.2
1122         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1123      } else {
1124         // Absolute spec format 100 or 100.0
1125         nValidationSamples = valSizeAsDouble;
1126      }
1127   } else {
1128      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1129            << Endl;
1130   }
1131
1132   // Value validation
1133   // ----------------
1134   if (nValidationSamples < 0) {
1135      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1136   }
1137
1138   if (nValidationSamples == 0) {
1139      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1140   }
1141
1142   if (nValidationSamples >= (Int_t)trainingSetSize) {
1143      Log() << kFATAL << "Validation size \"" << fNumValidationString
1144            << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1145   }
1146
1147   return nValidationSamples;
1148}
1149
1150
1151////////////////////////////////////////////////////////////////////////////////
1152/// Implementation of architecture specific train method
1153///
/// Templated on the backend architecture (CPU/GPU/reference); called from
/// MethodDL::Train(). For every TTrainingSettings block it builds a training
/// DeepNet, runs the selected optimizer, evaluates the validation loss every
/// `testInterval` epochs, and copies the best weights found into fNet
/// (the batch-size-1 evaluation network).
1154template <typename Architecture_t>
1156{
1157
1158   using Scalar_t = typename Architecture_t::Scalar_t;
1161   using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1162
1163   bool debug = Log().GetMinType() == kDEBUG;
1164
1165
1166   // Determine the number of outputs
1167   // //    size_t outputSize = 1;
1168   // //    if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
1169   // //       outputSize = GetNTargets();
1170   // //    } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
1171   // //       outputSize = DataInfo().GetNClasses();
1172   // //    }
1173
1174   // set the random seed for weight initialization
1175   Architecture_t::SetRandomSeed(fRandomSeed);
1176
1177   ///split training data in training and validation data
1178   // and determine the number of training and testing examples
1179
1180   size_t nValidationSamples = GetNumValidationSamples();
1181   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1182
   // The split is positional: the first nTrainingSamples events train, the
   // remainder validate (no shuffling before the split).
1183   const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1184   const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1185   const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1186
1187   size_t trainingPhase = 1;
1188
1189   for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1190
1191      size_t nThreads = 1;       // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1192
1193
1194      // After the processing of the options, initialize the master deep net
1195      size_t batchSize = settings.batchSize;
1196      this->SetBatchSize(batchSize);
1197      // Should be replaced by actual implementation. No support for this now.
1198      size_t inputDepth = this->GetInputDepth();
1199      size_t inputHeight = this->GetInputHeight();
1200      size_t inputWidth = this->GetInputWidth();
1201      size_t batchDepth = this->GetBatchDepth();
1202      size_t batchHeight = this->GetBatchHeight();
1203      size_t batchWidth = this->GetBatchWidth();
1204      ELossFunction J = this->GetLossFunction();
1206      ERegularization R = settings.regularization;
1207      EOptimizer O = settings.optimizer;
1208      Scalar_t weightDecay = settings.weightDecay;
1209
1210      //Batch size should be included in batch layout as well. There are two possibilities:
1211      //  1.  Batch depth = batch size  one will input tensorsa as  (batch_size x d1 x d2)
1212      //    This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1213      //  2. Batch depth = 1, batch height = batch size batxch width = dim of input features
1214      //     This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1215
1216      if (batchDepth != batchSize && batchDepth > 1) {
1217         Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1218         return;
1219      }
1220      if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1221         Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1222         return;
1223      }
1224
1225
1226      //check also that input layout compatible with batch layout
1227      bool badLayout = false;
1228      // case batch depth == batch size
1229      if (batchDepth == batchSize)
1230         badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1231      // case batch Height is batch size
1232      if (batchHeight == batchSize && batchDepth == 1)
1233         badLayout |=  ( inputDepth * inputHeight * inputWidth !=  batchWidth);
1234      if (badLayout) {
1235         Error("Train","Given input layout %zu x %zu x %zu is not compatible with  batch layout %zu x %zu x  %zu ",
1236               inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1237         return;
1238      }
1239
1240      // check batch size is compatible with number of events
1241      if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1242         Log() << kFATAL << "Number of samples in the datasets are train: ("
1243               << nTrainingSamples << ") test: (" << nValidationSamples
1244               << "). One of these is smaller than the batch size of "
1245               << settings.batchSize << ". Please increase the batch"
1246               << " size to be at least the same size as the smallest"
1247               << " of them." << Endl;
1248      }
1249
1250      DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1251
1252      // create a copy of DeepNet for evaluating but with batch size = 1
1253      // fNet is the saved network and will be with CPU or Referrence architecture
1254      if (trainingPhase == 1) {
1255         fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1256                                                                 batchHeight, batchWidth, J, I, R, weightDecay));
1257         fBuildNet = true;
1258      }
1259      else
1260         fBuildNet = false;
1261
1262      // Initialize the vector of slave nets
1263      std::vector<DeepNet_t> nets{};
1264      nets.reserve(nThreads);
1265      for (size_t i = 0; i < nThreads; i++) {
1266         // create a copies of the master deep net
1267         nets.push_back(deepNet);
1268      }
1269
1270
1271      // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1272      CreateDeepNet(deepNet, nets);
1273
1274
1275      // set droput probabilities
1276      // use convention to store in the layer 1.- dropout probabilities
1277      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1278      for (auto & p : dropoutVector) {
1279         p = 1.0 - p;
1280      }
1281      deepNet.SetDropoutProbabilities(dropoutVector);
1282
1283      if (trainingPhase > 1) {
1284         // copy initial weights from fNet to deepnet
1285         for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1286            deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1287         }
1288      }
1289
1290      // when fNet is built create also input matrix that will be used to evaluate it
1291      if (fBuildNet) {
1292         //int n1 = batchHeight;
1293         //int n2 = batchWidth;
1294         // treat case where batchHeight is  the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1295         //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1296         //fXInput = TensorImpl_t(1,n1,n2);
1298         if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
1299            fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
1300         fXInputBuffer =  HostBufferImpl_t( fXInput.GetSize() );
1301
1302
1303         // create pointer to output matrix used for the predictions
1304         fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(),  fNet->GetOutputWidth() ) );
1305
1306         // print the created network
1307         Log()  << "***** Deep Learning Network *****" << Endl;
1308         if (Log().GetMinType() <= kINFO)
1309            deepNet.Print();
1310      }
1311      Log() << "Using " << nTrainingSamples << " events for training and " <<  nValidationSamples << " for testing" << Endl;
1312
1313      // Loading the training and validation datasets
1314      TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1315      TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1316                                      {inputDepth, inputHeight, inputWidth},
1317                                      {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1318                                      deepNet.GetOutputWidth(), nThreads);
1319
1320      TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1321      TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1322                                        {inputDepth, inputHeight, inputWidth},
1323                                        { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1324                                        deepNet.GetOutputWidth(), nThreads);
1325
1326
1327
1328      // do an evaluation of the network to compute initial minimum test error
1329
1330      Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1331
1332      Double_t minValError = 0.0;
1333      Log() << "Compute initial loss  on the validation data " << Endl;
1334      for (auto batch : validationData) {
1335         auto inputTensor = batch.GetInput();
1336         auto outputMatrix = batch.GetOutput();
1337         auto weights = batch.GetWeights();
1338
1339         //std::cout << " input use count " << inputTensor.GetBufferUseCount() << std::endl;
1340         // should we apply droput to the loss ??
1341         minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
1342      }
1343      // add Regularization term
1344      Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1345      minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1346      minValError += regzTerm;
1347
1348
1349      // create a pointer to base class VOptimizer
1350      std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1351
1352      // initialize the base class pointer with the corresponding derived class object.
1353      switch (O) {
1354
1355      case EOptimizer::kSGD:
1356         optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1357            new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1358         break;
1359
1360      case EOptimizer::kAdam:
1361         optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1362            new DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1363         break;
1364
1365      case EOptimizer::kAdagrad:
1366         optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1367            new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1368         break;
1369
1370      case EOptimizer::kRMSProp:
1371         optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1372            new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum));
1373         break;
1374
1375      case EOptimizer::kAdadelta:
1376         optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1377            new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1378         break;
1379      }
1380
1381
1382      // Initialize the vector of batches, one batch for one slave network
1383      std::vector<TTensorBatch<Architecture_t>> batches{};
1384
1385      bool converged = false;
1386      size_t convergenceCount = 0;
1387      size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1388
1389      // start measuring
1390      std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1391      tstart = std::chrono::system_clock::now();
1392
1393      Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ":  "
1394            <<  " Optimizer " << settings.optimizerName
1395            << " Learning rate = " << settings.learningRate
1396            << " regularization " << (char) settings.regularization
1397            << " minimum error = " << minValError
1398            << Endl;
1399      if (!fInteractive) {
1400         std::string separator(62, '-');
1401         Log() << separator << Endl;
1402         Log() << std::setw(10) << "Epoch"
1403               << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
1404               << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
1405               << std::setw(12) << "nEvents/s"
1406               << std::setw(12) << "Conv. Steps" << Endl;
1407         Log() << separator << Endl;
1408      }
1409
1410      // set up generator for shuffling the batches
1411      // if seed is zero we have always a different order in the batches
1412      size_t shuffleSeed = 0;
1413      if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1414      RandomGenerator<TRandom3> rng(shuffleSeed);
1415
1416      // print weights before
1417      if (fBuildNet && debug) {
1418         Log() << "Initial Deep Net Weights " << Endl;
1419         auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1420         for (size_t l = 0; l < weights_tensor.size(); ++l)
1421            weights_tensor[l].Print();
1422         auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1423         bias_tensor[0].Print();
1424      }
1425
1426      Log() << "   Start epoch iteration ..." << Endl;
1427      bool debugFirstEpoch = false;
1428      bool computeLossInTraining = true;  // compute loss in training or at test time
      // One iteration of this loop == one epoch (one GlobalStep of the optimizer).
1429      while (!converged) {
1430         optimizer->IncrementGlobalStep();
1431         trainingData.Shuffle(rng);
1432
1433         // execute all epochs
1434         //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1435
1436         Double_t trainingError = 0;
1437         for (size_t i = 0; i < batchesInEpoch; ++i ) {
1438            // Clean and load new batches, one batch for one slave net
1439            //batches.clear();
1440            //batches.reserve(nThreads);
1441            //for (size_t j = 0; j < nThreads; j++) {
1442            //   batches.push_back(trainingData.GetTensorBatch());
1443            //}
1444            if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
1445
1446            auto my_batch = trainingData.GetTensorBatch();
1447
1448            if (debugFirstEpoch)
1449               std::cout << "got batch data - doing forward \n";
1450
1451#ifdef DEBUG
1452
1453            Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
1454            typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1455            typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1456            Architecture_t::PrintTensor(tOut,"label tensor",true)   ;
1457            Architecture_t::PrintTensor(tW,"weight tensor",true)  ;
1458#endif
1459
1460            deepNet.Forward(my_batch.GetInput(), true);
1461            // compute also loss
1462            if (computeLossInTraining) {
1463               auto outputMatrix = my_batch.GetOutput();
1464               auto weights = my_batch.GetWeights();
1465               trainingError += deepNet.Loss(outputMatrix, weights, false);
1466            }
1467
1468            if (debugFirstEpoch)
1469               std::cout << "- doing backward \n";
1470
1471#ifdef DEBUG
1472            size_t nlayers = deepNet.GetLayers().size();
1473            for (size_t l = 0; l < nlayers; ++l) {
1474               if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
1475                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
1476                                              TString::Format("initial weights layer %d", l).Data());
1477
1478               Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
1479                                           TString::Format("output tensor layer %d", l).Data());
1480            }
1481#endif
1482
1483            //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );
1484
1485            deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1486
1487            if (debugFirstEpoch)
1488               std::cout << "- doing optimizer update  \n";
1489
1490            optimizer->Step();
1491
1492#ifdef DEBUG
1493            std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
1494            for (size_t l = 0; l < nlayers; ++l) {
1495               if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
1496                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
1497                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
1498               }
1499            }
1500#endif
1501
1502         }
1503
1504         if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss   \n";
1505         //}
1506         debugFirstEpoch = false;
1507         if ((optimizer->GetGlobalStep() % settings.testInterval) == 0) {
1508
1509            std::chrono::time_point<std::chrono::system_clock> t1,t2;
1510
1511            t1 = std::chrono::system_clock::now();
1512
1513            // Compute validation error.
1514
1515
1516            Double_t valError = 0.0;
1517            bool inTraining = false;
1518            for (auto batch : validationData) {
1519               auto inputTensor = batch.GetInput();
1520               auto outputMatrix = batch.GetOutput();
1521               auto weights = batch.GetWeights();
1522               // should we apply droput to the loss ??
1523               valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1524            }
1525            // normalize loss to number of batches and add regularization term
1526            Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1527            valError /= (Double_t)(nValidationSamples / settings.batchSize);
1528            valError += regTerm;
1529
1530            //Log the loss value
1531            fTrainHistory.AddValue("valError",optimizer->GetGlobalStep(),valError);
1532
1533            t2 = std::chrono::system_clock::now();
1534
1535            // checking for convergence
1536            if (valError < minValError) {
1537               convergenceCount = 0;
1538            } else {
1539               convergenceCount += settings.testInterval;
1540            }
1541
1542            // copy configuration when reached a minimum error
1543            if (valError < minValError ) {
1544               // Copy weights from deepNet to fNet
1545               Log() << std::setw(10) << optimizer->GetGlobalStep()
1546                     << " Minimum Test error found - save the configuration " << Endl;
1547               for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1548                  fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1549                  // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1550                  //    Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), " input weights");
1551                  //    Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), " state weights");
1552                  // }
1553               }
1554               // Architecture_t::PrintTensor(deepNet.GetLayerAt(1)->GetWeightsAt(0), " cudnn weights");
1555               // ArchitectureImpl_t::PrintTensor(fNet->GetLayerAt(1)->GetWeightsAt(0), " cpu weights");
1556
1557               minValError = valError;
1558            }
            // a non-positive reference loss is treated as "unset": adopt the
            // current validation error as the new reference
1559            else if ( minValError <= 0. )
1560               minValError = valError;
1561
1562            if (!computeLossInTraining) {
1563               trainingError = 0.0;
1564               // Compute training error.
1565               for (auto batch : trainingData) {
1566                  auto inputTensor = batch.GetInput();
1567                  auto outputMatrix = batch.GetOutput();
1568                  auto weights = batch.GetWeights();
1569                  trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1570               }
1571            }
1572            // normalize loss to number of batches and add regularization term
1573            trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1574            trainingError += regTerm;
1575
1576            //Log the loss value
1577            fTrainHistory.AddValue("trainingError",optimizer->GetGlobalStep(),trainingError);
1578
1579            // stop measuring
1580            tend = std::chrono::system_clock::now();
1581
1582            // Compute numerical throughput.
1583            std::chrono::duration<double> elapsed_seconds = tend - tstart;
1584            std::chrono::duration<double> elapsed1 = t1-tstart;
1585            // std::chrono::duration<double> elapsed2 = t2-tstart;
1586            // time to compute training and test errors
1587            std::chrono::duration<double> elapsed_testing = tend-t1;
1588
1589            double seconds = elapsed_seconds.count();
1590            // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1591            // nGFlops *= deepnet.GetNFlops() * 1e-9;
1592            double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1593
            // stop when the validation error has not improved for
            // `convergenceSteps` epochs, or the epoch budget is exhausted
1594            converged =
1595               convergenceCount > settings.convergenceSteps || optimizer->GetGlobalStep() >= settings.maxEpochs;
1596
1597
1598            Log() << std::setw(10) << optimizer->GetGlobalStep() << " | "
1599                  << std::setw(12) << trainingError
1600                  << std::setw(12) << valError
1601                  << std::setw(12) << seconds / settings.testInterval
1602                  << std::setw(12) << elapsed_testing.count()
1603                  << std::setw(12) << 1. / eventTime
1604                  << std::setw(12) << convergenceCount
1605                  << Endl;
1606
1607            if (converged) {
1608               Log() << Endl;
1609            }
1610            tstart = std::chrono::system_clock::now();
1611         }
1612
1613         // if (stepCount % 10 == 0 || converged) {
1614         if (converged && debug) {
1615            Log() << "Final Deep Net Weights for phase  " << trainingPhase << " epoch " << optimizer->GetGlobalStep()
1616                  << Endl;
1617            auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1618            auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1619            for (size_t l = 0; l < weights_tensor.size(); ++l)
1620               weights_tensor[l].Print();
1621            bias_tensor[0].Print();
1622         }
1623
1624      }
1625
1626      trainingPhase++;
1627   }  // end loop on training Phase
1628}
1629
1630////////////////////////////////////////////////////////////////////////////////
/// Train the deep neural network: dispatches to the architecture-specific
/// TrainDeepNet<> instantiation selected by the "Architecture" option
/// (GPU, CPU or STANDARD); OPENCL and interactive mode are not supported.
/// NOTE(review): if "STANDARD" is selected but HAVE_REFERENCE is not defined,
/// the branch below logs a start message and then silently does nothing.
1632{
1633   if (fInteractive) {
1634      Log() << kFATAL << "Not implemented yet" << Endl;
1635      return;
1636   }
1637
1638   // using for training same scalar type defined for the prediction
1639   if (this->GetArchitectureString() == "GPU") {
1640#ifdef R__HAS_TMVAGPU
1641      Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1642#ifdef R__HAS_CUDNN
1643      TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1644#else
1645      TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1646#endif
1647#else
1648      Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1649                         "you have CUDA installed and it was successfully "
1650                         "detected by CMAKE."
1651             << Endl;
1652      return;
1653#endif
1654   } else if (this->GetArchitectureString() == "OPENCL") {
1655      Log() << kFATAL << "OPENCL backend not yet supported." << Endl;
1656      return;
1657   } else if (this->GetArchitectureString() == "CPU") {
1658#ifdef R__HAS_TMVACPU
1659      // note that number of threads used for BLAS might be different
1660      // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1661      Log() << kINFO << "Start of deep neural network training on CPU using (for ROOT-IMT) nthreads = "
1662            << gConfig().GetNCpu() << Endl << Endl;
1663      TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1664#else
1665      Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1666                        "you have a BLAS implementation and it was successfully "
1667                        "detected by CMake as well that the imt CMake flag is set."
1668            << Endl;
1669      return;
1670#endif
1671   } else if (this->GetArchitectureString() == "STANDARD") {
1672      Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
1673#if HAVE_REFERENCE
1674      TrainDeepNet<DNN::TReference<ScalarImpl_t> >();
1675#endif
1676   }
1677   else {
      // NOTE(review): "archiectire" below is a typo for "architecture" in a
      // user-facing message (left unchanged in this documentation-only pass).
1678      Log() << kFATAL << this->GetArchitectureString() <<
1679                      " is not  a supported archiectire for TMVA::MethodDL"
1680            << Endl;
1681   }
1682
1683//    /// definitions for CUDA
1684// #ifdef R__HAS_TMVAGPU // Included only if DNNCUDA flag is set.
1685//    using Architecture_t = DNN::TCuda<Double_t>;
1686// #else
1687// #ifdef R__HAS_TMVACPU // Included only if DNNCPU flag is set.
1688//    using Architecture_t = DNN::TCpu<Double_t>;
1689// #else
1690//    using Architecture_t = DNN::TReference<Double_t>;
1691// #endif
1692// #endif
1693}
1694
1695
1696////////////////////////////////////////////////////////////////////////////////
/// Compute the MVA output for the current event by copying its values into
/// fXInput, running fNet (which must have batch size 1) forward and reading
/// the first output. The error arguments are unused. Returns -999. if the
/// network output is NaN.
1697Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1698{
1699
1700   // note that fNet  should have been build with a batch size of  1
1701
1702   if (!fNet || fNet->GetDepth() == 0) {
1703      Log() << kFATAL << "The network has not been trained and fNet is not built"
1704            << Endl;
1705   }
1706
1707   // input  size must be equal to  1 which is the batch size of fNet
1708   R__ASSERT(fNet->GetBatchSize() == 1);
1709
1710   // int batchWidth = fNet->GetBatchWidth();
1711   // int batchDepth = fNet->GetBatchDepth();
1712   // int batchHeight = fNet->GetBatchHeight();
1713//   int noutput = fNet->GetOutputWidth();
1714
1715
1716   // get current event
1717   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1718
1719   size_t nVariables = GetEvent()->GetNVariables();
1720
1721   // for Columnlayout tensor memory layout is HWC while for rowwise is CHW
1723      R__ASSERT(fXInput.GetShape().size() < 4);
1724      size_t nc, nhw = 0;
1725      if (fXInput.GetShape().size() == 2) {
1726         nc = fXInput.GetShape()[0];
1727         if (nc != 1 ) {
1729            Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1"
1730                  << Endl;
1731         }
1732         nhw = fXInput.GetShape()[1];
1733      } else {
1734         nc = fXInput.GetCSize();
1735         nhw = fXInput.GetWSize();
1736      }
1737      if ( nVariables != nc * nhw) {
1738         Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1739               << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw
1740               << Endl;
1741      }
1742      for (size_t j = 0; j < nc; j++) {
1743         for (size_t k = 0; k < nhw; k++) {
1744            // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
1745            fXInputBuffer[ k * nc + j] = inputValues[j*nhw + k];  // for column layout !!!
1746         }
1747      }
1748   } else {
1749      // row-wise layout
1750      assert(fXInput.GetShape().size() >= 4);
1751      size_t nc = fXInput.GetCSize();
1752      size_t nh = fXInput.GetHSize();
1753      size_t nw = fXInput.GetWSize();
1754      size_t n = nc * nh * nw;
1755      if ( nVariables != n) {
1756         Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1757               << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
1758               << Endl;
1759      }
1760      for (size_t j = 0; j < n; j++) {
1761         // in this case TMVA event has same order as input tensor
1762         fXInputBuffer[ j ] = inputValues[j];  // row-wise layout: event values copied in tensor order
1763      }
1764   }
1765   // copy buffer in input
1766   fXInput.GetDeviceBuffer().CopyFrom( fXInputBuffer);
1767
1768   // perform the prediction
1769   fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1770
1771   // return value
1772   double mvaValue = (*fYHat)(0, 0);
1773
1774   // for debugging
1775#ifdef DEBUG_MVAVALUE
1776   using Tensor_t = std::vector<MatrixImpl_t>;
1777    TMatrixF  xInput(n1,n2, inputValues.data() );
1778    std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1779    xInput.Print();
1780    std::cout << "Output of DeepNet " << mvaValue << std::endl;
1781    auto & deepnet = *fNet;
1782    std::cout << "Loop on layers " << std::endl;
1783    for (int l = 0; l < deepnet.GetDepth(); ++l) {
1784       std::cout << "Layer " << l;
1785       const auto *  layer = deepnet.GetLayerAt(l);
1786       const Tensor_t & layer_output = layer->GetOutput();
1787       layer->Print();
1788       std::cout << "DNN output " << layer_output.size() << std::endl;
1789       for (size_t i = 0; i < layer_output.size(); ++i) {
1790#ifdef R__HAS_TMVAGPU
1791          //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1792          TMatrixD m = layer_output[i];
1793#else
1794          TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1795#endif
1796          m.Print();
1797       }
1798       const Tensor_t & layer_weights = layer->GetWeights();
1799       std::cout << "DNN weights " << layer_weights.size() << std::endl;
1800       if (layer_weights.size() > 0) {
1801          int i = 0;
1802#ifdef R__HAS_TMVAGPU
1803          TMatrixD m = layer_weights[i];
1804//          TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1805#else
1806          TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1807#endif
1808          m.Print();
1809       }
1810    }
1811#endif
1812
1813   return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1814}
1815////////////////////////////////////////////////////////////////////////////////
1816/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1817////////////////////////////////////////////////////////////////////////////////
1818template <typename Architecture_t>
1819std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1820{
1821
1822 // Check whether the model is setup
1823 if (!fNet || fNet->GetDepth() == 0) {
1824 Log() << kFATAL << "The network has not been trained and fNet is not built"
1825 << Endl;
1826 }
1827
1828 // rebuild the networks
1829 this->SetBatchSize(batchSize);
1830 size_t inputDepth = this->GetInputDepth();
1831 size_t inputHeight = this->GetInputHeight();
1832 size_t inputWidth = this->GetInputWidth();
1833 size_t batchDepth = this->GetBatchDepth();
1834 size_t batchHeight = this->GetBatchHeight();
1835 size_t batchWidth = this->GetBatchWidth();
1836 ELossFunction J = fNet->GetLossFunction();
1837 EInitialization I = fNet->GetInitialization();
1838 ERegularization R = fNet->GetRegularization();
1839 Double_t weightDecay = fNet->GetWeightDecay();
1840
1841 using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1842 using Matrix_t = typename Architecture_t::Matrix_t;
1843 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1844
1845 // create the deep neural network
1846 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1847 std::vector<DeepNet_t> nets{};
1848 fBuildNet = false;
1849 CreateDeepNet(deepNet,nets);
1850
1851 // copy weights from the saved fNet to the built DeepNet
1852 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1853 deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1854 // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1855 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
1856 // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
1857 // }
1858 }
1859
1860 size_t n1 = deepNet.GetBatchHeight();
1861 size_t n2 = deepNet.GetBatchWidth();
1862 size_t n0 = deepNet.GetBatchSize();
1863 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1864 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1865 n1 = deepNet.GetBatchSize();
1866 n0 = 1;
1867 }
1868 //this->SetBatchDepth(n0);
1869 Long64_t nEvents = lastEvt - firstEvt;
1870 TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1871 TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1872
1873
1874 // Tensor_t xInput;
1875 // for (size_t i = 0; i < n0; ++i)
1876 // xInput.emplace_back(Matrix_t(n1,n2));
1877
1878 // create pointer to output matrix used for the predictions
1879 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1880
1881 // use timer
1882 Timer timer( nEvents, GetName(), kTRUE );
1883
1884 if (logProgress)
1885 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1886 << "Evaluation of " << GetMethodName() << " on "
1887 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1888 << " sample (" << nEvents << " events)" << Endl;
1889
1890
1891 // eventg loop
1892 std::vector<double> mvaValues(nEvents);
1893
1894
1895 for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1896
1897 Long64_t ievt_end = ievt + batchSize;
1898 // case of batch prediction for
1899 if (ievt_end <= lastEvt) {
1900
1901 if (ievt == firstEvt) {
1902 Data()->SetCurrentEvent(ievt);
1903 size_t nVariables = GetEvent()->GetNVariables();
1904
1905 if (n1 == batchSize && n0 == 1) {
1906 if (n2 != nVariables) {
1907 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1908 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1909 << Endl;
1910 }
1911 } else {
1912 if (n1*n2 != nVariables || n0 != batchSize) {
1913 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1914 << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1915 << Endl;
1916 }
1917 }
1918 }
1919
1920 auto batch = testData.GetTensorBatch();
1921 auto inputTensor = batch.GetInput();
1922
1923 auto xInput = batch.GetInput();
1924 // make the prediction
1925 deepNet.Prediction(yHat, xInput, fOutputFunction);
1926 for (size_t i = 0; i < batchSize; ++i) {
1927 double value = yHat(i,0);
1928 mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1929 }
1930 }
1931 else {
1932 // case of remaining events: compute prediction by single event !
1933 for (Long64_t i = ievt; i < lastEvt; ++i) {
1934 Data()->SetCurrentEvent(i);
1935 mvaValues[i] = GetMvaValue();
1936 }
1937 }
1938 }
1939
1940 if (logProgress) {
1941 Log() << kINFO
1942 << "Elapsed time for evaluation of " << nEvents << " events: "
1943 << timer.GetElapsedTime() << " " << Endl;
1944 }
1945
1946 return mvaValues;
1947}
1948
1949const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1950{
1951 size_t nVariables = GetEvent()->GetNVariables();
1952 MatrixImpl_t X(1, nVariables);
1953 TensorImpl_t X_vec ( 1, 1, nVariables); // needs to be really 1
1954 const Event *ev = GetEvent();
1955 const std::vector<Float_t>& inputValues = ev->GetValues();
1956 for (size_t i = 0; i < nVariables; i++) {
1957 X_vec(0,i,0) = inputValues[i]; // in case of column format !!
1958 }
1959 //X_vec.emplace_back(X);
1960
1961 size_t nTargets = std::max(1u, ev->GetNTargets());
1962 MatrixImpl_t YHat(1, nTargets);
1963 std::vector<Float_t> output(nTargets);
1964 fNet->Prediction(YHat, X_vec, fOutputFunction);
1965
1966 for (size_t i = 0; i < nTargets; i++)
1967 output[i] = YHat(0, i);
1968
1969 if (fRegressionReturnVal == NULL) {
1970 fRegressionReturnVal = new std::vector<Float_t>();
1971 }
1972 fRegressionReturnVal->clear();
1973
1974 Event * evT = new Event(*ev);
1975 for (size_t i = 0; i < nTargets; ++i) {
1976 evT->SetTarget(i, output[i]);
1977 }
1978
1979 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1980 for (size_t i = 0; i < nTargets; ++i) {
1981 fRegressionReturnVal->push_back(evT2->GetTarget(i));
1982 }
1983 delete evT;
1984 return *fRegressionReturnVal;
1985}
1986
1987const std::vector<Float_t> & TMVA::MethodDL::GetMulticlassValues()
1988{
1989 size_t nVariables = GetEvent()->GetNVariables();
1990 MatrixImpl_t X(1, nVariables);
1991 TensorImpl_t X_vec ( 1, 1, nVariables);
1992 MatrixImpl_t YHat(1, DataInfo().GetNClasses());
1993 if (fMulticlassReturnVal == NULL) {
1994 fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1995 }
1996
1997 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1998 for (size_t i = 0; i < nVariables; i++) {
1999 X_vec(0,i, 0) = inputValues[i];
2000 }
2001 //X_vec.emplace_back(X);
2002 fNet->Prediction(YHat, X_vec, fOutputFunction);
2003 for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
2004 (*fMulticlassReturnVal)[i] = YHat(0, i);
2005 }
2006 return *fMulticlassReturnVal;
2007}
2008
2009
////////////////////////////////////////////////////////////////////////////////
/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
/// Dispatches to PredictDeepNet<Architecture> according to the configured
/// architecture string (GPU with optional cuDNN, or CPU); returns one MVA
/// value per event in the clamped range [firstEvt, lastEvt).
////////////////////////////////////////////////////////////////////////////////
std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   // previous single-event implementation, kept for reference:
   // Long64_t nEvents = Data()->GetNEvents();
   // std::vector<Double_t> v(nEvents);
   // for (Long64_t i = 0; i < nEvents; ++i) {
   //    Data()->SetCurrentEvent(i);
   //    v[i] = GetMvaValue();
   // }
   // return v;


   // clamp the requested range to the available events
   // NOTE(review): if firstEvt > lastEvt after clamping, nEvents goes negative;
   // presumably callers always pass sane ranges — confirm.
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt-firstEvt;

   // use same batch size as for training (from first strategy);
   // for large input tensors fall back to a smaller default batch
   size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
   size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
   if ( size_t(nEvents) < batchSize ) batchSize = nEvents;

   // using for training same scalar type defined for the prediction
   if (this->GetArchitectureString() == "GPU") {
#ifdef R__HAS_TMVAGPU
      Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
#ifdef R__HAS_CUDNN
      return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#else
      return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#endif

// when built without GPU support a "GPU" request falls through to the
// unsupported-architecture error below
#endif
   } else if (this->GetArchitectureString() == "CPU") {
//#ifdef R__HAS_TMVACPU
      Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
      return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
//#endif
   }
   // neither CPU nor (available) GPU: the legacy STANDARD backend is gone
   Log() << kINFO << "ERROR: STANDARD architecture is not supported anymore for MethodDL ! "
         << Endl << Endl;
// #if HAVE_REFERENCE
//    return PredictDeepNet<DNN::TReference<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
// #else
   return std::vector<Double_t>(nEvents,TMath::QuietNaN());
//#endif

}
2060////////////////////////////////////////////////////////////////////////////////
2061void MethodDL::AddWeightsXMLTo(void * parent) const
2062{
2063 // Create the parent XML node with name "Weights"
2064 auto & xmlEngine = gTools().xmlengine();
2065 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2066
2067 /*! Get all necessary information, in order to be able to reconstruct the net
2068 * if we read the same XML file. */
2069
2070 // Deep Net specific info
2071 Int_t depth = fNet->GetDepth();
2072
2073 Int_t inputDepth = fNet->GetInputDepth();
2074 Int_t inputHeight = fNet->GetInputHeight();
2075 Int_t inputWidth = fNet->GetInputWidth();
2076
2077 Int_t batchSize = fNet->GetBatchSize();
2078
2079 Int_t batchDepth = fNet->GetBatchDepth();
2080 Int_t batchHeight = fNet->GetBatchHeight();
2081 Int_t batchWidth = fNet->GetBatchWidth();
2082
2083 char lossFunction = static_cast<char>(fNet->GetLossFunction());
2084 char initialization = static_cast<char>(fNet->GetInitialization());
2085 char regularization = static_cast<char>(fNet->GetRegularization());
2086
2087 Double_t weightDecay = fNet->GetWeightDecay();
2088
2089 // Method specific info (not sure these are needed)
2090 char outputFunction = static_cast<char>(this->GetOutputFunction());
2091 //char lossFunction = static_cast<char>(this->GetLossFunction());
2092
2093 // Add attributes to the parent node
2094 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2095
2096 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2097 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2098 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2099
2100 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2101 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2102 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2103 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2104
2105 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2106 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2107 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2108 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2109
2110 gTools().AddAttr(nn, "WeightDecay", weightDecay);
2111
2112
2113 for (Int_t i = 0; i < depth; i++)
2114 {
2115 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2116 }
2117
2118
2119}
2120
////////////////////////////////////////////////////////////////////////////////
// Rebuild fNet from the XML description written by AddWeightsXMLTo: read the
// global topology attributes, re-create the network layer by layer from the
// child nodes, and let every layer read back its own weights and biases.
// NOTE(review): the signature line — void MethodDL::ReadWeightsFromXML(void *rootXML),
// original source line 2122 per the cross-reference index — was dropped in the
// extraction of this listing.
{

   // the description may be wrapped in a "Weights" child or be the node itself
   auto netXML = gTools().GetChild(rootXML, "Weights");
   if (!netXML){
      netXML = rootXML;
   }

   // number of layers stored in the XML
   size_t netDepth;
   gTools().ReadAttr(netXML, "NetDepth", netDepth);

   // input tensor dimensions
   size_t inputDepth, inputHeight, inputWidth;
   gTools().ReadAttr(netXML, "InputDepth", inputDepth);
   gTools().ReadAttr(netXML, "InputHeight", inputHeight);
   gTools().ReadAttr(netXML, "InputWidth", inputWidth);

   // batch dimensions used at training time
   size_t batchSize, batchDepth, batchHeight, batchWidth;
   gTools().ReadAttr(netXML, "BatchSize", batchSize);
   // use always batchsize = 1
   //batchSize = 1;
   gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
   gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
   gTools().ReadAttr(netXML, "BatchWidth", batchWidth);

   // enum-valued settings were serialized as single characters
   // (see AddWeightsXMLTo)
   char lossFunctionChar;
   gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
   char initializationChar;
   gTools().ReadAttr(netXML, "Initialization", initializationChar);
   char regularizationChar;
   gTools().ReadAttr(netXML, "Regularization", regularizationChar);
   char outputFunctionChar;
   gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
   double weightDecay;
   gTools().ReadAttr(netXML, "WeightDecay", weightDecay);

   // create the net

   // DeepNetCpu_t is defined in MethodDL.h
   this->SetInputDepth(inputDepth);
   this->SetInputHeight(inputHeight);
   this->SetInputWidth(inputWidth);
   this->SetBatchDepth(batchDepth);
   this->SetBatchHeight(batchHeight);
   this->SetBatchWidth(batchWidth);



   fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
                                                           batchHeight, batchWidth,
                                                           static_cast<ELossFunction>(lossFunctionChar),
                                                           static_cast<EInitialization>(initializationChar),
                                                           static_cast<ERegularization>(regularizationChar),
                                                           weightDecay));

   fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);


   //size_t previousWidth = inputWidth;
   auto layerXML = gTools().xmlengine().GetChild(netXML);

   // loop on the layers and add them to the network
   for (size_t i = 0; i < netDepth; i++) {

      TString layerName = gTools().xmlengine().GetNodeName(layerXML);

      // case of dense layer
      if (layerName == "DenseLayer") {

         // read width and activation function and then we can create the layer
         size_t width = 0;
         gTools().ReadAttr(layerXML, "Width", width);

         // Read activation function.
         TString funcString;
         gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
         EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());


         fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability

      }
      // Convolutional Layer
      else if (layerName == "ConvLayer") {

         // read depth, filter/stride/padding geometry and activation function
         // NOTE: in each declaration below only the last variable is
         // zero-initialized; all are overwritten by ReadAttr
         size_t depth = 0;
         gTools().ReadAttr(layerXML, "Depth", depth);
         size_t fltHeight, fltWidth = 0;
         size_t strideRows, strideCols = 0;
         size_t padHeight, padWidth = 0;
         gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
         gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
         gTools().ReadAttr(layerXML, "StrideRows", strideRows);
         gTools().ReadAttr(layerXML, "StrideCols", strideCols);
         gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
         gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);

         // Read activation function.
         TString funcString;
         gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
         EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());


         fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                            padHeight, padWidth, actFunction);

      }

      // MaxPool Layer
      else if (layerName == "MaxPoolLayer") {

         // read maxpool layer info
         size_t filterHeight, filterWidth = 0;
         size_t strideRows, strideCols = 0;
         gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
         gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
         gTools().ReadAttr(layerXML, "StrideRows", strideRows);
         gTools().ReadAttr(layerXML, "StrideCols", strideCols);

         fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
      }
      else if (layerName == "ReshapeLayer") {

         // read reshape layer info
         size_t depth, height, width = 0;
         gTools().ReadAttr(layerXML, "Depth", depth);
         gTools().ReadAttr(layerXML, "Height", height);
         gTools().ReadAttr(layerXML, "Width", width);
         int flattening = 0;
         gTools().ReadAttr(layerXML, "Flattening",flattening );

         fNet->AddReshapeLayer(depth, height, width, flattening);

      }
      else if (layerName == "RNNLayer") {

         // read RNN layer info
         size_t stateSize,inputSize, timeSteps = 0;
         int rememberState= 0;
         gTools().ReadAttr(layerXML, "StateSize", stateSize);
         gTools().ReadAttr(layerXML, "InputSize", inputSize);
         gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
         gTools().ReadAttr(layerXML, "RememberState", rememberState );

         fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);

      }
      // BatchNorm Layer
      else if (layerName == "BatchNormLayer") {
         // use some dummy values which will be overwritten in
         // BatchNormLayer::ReadWeightsFromXML
         fNet->AddBatchNormLayer(0., 0.0);
      }
      // read weights and biases
      fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);

      // read next layer
      layerXML = gTools().GetNextChild(layerXML);
   }

   fBuildNet = false;
   // create now the input and output matrices
   //int n1 = batchHeight;
   //int n2 = batchWidth;
   // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
   //if (fXInput.size() > 0) fXInput.clear();
   //fXInput.emplace_back(MatrixImpl_t(n1,n2));
   // NOTE(review): original source lines 2287-2288 and 2291-2292 are missing
   // from this extracted listing — the statements creating fXInput (including
   // the body of the following `if`) are not visible here; consult the
   // original MethodDL.cxx before editing this region.
   if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
   // make here a ColumnMajor tensor

   // create pointer to output matrix used for the predictions
   fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );


}
2299
2300
////////////////////////////////////////////////////////////////////////////////
/// Deliberately empty: the legacy text-stream weight format is not supported
/// by MethodDL; weights are persisted via XML (AddWeightsXMLTo /
/// ReadWeightsFromXML).
void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
{
}
2305
////////////////////////////////////////////////////////////////////////////////
// Variable ranking is not implemented for MethodDL: always returns NULL.
// NOTE(review): the signature line — const Ranking* TMVA::MethodDL::CreateRanking(),
// original source line 2307 per the cross-reference index — was dropped in the
// extraction of this listing.
{
   // TODO
   return NULL;
}
2312
////////////////////////////////////////////////////////////////////////////////
// Method-specific help message: not implemented yet.
// NOTE(review): the signature line — void MethodDL::GetHelpMessage() const,
// original source line 2314 per the cross-reference index — was dropped in the
// extraction of this listing.
{
   // TODO
}
2318
2319} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
long long Long64_t
Definition: RtypesCore.h:69
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:96
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
The Formula class.
Definition: TFormula.h:84
Double_t Eval(Double_t x) const
Sets first variable (e.g. x) and evaluate formula.
Definition: TFormula.cxx:3318
UInt_t GetNCpu()
Definition: Config.h:72
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Definition: Configurable.h:168
MsgLogger & Log() const
Definition: Configurable.h:122
Adadelta Optimizer class.
Definition: Adadelta.h:44
Adagrad Optimizer class.
Definition: Adagrad.h:44
Adam Optimizer class.
Definition: Adam.h:44
static void PrintTensor(const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false)
Definition: Cpu.h:742
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition: Cpu.h:102
Generic Deep Neural Network class.
Definition: DeepNet.h:72
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition: DeepNet.h:720
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:635
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:503
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:464
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:418
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:668
Generic layer class.
Definition: DenseLayer.h:57
RMSProp Optimizer class.
Definition: RMSProp.h:44
Stochastic Batch Gradient Descent Optimizer class.
Definition: SGD.h:45
Generic General Layer class.
Definition: GeneralLayer.h:49
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:392
Class that contains all the data information.
Definition: DataSetInfo.h:60
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:205
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:99
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:309
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:320
UInt_t GetClass() const
Definition: Event.h:86
std::vector< Float_t > & GetValues()
Definition: Event.h:94
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
const char * GetName() const
Definition: MethodBase.h:333
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:684
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
const TString & GetMethodName() const
Definition: MethodBase.h:330
const Event * GetEvent() const
Definition: MethodBase.h:749
DataSetInfo & DataInfo() const
Definition: MethodBase.h:409
UInt_t GetNVariables() const
Definition: MethodBase.h:344
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:593
UInt_t GetNvar() const
Definition: MethodBase.h:343
TrainingHistory fTrainHistory
Definition: MethodBase.h:425
DataSet * Data() const
Definition: MethodBase.h:408
IPythonInteractive * fInteractive
Definition: MethodBase.h:446
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition: MethodDL.h:101
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition: MethodDL.h:175
void GetHelpMessage() const
Definition: MethodDL.cxx:2314
DNN::ELossFunction fLossFunction
The loss function.
Definition: MethodDL.h:182
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDL.cxx:1987
std::vector< size_t > fInputShape
Contains the batch size (no.
Definition: MethodDL.h:170
TString fLayoutString
The string defining the layout of the deep net.
Definition: MethodDL.h:186
void SetInputDepth(int inputDepth)
Setters.
Definition: MethodDL.h:278
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:200
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1631
size_t GetBatchHeight() const
Definition: MethodDL.h:255
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
Definition: MethodDL.cxx:2013
void ParseRnnLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate RNN layer.
Definition: MethodDL.cxx:930
TString fWeightInitializationString
The string defining the weight initialization method.
Definition: MethodDL.h:189
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:767
TensorImpl_t fXInput
Definition: MethodDL.h:198
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:178
TString fArchitectureString
The string defining the architecture: CPU or GPU.
Definition: MethodDL.h:190
void Init()
default initializations
Definition: MethodDL.cxx:448
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:1010
void TrainDeepNet()
train of deep neural network using the defined architecture
Definition: MethodDL.cxx:1155
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:272
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:261
void ParseLstmLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate LSTM layer.
Definition: MethodDL.cxx:991
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:588
UInt_t GetNumValidationSamples()
Parse the validation string and return the number of events used for validation.
TString GetBatchLayoutString() const
Definition: MethodDL.h:265
void SetInputWidth(int inputWidth)
Definition: MethodDL.h:280
void ProcessOptions()
Definition: MethodDL.cxx:226
HostBufferImpl_t fXInputBuffer
Definition: MethodDL.h:199
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition: MethodDL.h:176
size_t GetInputDepth() const
Definition: MethodDL.h:247
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDL.cxx:1949
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:201
TString GetInputLayoutString() const
Definition: MethodDL.h:264
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:285
size_t GetInputHeight() const
Definition: MethodDL.h:248
TString GetArchitectureString() const
Definition: MethodDL.h:270
void ParseBatchLayout()
Parse the input layout.
Definition: MethodDL.cxx:487
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate batch normalization layer.
Definition: MethodDL.cxx:889
void ReadWeightsFromStream(std::istream &)
Definition: MethodDL.cxx:2302
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:2122
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition: MethodDL.h:191
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:85
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:181
DNN::EInitialization fWeightInitialization
The initialization method.
Definition: MethodDL.h:180
size_t GetBatchDepth() const
Definition: MethodDL.h:254
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:196
size_t GetInputWidth() const
Definition: MethodDL.h:249
void SetInputShape(std::vector< size_t > inputShape)
Definition: MethodDL.h:281
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:262
TString fBatchLayoutString
The string defining the layout of the batch.
Definition: MethodDL.h:185
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:1082
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:668
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:828
TString fTrainingStrategyString
The string defining the training strategy.
Definition: MethodDL.h:188
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2307
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition: MethodDL.h:103
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:284
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:1043
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:286
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1819
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:260
void SetBatchSize(size_t batchSize)
Definition: MethodDL.h:283
TString GetLayoutString() const
Definition: MethodDL.h:266
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition: MethodDL.h:174
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition: MethodDL.h:99
size_t GetBatchWidth() const
Definition: MethodDL.h:256
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:2061
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:100
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDL.cxx:1697
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:1036
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:455
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:193
void SetInputHeight(int inputHeight)
Definition: MethodDL.h:279
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling the ProcesOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:534
TString fErrorStrategy
The string defining the error strategy for training.
Definition: MethodDL.h:187
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:161
TString fInputLayoutString
The string defining the layout of the input.
Definition: MethodDL.h:184
EMsgType GetMinType() const
Definition: MsgLogger.h:71
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:140
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TXMLEngine & xmlengine()
Definition: Tools.h:270
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:337
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:355
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1235
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
Definition: Types.h:73
EAnalysisType
Definition: Types.h:127
@ kMulticlass
Definition: Types.h:130
@ kClassification
Definition: Types.h:128
@ kRegression
Definition: Types.h:129
@ kTraining
Definition: Types.h:144
void Print(Option_t *name="") const
Print the matrix as a table of elements.
TMatrixT.
Definition: TMatrixT.h:39
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Definition: TNamed.cxx:128
An array of TObjects.
Definition: TObjArray.h:37
Collectable string class.
Definition: TObjString.h:28
const TString & GetString() const
Definition: TObjString.h:46
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:550
Basic string class.
Definition: TString.h:131
Ssiz_t Length() const
Definition: TString.h:405
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1921
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1106
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:499
const char * Data() const
Definition: TString.h:364
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
@ kTrailing
Definition: TString.h:262
@ kBoth
Definition: TString.h:262
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1138
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2197
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:709
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
const Int_t n
Definition: legend1.C:16
#define I(x, y, z)
constexpr size_t block
Definition: BatchHelpers.h:29
double T(double x)
Definition: ChebyshevPol.h:34
EInitialization
Definition: Functions.h:70
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:80
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:44
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:214
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:63
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:55
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDL.cxx:69
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Bool_t IsNaN(Double_t x)
Definition: TMath.h:882
Double_t QuietNaN()
Returns a quiet NaN as defined by IEEE 754
Definition: TMath.h:891
Double_t Log(Double_t x)
Definition: TMath.h:750
All of the options that can be specified in the training string.
Definition: MethodDL.h:65
DNN::EOptimizer optimizer
Definition: MethodDL.h:71
DNN::ERegularization regularization
Definition: MethodDL.h:70
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:76
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
auto * t1
Definition: textangle.C:20
REAL epsilon
Definition: triangle.c:617
static void output(int code)
Definition: gifencode.c:226