Logo ROOT   6.16/01
Reference Guide
MethodDL.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/cnn:$Id$Ndl
2// Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3/**********************************************************************************
4 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5 * Package: TMVA *
6 * Class : MethodDL *
7 * Web : http://tmva.sourceforge.net *
8 * *
9 * Description: *
10 * Deep Neural Network Method *
11 * *
12 * Authors (alphabetical): *
13 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15 * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#include "TFormula.h"
29#include "TString.h"
30#include "TMath.h"
31
32#include "TMVA/Tools.h"
33#include "TMVA/Configurable.h"
34#include "TMVA/IMethod.h"
36#include "TMVA/MethodDL.h"
37#include "TMVA/Types.h"
39#include "TMVA/DNN/Functions.h"
41#include "TMVA/DNN/SGD.h"
42#include "TMVA/DNN/Adam.h"
43#include "TMVA/DNN/Adagrad.h"
44#include "TMVA/DNN/RMSProp.h"
45#include "TMVA/DNN/Adadelta.h"
46#include "TMVA/Timer.h"
47
48#include "TStopwatch.h"
49
50#include <chrono>
51
54
55using namespace TMVA::DNN::CNN;
56using namespace TMVA::DNN;
57
63
64namespace TMVA {
65
66////////////////////////////////////////////////////////////////////////////////
67TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
68{
69 key.ToUpper();
70 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
71 if (it == keyValueMap.end()) {
72 return TString("");
73 }
74 return it->second;
75}
76
77////////////////////////////////////////////////////////////////////////////////
78template <typename T>
79T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
80
81////////////////////////////////////////////////////////////////////////////////
82template <>
83int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
84{
85 TString value(fetchValueTmp(keyValueMap, key));
86 if (value == "") {
87 return defaultValue;
88 }
89 return value.Atoi();
90}
91
92////////////////////////////////////////////////////////////////////////////////
93template <>
94double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
95{
96 TString value(fetchValueTmp(keyValueMap, key));
97 if (value == "") {
98 return defaultValue;
99 }
100 return value.Atof();
101}
102
103////////////////////////////////////////////////////////////////////////////////
104template <>
105TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
106{
107 TString value(fetchValueTmp(keyValueMap, key));
108 if (value == "") {
109 return defaultValue;
110 }
111 return value;
112}
113
114////////////////////////////////////////////////////////////////////////////////
115template <>
116bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
117{
118 TString value(fetchValueTmp(keyValueMap, key));
119 if (value == "") {
120 return defaultValue;
121 }
122
123 value.ToUpper();
124 if (value == "TRUE" || value == "T" || value == "1") {
125 return true;
126 }
127
128 return false;
129}
130
131////////////////////////////////////////////////////////////////////////////////
132template <>
133std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
134 std::vector<double> defaultValue)
135{
136 TString parseString(fetchValueTmp(keyValueMap, key));
137 if (parseString == "") {
138 return defaultValue;
139 }
140
141 parseString.ToUpper();
142 std::vector<double> values;
143
144 const TString tokenDelim("+");
145 TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
146 TIter nextToken(tokenStrings);
147 TObjString *tokenString = (TObjString *)nextToken();
148 for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
149 std::stringstream sstr;
150 double currentValue;
151 sstr << tokenString->GetString().Data();
152 sstr >> currentValue;
153 values.push_back(currentValue);
154 }
155 return values;
156}
157
158////////////////////////////////////////////////////////////////////////////////
// NOTE(review): body of MethodDL::DeclareOptions() — the signature line
// (original line 159) was lost in extraction; lines below keep the scraped
// line-number prefixes untouched.
// Registers every user-facing booking option with its default value. The raw
// strings declared here are decoded later by ProcessOptions() /
// ParseKeyValueString().
160{
161 // Set default values for all option strings
162
163 DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
164
165 DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
166
167 DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");
168
169 DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
170 " or cross entropy (binary classification).");
171 AddPreDefVal(TString("CROSSENTROPY"));
172 AddPreDefVal(TString("SUMOFSQUARES"));
173 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
174
175 DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
176 AddPreDefVal(TString("XAVIER"));
177 AddPreDefVal(TString("XAVIERUNIFORM"));
178
179 DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");
180
181 DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
182 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
183 "Specify as 100 to use exactly 100 events. (Default: 20%)");
184
185 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
186 AddPreDefVal(TString("STANDARD"));
187 AddPreDefVal(TString("CPU"));
188 AddPreDefVal(TString("GPU"));
189 AddPreDefVal(TString("OPENCL"));
190
191 // define training strategy blocks separated by a separator "|"; keys within
191.5// a block are comma-separated (parsed by ParseKeyValueString below)
192 DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
193 "Momentum=0.3,"
194 "Repetitions=3,"
195 "ConvergenceSteps=50,"
196 "BatchSize=30,"
197 "TestRepetitions=7,"
198 "WeightDecay=0.0,"
199 "Regularization=None,"
200 "DropConfig=0.0,"
201 "DropRepetitions=5"
202 "|"
203 "LearningRate=1e-4,"
204 "Momentum=0.3,"
205 "Repetitions=3,"
206 "ConvergenceSteps=50,"
207 "MaxEpochs=2000,"
208 "BatchSize=20,"
209 "TestRepetitions=7,"
210 "WeightDecay=0.001,"
211 "Regularization=L2,"
212 "DropConfig=0.0+0.5+0.5,"
213 "DropRepetitions=5,"
214 "Multithreading=True",
215 "TrainingStrategy", "Defines the training strategies.");
217
218////////////////////////////////////////////////////////////////////////////////
// NOTE(review): body of MethodDL::ProcessOptions() — the signature line
// (original 219) and a number of statement lines (e.g. 222, 286-298, 307-335,
// 353-376) were lost in extraction, which is why several branches below look
// empty. The missing statements presumably assigned fLossFunction /
// fOutputFunction / fWeightInitialization / settings.regularization /
// settings.optimizer in the matching branches — confirm against the upstream
// ROOT sources before editing this function.
// Decodes the raw option strings from DeclareOptions() into enums and one
// TTrainingSettings per '|'-separated strategy block, falling back to an
// available architecture when the requested backend was not compiled in.
220{
221
223 Log() << kINFO << "Will ignore negative events in training!" << Endl;
224 }
225
// NOTE(review): the two messages below say "Architecture=CPU or
// Architecture=CPU"; the second was presumably meant to be GPU. Runtime
// strings are deliberately left untouched here.
226 if (fArchitectureString == "STANDARD") {
227 Log() << kINFO << "The STANDARD architecture has been deprecated. "
228 "Please use Architecture=CPU or Architecture=CPU."
229 "See the TMVA Users' Guide for instructions if you "
230 "encounter problems."
231 << Endl;
232 }
233 if (fArchitectureString == "OPENCL") {
234 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
235 "Please use Architecture=CPU or Architecture=CPU for the "
236 "time being. See the TMVA Users' Guide for instructions "
237 "if you encounter problems."
238 << Endl;
239 }
240
241 // the architecture can now be set at runtime as an option
242
243
244 if (fArchitectureString == "GPU") {
245#ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
246 Log() << kERROR << "CUDA backend not enabled. Please make sure "
247 "you have CUDA installed and it was successfully "
248 "detected by CMAKE by using -Dcuda=On "
249 << Endl;
250#ifdef R__HAS_TMVACPU
251 fArchitectureString = "CPU";
252 Log() << kINFO << "Will use now the CPU architecture !" << Endl;
253#else
254 fArchitectureString = "Standard";
255 Log() << kINFO << "Will use now the Standard architecture !" << Endl;
256#endif
257#else
258 Log() << kINFO << "Will use now the GPU architecture !" << Endl;
259#endif
260 }
261
262 else if (fArchitectureString == "CPU") {
263#ifndef R__HAS_TMVACPU // TMVA has no CPU support
264 Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
265 "you have a BLAS implementation and it was successfully "
266 "detected by CMake as well that the imt CMake flag is set."
267 << Endl;
268#ifdef R__HAS_TMVAGPU
269 fArchitectureString = "GPU";
270 Log() << kINFO << "Will use now the GPU architecture !" << Endl;
271#else
272 fArchitectureString = "STANDARD";
273 Log() << kINFO << "Will use now the Standard architecture !" << Endl;
274#endif
275#else
276 Log() << kINFO << "Will use now the CPU architecture !" << Endl;
277#endif
278 }
279
280 else {
281 Log() << kINFO << "Will use the deprecated STANDARD architecture !" << Endl;
282 fArchitectureString = "STANDARD";
283 }
284
285 // Input Layout
288
289 // Loss function and output.
// NOTE(review): assignments for the CROSSENTROPY branches (original 296, 314)
// are among the extraction-lost lines.
292 if (fErrorStrategy == "SUMOFSQUARES") {
293 fLossFunction = ELossFunction::kMeanSquaredError;
294 }
295 if (fErrorStrategy == "CROSSENTROPY") {
297 }
299 } else if (fAnalysisType == Types::kRegression) {
300 if (fErrorStrategy != "SUMOFSQUARES") {
301 Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
302 << " neural net error function. Setting error function to "
303 << " SUMOFSQUARES now." << Endl;
304 }
305
306 fLossFunction = ELossFunction::kMeanSquaredError;
308 } else if (fAnalysisType == Types::kMulticlass) {
309 if (fErrorStrategy == "SUMOFSQUARES") {
310 fLossFunction = ELossFunction::kMeanSquaredError;
311 }
312 if (fErrorStrategy == "CROSSENTROPY") {
314 }
315 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
316 fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
317 }
319 }
320
321 // Initialization
322 // the biases will be always initialized to zero
// NOTE(review): the fWeightInitialization assignments for each branch were
// lost in extraction (original 324-334).
323 if (fWeightInitializationString == "XAVIER") {
325 } else if (fWeightInitializationString == "XAVIERUNIFORM") {
327 } else if (fWeightInitializationString == "GAUSS") {
329 } else if (fWeightInitializationString == "UNIFORM") {
331 } else if (fWeightInitializationString == "ZERO") {
333 } else {
335 }
336
337 // Training settings: one TTrainingSettings per '|'-separated block.
338
339 KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString, TString("|"), TString(","));
340 for (auto &block : strategyKeyValues) {
341 TTrainingSettings settings;
342
343 settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
344 settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
345 settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
346 settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
347 settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
348 settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
349 settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
350 settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
351
352 TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
353 if (regularization == "L1") {
355 } else if (regularization == "L2") {
357 } else {
359 }
360
361 TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
362 if (optimizer == "SGD") {
364 } else if (optimizer == "ADAM") {
366 } else if (optimizer == "ADAGRAD") {
368 } else if (optimizer == "RMSPROP") {
370 } else if (optimizer == "ADADELTA") {
372 } else {
373 // Make Adam as default choice if the input string is
374 // incorrect.
376 }
377
378 TString strMultithreading = fetchValueTmp(block, "Multithreading", TString("True"));
379
380 if (strMultithreading.BeginsWith("T")) {
381 settings.multithreading = true;
382 } else {
383 settings.multithreading = false;
384 }
385
386 fTrainingSettings.push_back(settings);
387 }
388}
389
390////////////////////////////////////////////////////////////////////////////////
391/// default initializations
// NOTE(review): body of MethodDL::Init() — the signature line (original 392)
// was lost in extraction. Intentionally empty in this revision.
393{
394 // Nothing to do here
395}
396
397////////////////////////////////////////////////////////////////////////////////
398/// Parse the input layout
// NOTE(review): body of MethodDL::ParseInputLayout() — the signature line
// (original 399) was lost in extraction.
// Splits the "depth|height|width" InputLayout option on '|' and stores the
// three integers via the setters below; tokens beyond the third are ignored
// and missing trailing fields keep their 0 default.
// NOTE(review): the TObjArray returned by Tokenize() is never deleted — ROOT
// documents it as caller-owned, so this appears to leak one array per call.
400{
401 // Define the delimiter
402 const TString delim("|");
403
404 // Get the input layout string
405 TString inputLayoutString = this->GetInputLayoutString();
406
407 size_t depth = 0;
408 size_t height = 0;
409 size_t width = 0;
410
411 // Split the input layout string
412 TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
413 TIter nextInputDim(inputDimStrings);
414 TObjString *inputDimString = (TObjString *)nextInputDim();
415 int idxToken = 0;
416
417 for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
418 switch (idxToken) {
419 case 0: // input depth
420 {
421 TString strDepth(inputDimString->GetString());
422 depth = (size_t)strDepth.Atoi();
423 } break;
424 case 1: // input height
425 {
426 TString strHeight(inputDimString->GetString());
427 height = (size_t)strHeight.Atoi();
428 } break;
429 case 2: // input width
430 {
431 TString strWidth(inputDimString->GetString());
432 width = (size_t)strWidth.Atoi();
433 } break;
434 }
435 ++idxToken;
436 }
437
438 this->SetInputDepth(depth);
439 this->SetInputHeight(height);
440 this->SetInputWidth(width);
441}
442
443////////////////////////////////////////////////////////////////////////////////
444/// Parse the batch layout
// NOTE(review): body of MethodDL::ParseBatchLayout() — the signature line
// (original 445) was lost in extraction. (The original doc comment said
// "input layout", copy-pasted from the function above.)
// Splits the "batchDepth|batchHeight|batchWidth" BatchLayout option on '|'
// and stores the three integers via the setters below.
// NOTE(review): as in ParseInputLayout, the Tokenize() result is caller-owned
// and never deleted — apparent leak.
446{
447 // Define the delimiter
448 const TString delim("|");
449
450 // Get the batch layout string
451 TString batchLayoutString = this->GetBatchLayoutString();
452
453 size_t batchDepth = 0;
454 size_t batchHeight = 0;
455 size_t batchWidth = 0;
456
457 // Split the batch layout string
458 TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
459 TIter nextBatchDim(batchDimStrings);
460 TObjString *batchDimString = (TObjString *)nextBatchDim();
461 int idxToken = 0;
462
463 for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
464 switch (idxToken) {
465 case 0: // batch depth
466 {
467 TString strDepth(batchDimString->GetString());
468 batchDepth = (size_t)strDepth.Atoi();
469 } break;
470 case 1: // batch height
471 {
472 TString strHeight(batchDimString->GetString());
473 batchHeight = (size_t)strHeight.Atoi();
474 } break;
475 case 2: // batch width
476 {
477 TString strWidth(batchDimString->GetString());
478 batchWidth = (size_t)strWidth.Atoi();
479 } break;
480 }
481 ++idxToken;
482 }
483
484 this->SetBatchDepth(batchDepth);
485 this->SetBatchHeight(batchHeight);
486 this->SetBatchWidth(batchWidth);
487}
488
489////////////////////////////////////////////////////////////////////////////////
490/// Create a deep net based on the layout string
// NOTE(review): the two signature lines of MethodDL::CreateDeepNet(deepNet,
// nets) (original 492-493) were lost in extraction.
// Splits the Layout option on ',' into per-layer specs and dispatches each
// spec (by its first '|'-token: DENSE/CONV/MAXPOOL/RESHAPE/RNN/LSTM) to the
// matching Parse*Layer helper; LSTM aborts with kFATAL.
// NOTE(review): `token` is dereferenced without a null check — an empty spec
// (e.g. a trailing comma in Layout) would presumably crash; confirm upstream.
// The tokenized TObjArrays are also never deleted (caller-owned per ROOT docs).
491template <typename Architecture_t, typename Layer_t>
494{
495 // Layer specification, layer details
496 const TString layerDelimiter(",");
497 const TString subDelimiter("|");
498
499 TString layoutString = this->GetLayoutString();
500
501 //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
502
503 // Split layers
504 TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
505 TIter nextLayer(layerStrings);
506 TObjString *layerString = (TObjString *)nextLayer();
507
508
509 for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
510 // Split layer details
511 TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
512 TIter nextToken(subStrings);
513 TObjString *token = (TObjString *)nextToken();
514
515 // Determine the type of the layer
516 TString strLayerType = token->GetString();
517
518
519 if (strLayerType == "DENSE") {
520 ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
521 } else if (strLayerType == "CONV") {
522 ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
523 } else if (strLayerType == "MAXPOOL") {
524 ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
525 } else if (strLayerType == "RESHAPE") {
526 ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
527 } else if (strLayerType == "RNN") {
528 ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter);
529 } else if (strLayerType == "LSTM") {
530 Log() << kFATAL << "LSTM Layer is not yet fully implemented" << Endl;
531 //ParseLstmLayer(deepNet, nets, layerString->GetString(), subDelimiter);
532 }
533 }
534}
535
536////////////////////////////////////////////////////////////////////////////////
537/// Parses the layer string and creates the appropriate dense layer
// NOTE(review): the first signature line of MethodDL::ParseDenseLayer
// (original 539) and the declaration of `activationFunction` (original 544,
// presumably an EActivationFunction with a default) were lost in extraction.
// Spec format: DENSE|<width-or-formula>|<activation>, in either order; the
// width may be a TFormula expression in N/n (number of input variables).
538template <typename Architecture_t, typename Layer_t>
540 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
541 TString delim)
542{
543 int width = 0;
545
546 // this return number of input variables for the method
547 // it can be used to deduce width of dense layer if specified as N+10
548 // where N is the number of input variables
549 const size_t inputSize = GetNvar();
550
551 // Split layer details
552 TObjArray *subStrings = layerString.Tokenize(delim);
553 TIter nextToken(subStrings);
554 TObjString *token = (TObjString *)nextToken();
555 int idxToken = 0;
556
557 // loop on the tokens
558 // order of specifying width and activation function is not relevant
559 // both 100|TANH and TANH|100 are valid cases
560 for (; token != nullptr; token = (TObjString *)nextToken()) {
561 idxToken++;
562 // first token defines the layer type- skip it
563 if (idxToken == 1) continue;
564 // try a match with the activation function
565 TString strActFnc(token->GetString());
566 if (strActFnc == "RELU") {
567 activationFunction = DNN::EActivationFunction::kRelu;
568 } else if (strActFnc == "TANH") {
569 activationFunction = DNN::EActivationFunction::kTanh;
570 } else if (strActFnc == "SYMMRELU") {
571 activationFunction = DNN::EActivationFunction::kSymmRelu;
572 } else if (strActFnc == "SOFTSIGN") {
573 activationFunction = DNN::EActivationFunction::kSoftSign;
574 } else if (strActFnc == "SIGMOID") {
575 activationFunction = DNN::EActivationFunction::kSigmoid;
576 } else if (strActFnc == "LINEAR") {
577 activationFunction = DNN::EActivationFunction::kIdentity;
578 } else if (strActFnc == "GAUSS") {
579 activationFunction = DNN::EActivationFunction::kGauss;
580 } else if (width == 0) {
581 // no match found try to parse as text showing the width
582 // support for input a formula where the variable 'x' is 'N' in the string
583 // use TFormula for the evaluation
584 TString strNumNodes = strActFnc;
585 // number of nodes
586 TString strN("x");
587 strNumNodes.ReplaceAll("N", strN);
588 strNumNodes.ReplaceAll("n", strN);
589 TFormula fml("tmp", strNumNodes);
590 width = fml.Eval(inputSize);
591 }
592
593 }
594
595 // Add the dense layer, initialize the weights and biases and copy
596 TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
597 denseLayer->Initialize();
598
599 // add same layer to fNet (the master net used for evaluation/persistence)
600 if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
601
602 //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
603
604 // add the copy to all slave nets
605 //for (size_t i = 0; i < nets.size(); i++) {
606 // nets[i].AddDenseLayer(copyDenseLayer);
607 //}
608
609 // check compatibility of added layer
610 // for a dense layer input should be 1 x 1 x DxHxW
611}
612
613////////////////////////////////////////////////////////////////////////////////
614/// Parses the layer string and creates the appropriate convolutional layer
// NOTE(review): the first signature line of MethodDL::ParseConvLayer
// (original 616) and the declaration of `activationFunction` (original 627)
// were lost in extraction.
// Spec format (positional, '|'-separated):
//   CONV|depth|fltHeight|fltWidth|strideRows|strideCols|padHeight|padWidth|activation
615template <typename Architecture_t, typename Layer_t>
617 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
618 TString delim)
619{
620 int depth = 0;
621 int fltHeight = 0;
622 int fltWidth = 0;
623 int strideRows = 0;
624 int strideCols = 0;
625 int zeroPadHeight = 0;
626 int zeroPadWidth = 0;
628
629 // Split layer details
630 TObjArray *subStrings = layerString.Tokenize(delim);
631 TIter nextToken(subStrings);
632 TObjString *token = (TObjString *)nextToken();
633 int idxToken = 0;
634
635 for (; token != nullptr; token = (TObjString *)nextToken()) {
636 switch (idxToken) {
637 case 1: // depth
638 {
639 TString strDepth(token->GetString());
640 depth = strDepth.Atoi();
641 } break;
642 case 2: // filter height
643 {
644 TString strFltHeight(token->GetString());
645 fltHeight = strFltHeight.Atoi();
646 } break;
647 case 3: // filter width
648 {
649 TString strFltWidth(token->GetString());
650 fltWidth = strFltWidth.Atoi();
651 } break;
652 case 4: // stride in rows
653 {
654 TString strStrideRows(token->GetString());
655 strideRows = strStrideRows.Atoi();
656 } break;
657 case 5: // stride in cols
658 {
659 TString strStrideCols(token->GetString());
660 strideCols = strStrideCols.Atoi();
661 } break;
662 case 6: // zero padding height
663 {
664 TString strZeroPadHeight(token->GetString());
665 zeroPadHeight = strZeroPadHeight.Atoi();
666 } break;
667 case 7: // zero padding width
668 {
669 TString strZeroPadWidth(token->GetString());
670 zeroPadWidth = strZeroPadWidth.Atoi();
671 } break;
672 case 8: // activation function
673 {
674 TString strActFnc(token->GetString());
675 if (strActFnc == "RELU") {
676 activationFunction = DNN::EActivationFunction::kRelu;
677 } else if (strActFnc == "TANH") {
678 activationFunction = DNN::EActivationFunction::kTanh;
679 } else if (strActFnc == "SYMMRELU") {
680 activationFunction = DNN::EActivationFunction::kSymmRelu;
681 } else if (strActFnc == "SOFTSIGN") {
682 activationFunction = DNN::EActivationFunction::kSoftSign;
683 } else if (strActFnc == "SIGMOID") {
684 activationFunction = DNN::EActivationFunction::kSigmoid;
685 } else if (strActFnc == "LINEAR") {
686 activationFunction = DNN::EActivationFunction::kIdentity;
687 } else if (strActFnc == "GAUSS") {
688 activationFunction = DNN::EActivationFunction::kGauss;
689 }
690 } break;
691 }
692 ++idxToken;
693 }
694
695 // Add the convolutional layer, initialize the weights and biases and copy
696 TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
697 zeroPadHeight, zeroPadWidth, activationFunction);
698 convLayer->Initialize();
699
700 // Add same layer to fNet
701 if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
702 zeroPadHeight, zeroPadWidth, activationFunction);
703
704 //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
705
706 //// add the copy to all slave nets
707 //for (size_t i = 0; i < nets.size(); i++) {
708 // nets[i].AddConvLayer(copyConvLayer);
709 //}
710}
711
712////////////////////////////////////////////////////////////////////////////////
713/// Parses the layer string and creates the appropriate max pool layer
// NOTE(review): the first signature line of MethodDL::ParseMaxPoolLayer
// (original 715) was lost in extraction.
// Spec format (positional): MAXPOOL|filterHeight|filterWidth|strideRows|strideCols
714template <typename Architecture_t, typename Layer_t>
716 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
717 TString delim)
718{
719
720 int filterHeight = 0;
721 int filterWidth = 0;
722 int strideRows = 0;
723 int strideCols = 0;
724
725 // Split layer details
726 TObjArray *subStrings = layerString.Tokenize(delim);
727 TIter nextToken(subStrings);
728 TObjString *token = (TObjString *)nextToken();
729 int idxToken = 0;
730
731 for (; token != nullptr; token = (TObjString *)nextToken()) {
732 switch (idxToken) {
733 case 1: // filter height
734 {
735 TString strFrmHeight(token->GetString());
736 filterHeight = strFrmHeight.Atoi();
737 } break;
738 case 2: // filter width
739 {
740 TString strFrmWidth(token->GetString());
741 filterWidth = strFrmWidth.Atoi();
742 } break;
743 case 3: // stride in rows
744 {
745 TString strStrideRows(token->GetString());
746 strideRows = strStrideRows.Atoi();
747 } break;
748 case 4: // stride in cols
749 {
750 TString strStrideCols(token->GetString());
751 strideCols = strStrideCols.Atoi();
752 } break;
753 }
754 ++idxToken;
755 }
756
757 // Add the Max pooling layer
758 // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
759 deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
760
761 // Add the same layer to fNet
762 if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
763
764
765 //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
766
767 //// add the copy to all slave nets
768 //for (size_t i = 0; i < nets.size(); i++) {
769 // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
770 //}
771}
772
773////////////////////////////////////////////////////////////////////////////////
774/// Parses the layer string and creates the appropriate reshape layer
// NOTE(review): the first signature line of MethodDL::ParseReshapeLayer
// (original 776) was lost in extraction.
// Spec format: RESHAPE|depth|height|width or RESHAPE|FLAT — a "FLAT" token
// jumps straight to case 4 and requests flattening.
775template <typename Architecture_t, typename Layer_t>
777 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
778 TString delim)
779{
780 int depth = 0;
781 int height = 0;
782 int width = 0;
783 bool flattening = false;
784
785 // Split layer details
786 TObjArray *subStrings = layerString.Tokenize(delim);
787 TIter nextToken(subStrings);
788 TObjString *token = (TObjString *)nextToken();
789 int idxToken = 0;
790
791 for (; token != nullptr; token = (TObjString *)nextToken()) {
792 if (token->GetString() == "FLAT") idxToken=4;
793 switch (idxToken) {
794 case 1: {
795 TString strDepth(token->GetString());
796 depth = strDepth.Atoi();
797 } break;
798 case 2: // height
799 {
800 TString strHeight(token->GetString());
801 height = strHeight.Atoi();
802 } break;
803 case 3: // width
804 {
805 TString strWidth(token->GetString());
806 width = strWidth.Atoi();
807 } break;
808 case 4: // flattening
809 {
810 TString flat(token->GetString());
811 if (flat == "FLAT") {
812 flattening = true;
813 }
814 } break;
815 }
816 ++idxToken;
817 }
818
819 // Add the reshape layer
820 // TReshapeLayer<Architecture_t> *reshapeLayer =
821 deepNet.AddReshapeLayer(depth, height, width, flattening);
822
823 // Add the same layer to fNet
824 if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
825
826 //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
827
828 //// add the copy to all slave nets
829 //for (size_t i = 0; i < nets.size(); i++) {
830 // nets[i].AddReshapeLayer(copyReshapeLayer);
831 //}
832}
833
834////////////////////////////////////////////////////////////////////////////////
835/// Parses the layer string and creates the appropriate rnn layer
// NOTE(review): the first signature line of MethodDL::ParseRnnLayer
// (original 837) was lost in extraction.
// Spec format: RNN|stateSize|inputSize|timeSteps|rememberState
// NOTE(review): case 3 below (original 866-869) has no `break`, so after
// parsing timeSteps control falls through into case 4 and rememberState is
// overwritten from the time-steps token — this looks like a genuine bug;
// confirm against the upstream ROOT sources before relying on rememberState.
836template <typename Architecture_t, typename Layer_t>
838 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
839 TString delim)
840{
841 // int depth = 0;
842 int stateSize = 0;
843 int inputSize = 0;
844 int timeSteps = 0;
845 bool rememberState = false;
846
847 // Split layer details
848 TObjArray *subStrings = layerString.Tokenize(delim);
849 TIter nextToken(subStrings);
850 TObjString *token = (TObjString *)nextToken();
851 int idxToken = 0;
852
853 for (; token != nullptr; token = (TObjString *)nextToken()) {
854 switch (idxToken) {
855 case 1: // state size
856 {
857 TString strstateSize(token->GetString());
858 stateSize = strstateSize.Atoi();
859 } break;
860 case 2: // input size
861 {
862 TString strinputSize(token->GetString());
863 inputSize = strinputSize.Atoi();
864 } break;
865 case 3: // time steps
866 {
867 TString strtimeSteps(token->GetString());
868 timeSteps = strtimeSteps.Atoi();
869 }
870 case 4: // remember state (1 or 0)
871 {
872 TString strrememberState(token->GetString());
873 rememberState = (bool) strrememberState.Atoi();
874 } break;
875 }
876 ++idxToken;
877 }
878
879 // Add the recurrent layer, initialize the weights and biases and copy
880 TBasicRNNLayer<Architecture_t> *basicRNNLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize,
881 timeSteps, rememberState);
882 basicRNNLayer->Initialize();
883
884 // Add same layer to fNet
885 if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
886
887 //TBasicRNNLayer<Architecture_t> *copyRNNLayer = new TBasicRNNLayer<Architecture_t>(*basicRNNLayer);
888
889 //// add the copy to all slave nets
890 //for (size_t i = 0; i < nets.size(); i++) {
891 // nets[i].AddBasicRNNLayer(copyRNNLayer);
892 //}
893}
894
895////////////////////////////////////////////////////////////////////////////////
896/// Parses the layer string and creates the appropriate lstm layer
// NOTE(review): the first signature line of MethodDL::ParseLstmLayer
// (original 898) was lost in extraction. This is an unimplemented stub: the
// switch is empty, so the function only tokenizes its input and discards it.
// CreateDeepNet() never calls it (LSTM aborts with kFATAL there).
897template <typename Architecture_t, typename Layer_t>
899 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
900 TString delim)
901{
902 // Split layer details
903 TObjArray *subStrings = layerString.Tokenize(delim);
904 TIter nextToken(subStrings);
905 TObjString *token = (TObjString *)nextToken();
906 int idxToken = 0;
907
908 for (; token != nullptr; token = (TObjString *)nextToken()) {
909 switch (idxToken) {
910 }
911 ++idxToken;
912 }
913}
914
915////////////////////////////////////////////////////////////////////////////////
916/// Standard constructor.
// Forwards booking information to MethodBase and value-initializes every
// option/state member. NOTE(review): the member-initializer list is kept
// byte-identical — its order must match the declaration order in MethodDL.h,
// which is outside this view.
917MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
918 : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputDepth(), fInputHeight(), fInputWidth(),
919 fBatchDepth(), fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(),
920 fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(),
921 fWeightInitializationString(), fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()
922{
923 // Nothing to do here
924}
925
926////////////////////////////////////////////////////////////////////////////////
927/// Constructor from a weight file.
// Used when reviving a trained method from its XML weight file; the actual
// option/weight reading happens elsewhere in MethodBase's setup flow.
// NOTE(review): initializer list kept byte-identical (declaration order is in
// MethodDL.h, outside this view).
928MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
929 : MethodBase(Types::kDL, theData, theWeightFile), fInputDepth(), fInputHeight(), fInputWidth(), fBatchDepth(),
930 fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(), fInputLayoutString(),
931 fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
932 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()
933{
934 // Nothing to do here
935}
936
937////////////////////////////////////////////////////////////////////////////////
938/// Destructor.
// NOTE(review): body of MethodDL::~MethodDL() — the signature line (original
// 939) was lost in extraction. Intentionally empty in this revision.
940{
941 // Nothing to do here
942}
943
944////////////////////////////////////////////////////////////////////////////////
945/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
946auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
947{
948 // remove empty spaces
949 parseString.ReplaceAll(" ","");
950 KeyValueVector_t blockKeyValues;
951 const TString keyValueDelim("=");
952
953 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
954 TIter nextBlock(blockStrings);
955 TObjString *blockString = (TObjString *)nextBlock();
956
957 for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
958 blockKeyValues.push_back(std::map<TString, TString>());
959 std::map<TString, TString> &currentBlock = blockKeyValues.back();
960
961 TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
962 TIter nextToken(subStrings);
963 TObjString *token = (TObjString *)nextToken();
964
965 for (; token != nullptr; token = (TObjString *)nextToken()) {
966 TString strKeyValue(token->GetString());
967 int delimPos = strKeyValue.First(keyValueDelim.Data());
968 if (delimPos <= 0) continue;
969
970 TString strKey = TString(strKeyValue(0, delimPos));
971 strKey.ToUpper();
972 TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
973
974 strKey.Strip(TString::kBoth, ' ');
975 strValue.Strip(TString::kBoth, ' ');
976
977 currentBlock.insert(std::make_pair(strKey, strValue));
978 }
979 }
980 return blockKeyValues;
981}
982
983////////////////////////////////////////////////////////////////////////////////
984/// What kind of analysis type can handle the CNN
986{
987 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
988 if (type == Types::kMulticlass) return kTRUE;
989 if (type == Types::kRegression) return kTRUE;
990
991 return kFALSE;
992}
993
994////////////////////////////////////////////////////////////////////////////////
995/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
996/// 100 etc.
997/// - 20% and 0.2 selects 20% of the training set as validation data.
998/// - 100 selects 100 events as the validation data.
999///
1000/// @return number of samples in validation set
1001///
1003{
1004 Int_t nValidationSamples = 0;
1005 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
1006
1007 // Parsing + Validation
1008 // --------------------
1009 if (fNumValidationString.EndsWith("%")) {
1010 // Relative spec. format 20%
1011 TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
1012
1013 if (intValStr.IsFloat()) {
1014 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1015 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1016 } else {
1017 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
1018 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
1019 }
1020 } else if (fNumValidationString.IsFloat()) {
1021 Double_t valSizeAsDouble = fNumValidationString.Atof();
1022
1023 if (valSizeAsDouble < 1.0) {
1024 // Relative spec. format 0.2
1025 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
1026 } else {
1027 // Absolute spec format 100 or 100.0
1028 nValidationSamples = valSizeAsDouble;
1029 }
1030 } else {
1031 Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
1032 << Endl;
1033 }
1034
1035 // Value validation
1036 // ----------------
1037 if (nValidationSamples < 0) {
1038 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
1039 }
1040
1041 if (nValidationSamples == 0) {
1042 Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
1043 }
1044
1045 if (nValidationSamples >= (Int_t)trainingSetSize) {
1046 Log() << kFATAL << "Validation size \"" << fNumValidationString
1047 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
1048 }
1049
1050 return nValidationSamples;
1051}
1052
1053
1054////////////////////////////////////////////////////////////////////////////////
1055/// Implementation of architecture specific train method
1056///
1057template <typename Architecture_t>
1059{
1060
1061
1062 using Scalar_t = typename Architecture_t::Scalar_t;
1065 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1066
1067 bool debug = Log().GetMinType() == kDEBUG;
1068
1069
1070 // Determine the number of outputs
1071 // // size_t outputSize = 1;
1072 // // if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
1073 // // outputSize = GetNTargets();
1074 // // } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
1075 // // outputSize = DataInfo().GetNClasses();
1076 // // }
1077
1078 // set the random seed for weight initialization
1079 Architecture_t::SetRandomSeed(fRandomSeed);
1080
1081 ///split training data in training and validation data
1082 // and determine the number of training and testing examples
1083
1084 size_t nValidationSamples = GetNumValidationSamples();
1085 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1086
1087 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
1088 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1089 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1090
1091 size_t trainingPhase = 1;
1092
1093 for (TTrainingSettings &settings : this->GetTrainingSettings()) {
1094
1095 size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading
1096
1097
1098 // After the processing of the options, initialize the master deep net
1099 size_t batchSize = settings.batchSize;
1100 // Should be replaced by actual implementation. No support for this now.
1101 size_t inputDepth = this->GetInputDepth();
1102 size_t inputHeight = this->GetInputHeight();
1103 size_t inputWidth = this->GetInputWidth();
1104 size_t batchDepth = this->GetBatchDepth();
1105 size_t batchHeight = this->GetBatchHeight();
1106 size_t batchWidth = this->GetBatchWidth();
1107 ELossFunction J = this->GetLossFunction();
1109 ERegularization R = settings.regularization;
1110 EOptimizer O = settings.optimizer;
1111 Scalar_t weightDecay = settings.weightDecay;
1112
1113 //Batch size should be included in batch layout as well. There are two possibilities:
1114 // 1. Batch depth = batch size one will input tensorsa as (batch_size x d1 x d2)
1115 // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height
1116 // 2. Batch depth = 1, batch height = batch size batxch width = dim of input features
1117 // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features )
1118
1119 if (batchDepth != batchSize && batchDepth > 1) {
1120 Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1121 return;
1122 }
1123 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1124 Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1125 return;
1126 }
1127
1128
1129 //check also that input layout compatible with batch layout
1130 bool badLayout = false;
1131 // case batch depth == batch size
1132 if (batchDepth == batchSize)
1133 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1134 // case batch Height is batch size
1135 if (batchHeight == batchSize && batchDepth == 1)
1136 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1137 if (badLayout) {
1138 Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1139 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1140 return;
1141 }
1142
1143 // check batch size is compatible with number of events
1144 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1145 Log() << kFATAL << "Number of samples in the datasets are train: ("
1146 << nTrainingSamples << ") test: (" << nValidationSamples
1147 << "). One of these is smaller than the batch size of "
1148 << settings.batchSize << ". Please increase the batch"
1149 << " size to be at least the same size as the smallest"
1150 << " of them." << Endl;
1151 }
1152
1153
1154 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1155
1156 // create a copy of DeepNet for evaluating but with batch size = 1
1157 // fNet is the saved network and will be with CPU or Referrence architecture
1158 if (trainingPhase == 1) {
1159 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1160 batchHeight, batchWidth, J, I, R, weightDecay));
1161 fBuildNet = true;
1162 }
1163 else
1164 fBuildNet = false;
1165
1166 // Initialize the vector of slave nets
1167 std::vector<DeepNet_t> nets{};
1168 nets.reserve(nThreads);
1169 for (size_t i = 0; i < nThreads; i++) {
1170 // create a copies of the master deep net
1171 nets.push_back(deepNet);
1172 }
1173
1174 // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
1175 CreateDeepNet(deepNet, nets);
1176
1177 if (trainingPhase > 1) {
1178 // copy initial weights from fNet to deepnet
1179 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1180 const auto & nLayer = fNet->GetLayerAt(i);
1181 const auto & dLayer = deepNet.GetLayerAt(i);
1182 // could use a traits for detecting equal architectures
1183 // dLayer->CopyWeights(nLayer->GetWeights());
1184 // dLayer->CopyBiases(nLayer->GetBiases());
1185 Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights() );
1186 Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases() );
1187 }
1188 }
1189
1190 // when fNet is built create also input matrix that will be used to evaluate it
1191 if (fBuildNet) {
1192 int n1 = batchHeight;
1193 int n2 = batchWidth;
1194 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1195 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
1196 fXInput.emplace_back(MatrixImpl_t(n1,n2));
1197 // create pointer to output matrix used for the predictions
1198 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
1199
1200 // print the created network
1201 Log() << "***** Deep Learning Network *****" << Endl;
1202 if (Log().GetMinType() <= kINFO)
1203 deepNet.Print();
1204 }
1205 Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;
1206
1207 // Loading the training and validation datasets
1208 TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
1209 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, deepNet.GetBatchSize(),
1210 deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
1211 deepNet.GetOutputWidth(), nThreads);
1212
1213 TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
1214 TensorDataLoader_t validationData(validationTuple, nValidationSamples, deepNet.GetBatchSize(),
1215 deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
1216 deepNet.GetOutputWidth(), nThreads);
1217
1218
1219
1220 // do an evaluation of the network to compute initial minimum test error
1221
1222 Bool_t includeRegularization = (R != DNN::ERegularization::kNone);
1223
1224 Double_t minValError = 0.0;
1225 for (auto batch : validationData) {
1226 auto inputTensor = batch.GetInput();
1227 auto outputMatrix = batch.GetOutput();
1228 auto weights = batch.GetWeights();
1229 // should we apply droput to the loss ??
1230 minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1231 }
1232 // add Regularization term
1233 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1234 minValError /= (Double_t)(nValidationSamples / settings.batchSize);
1235 minValError += regzTerm;
1236
1237
1238 // create a pointer to base class VOptimizer
1239 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1240
1241 // initialize the base class pointer with the corresponding derived class object.
1242 switch (O) {
1243
1244 case EOptimizer::kSGD:
1245 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1246 new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
1247 break;
1248
1249 case EOptimizer::kAdam:
1250 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1251 new DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1252 break;
1253
1254 case EOptimizer::kAdagrad:
1255 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1256 new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1257 break;
1258
1259 case EOptimizer::kRMSProp:
1260 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1261 new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum));
1262 break;
1263
1264 case EOptimizer::kAdadelta:
1265 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1266 new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
1267 break;
1268 }
1269
1270
1271 // Initialize the vector of batches, one batch for one slave network
1272 std::vector<TTensorBatch<Architecture_t>> batches{};
1273
1274 bool converged = false;
1275 size_t convergenceCount = 0;
1276 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1277
1278 // start measuring
1279 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1280 tstart = std::chrono::system_clock::now();
1281
1282 Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
1283 << "Learning rate = " << settings.learningRate
1284 << " regularization " << (char) settings.regularization
1285 << " minimum error = " << minValError
1286 << Endl;
1287 if (!fInteractive) {
1288 std::string separator(62, '-');
1289 Log() << separator << Endl;
1290 Log() << std::setw(10) << "Epoch"
1291 << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
1292 << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
1293 << std::setw(12) << "nEvents/s"
1294 << std::setw(12) << "Conv. Steps" << Endl;
1295 Log() << separator << Endl;
1296 }
1297
1298 // set up generator for shuffling the batches
1299 // if seed is zero we have always a different order in the batches
1300 size_t shuffleSeed = 0;
1301 if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
1302 RandomGenerator<TRandom3> rng(shuffleSeed);
1303
1304 // print weights before
1305 if (fBuildNet && debug) {
1306 Log() << "Initial Deep Net Weights " << Endl;
1307 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1308 for (size_t l = 0; l < weights_tensor.size(); ++l)
1309 weights_tensor[l].Print();
1310 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1311 bias_tensor[0].Print();
1312 }
1313
1314
1315 while (!converged) {
1316 optimizer->IncrementGlobalStep();
1317 trainingData.Shuffle(rng);
1318
1319 // execute all epochs
1320 //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1321
1322 for (size_t i = 0; i < batchesInEpoch; ++i ) {
1323 // Clean and load new batches, one batch for one slave net
1324 //batches.clear();
1325 //batches.reserve(nThreads);
1326 //for (size_t j = 0; j < nThreads; j++) {
1327 // batches.push_back(trainingData.GetTensorBatch());
1328 //}
1329
1330 auto my_batch = trainingData.GetTensorBatch();
1331
1332
1333 // execute one optimization step
1334 deepNet.Forward(my_batch.GetInput(), true);
1335 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1336 optimizer->Step();
1337 }
1338 //}
1339
1340 if ((optimizer->GetGlobalStep() % settings.testInterval) == 0) {
1341
1342 std::chrono::time_point<std::chrono::system_clock> t1,t2;
1343
1344 t1 = std::chrono::system_clock::now();
1345
1346 // Compute validation error.
1347 Double_t valError = 0.0;
1348 for (auto batch : validationData) {
1349 auto inputTensor = batch.GetInput();
1350 auto outputMatrix = batch.GetOutput();
1351 auto weights = batch.GetWeights();
1352 // should we apply droput to the loss ??
1353 valError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1354 }
1355 // normalize loss to number of batches and add regularization term
1356 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1357 valError /= (Double_t)(nValidationSamples / settings.batchSize);
1358 valError += regTerm;
1359
1360 t2 = std::chrono::system_clock::now();
1361
1362 // checking for convergence
1363 if (valError < minValError) {
1364 convergenceCount = 0;
1365 } else {
1366 convergenceCount += settings.testInterval;
1367 }
1368
1369 // copy configuration when reached a minimum error
1370 if (valError < minValError ) {
1371 // Copy weights from deepNet to fNet
1372 Log() << std::setw(10) << optimizer->GetGlobalStep()
1373 << " Minimum Test error found - save the configuration " << Endl;
1374 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1375 const auto & nLayer = fNet->GetLayerAt(i);
1376 const auto & dLayer = deepNet.GetLayerAt(i);
1377 ArchitectureImpl_t::CopyDiffArch(nLayer->GetWeights(), dLayer->GetWeights() );
1378 ArchitectureImpl_t::CopyDiffArch(nLayer->GetBiases(), dLayer->GetBiases() );
1379 // std::cout << "Weights for layer " << i << std::endl;
1380 // for (size_t k = 0; k < dlayer->GetWeights().size(); ++k)
1381 // dLayer->GetWeightsAt(k).Print();
1382 }
1383 minValError = valError;
1384 }
1385 else if ( minValError <= 0. )
1386 minValError = valError;
1387
1388
1389 Double_t trainingError = 0.0;
1390 // Compute training error.
1391 for (auto batch : trainingData) {
1392 auto inputTensor = batch.GetInput();
1393 auto outputMatrix = batch.GetOutput();
1394 auto weights = batch.GetWeights();
1395 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
1396 }
1397 // normalize loss to number of batches and add regularization term
1398 trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
1399 trainingError += regTerm;
1400
1401 // stop measuring
1402 tend = std::chrono::system_clock::now();
1403
1404 // Compute numerical throughput.
1405 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1406 std::chrono::duration<double> elapsed1 = t1-tstart;
1407 // std::chrono::duration<double> elapsed2 = t2-tstart;
1408 // time to compute training and test errors
1409 std::chrono::duration<double> elapsed_testing = tend-t1;
1410
1411 double seconds = elapsed_seconds.count();
1412 // double nGFlops = (double)(settings.testInterval * batchesInEpoch * settings.batchSize)*1.E-9;
1413 // nGFlops *= deepnet.GetNFlops() * 1e-9;
1414 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1415
1416 converged =
1417 convergenceCount > settings.convergenceSteps || optimizer->GetGlobalStep() >= settings.maxEpochs;
1418
1419
1420 Log() << std::setw(10) << optimizer->GetGlobalStep() << " | "
1421 << std::setw(12) << trainingError
1422 << std::setw(12) << valError
1423 << std::setw(12) << seconds / settings.testInterval
1424 << std::setw(12) << elapsed_testing.count()
1425 << std::setw(12) << 1. / eventTime
1426 << std::setw(12) << convergenceCount
1427 << Endl;
1428
1429 if (converged) {
1430 Log() << Endl;
1431 }
1432 tstart = std::chrono::system_clock::now();
1433 }
1434
1435 // if (stepCount % 10 == 0 || converged) {
1436 if (converged && debug) {
1437 Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << optimizer->GetGlobalStep()
1438 << Endl;
1439 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1440 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1441 for (size_t l = 0; l < weights_tensor.size(); ++l)
1442 weights_tensor[l].Print();
1443 bias_tensor[0].Print();
1444 }
1445
1446 }
1447
1448 trainingPhase++;
1449 } // end loop on training Phase
1450}
1451
1452////////////////////////////////////////////////////////////////////////////////
1454{
1455 if (fInteractive) {
1456 Log() << kFATAL << "Not implemented yet" << Endl;
1457 return;
1458 }
1459
1460 // using for training same scalar type defined for the prediction
1461 if (this->GetArchitectureString() == "GPU") {
1462#ifdef R__HAS_TMVAGPU
1463 Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1464 TrainDeepNet<DNN::TCuda<ScalarImpl_t> >();
1465#else
1466 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1467 "you have CUDA installed and it was successfully "
1468 "detected by CMAKE."
1469 << Endl;
1470 return;
1471#endif
1472 } else if (this->GetArchitectureString() == "OPENCL") {
1473 Log() << kFATAL << "OPENCL backend not yet supported." << Endl;
1474 return;
1475 } else if (this->GetArchitectureString() == "CPU") {
1476#ifdef R__HAS_TMVACPU
1477 Log() << kINFO << "Start of deep neural network training on CPU." << Endl << Endl;
1478 TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1479#else
1480 Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1481 "you have a BLAS implementation and it was successfully "
1482 "detected by CMake as well that the imt CMake flag is set."
1483 << Endl;
1484 return;
1485#endif
1486 } else if (this->GetArchitectureString() == "STANDARD") {
1487 Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
1488 TrainDeepNet<DNN::TReference<ScalarImpl_t> >();
1489 }
1490 else {
1491 Log() << kFATAL << this->GetArchitectureString() <<
1492 " is not a supported archiectire for TMVA::MethodDL"
1493 << Endl;
1494 }
1495
1496// /// definitions for CUDA
1497// #ifdef R__HAS_TMVAGPU // Included only if DNNCUDA flag is set.
1498// using Architecture_t = DNN::TCuda<Double_t>;
1499// #else
1500// #ifdef R__HAS_TMVACPU // Included only if DNNCPU flag is set.
1501// using Architecture_t = DNN::TCpu<Double_t>;
1502// #else
1503// using Architecture_t = DNN::TReference<Double_t>;
1504// #endif
1505// #endif
1506}
1507
1508
1509////////////////////////////////////////////////////////////////////////////////
1510Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
1511{
1512
1513 // note that fNet should have been build with a batch size of 1
1514
1515 if (!fNet || fNet->GetDepth() == 0) {
1516 Log() << kFATAL << "The network has not been trained and fNet is not built"
1517 << Endl;
1518 }
1519
1520 // input size must be equal to 1 which is the batch size of fNet
1521 R__ASSERT(fXInput.size() == 1 && fNet->GetBatchSize() == 1);
1522
1523 // int batchWidth = fNet->GetBatchWidth();
1524 // int batchDepth = fNet->GetBatchDepth();
1525 // int batchHeight = fNet->GetBatchHeight();
1526// int noutput = fNet->GetOutputWidth();
1527
1528
1529 // get current event
1530 const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1531
1532 int n1 = fXInput[0].GetNrows();
1533 int n2 = fXInput[0].GetNcols();
1534
1535 int nVariables = GetEvent()->GetNVariables();
1536
1537
1538 if (n1*n2 != nVariables) {
1539 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1540 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1541 << Endl;
1542 }
1543 // get the event data in input matrix
1544 for (int j = 0; j < n1; ++j) {
1545 for (int k = 0; k < n2; k++) {
1546 fXInput[0](j, k) = inputValues[j*n2+k];
1547 }
1548 }
1549
1550 // perform the prediction
1551 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1552
1553 // return value
1554 double mvaValue = (*fYHat)(0, 0);
1555
1556 // for debugging
1557#ifdef DEBUG_MVAVALUE
1558 using Tensor_t = std::vector<MatrixImpl_t>;
1559 TMatrixF xInput(n1,n2, inputValues.data() );
1560 std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
1561 xInput.Print();
1562 std::cout << "Output of DeepNet " << mvaValue << std::endl;
1563 auto & deepnet = *fNet;
1564 std::cout << "Loop on layers " << std::endl;
1565 for (int l = 0; l < deepnet.GetDepth(); ++l) {
1566 std::cout << "Layer " << l;
1567 const auto * layer = deepnet.GetLayerAt(l);
1568 const Tensor_t & layer_output = layer->GetOutput();
1569 layer->Print();
1570 std::cout << "DNN output " << layer_output.size() << std::endl;
1571 for (size_t i = 0; i < layer_output.size(); ++i) {
1572#ifdef R__HAS_TMVAGPU
1573 //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
1574 TMatrixD m = layer_output[i];
1575#else
1576 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1577#endif
1578 m.Print();
1579 }
1580 const Tensor_t & layer_weights = layer->GetWeights();
1581 std::cout << "DNN weights " << layer_weights.size() << std::endl;
1582 if (layer_weights.size() > 0) {
1583 int i = 0;
1584#ifdef R__HAS_TMVAGPU
1585 TMatrixD m = layer_weights[i];
1586// TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
1587#else
1588 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1589#endif
1590 m.Print();
1591 }
1592 }
1593#endif
1594
1595 return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
1596}
1597////////////////////////////////////////////////////////////////////////////////
1598/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1599////////////////////////////////////////////////////////////////////////////////
1600template <typename Architecture_t>
1601std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1602{
1603
1604 // Check whether the model is setup
1605 if (!fNet || fNet->GetDepth() == 0) {
1606 Log() << kFATAL << "The network has not been trained and fNet is not built"
1607 << Endl;
1608 }
1609
1610 // rebuild the networks
1611
1612 size_t inputDepth = this->GetInputDepth();
1613 size_t inputHeight = this->GetInputHeight();
1614 size_t inputWidth = this->GetInputWidth();
1615 size_t batchDepth = this->GetBatchDepth();
1616 size_t batchHeight = this->GetBatchHeight();
1617 size_t batchWidth = this->GetBatchWidth();
1618 ELossFunction J = fNet->GetLossFunction();
1619 EInitialization I = fNet->GetInitialization();
1620 ERegularization R = fNet->GetRegularization();
1621 Double_t weightDecay = fNet->GetWeightDecay();
1622
1623 using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1624 using Matrix_t = typename Architecture_t::Matrix_t;
1625 using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1626
1627 // create the deep neural network
1628 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1629 std::vector<DeepNet_t> nets{};
1630 fBuildNet = false;
1631 CreateDeepNet(deepNet,nets);
1632
1633 // copy weights from the saved fNet to the built DeepNet
1634 for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1635 const auto & nLayer = fNet->GetLayerAt(i);
1636 const auto & dLayer = deepNet.GetLayerAt(i);
1637 Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights() );
1638 Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases() );
1639 }
1640
1641 size_t n1 = deepNet.GetBatchHeight();
1642 size_t n2 = deepNet.GetBatchWidth();
1643 size_t n0 = deepNet.GetBatchSize();
1644 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1645 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1646 n1 = deepNet.GetBatchSize();
1647 n0 = 1;
1648 }
1649
1650 Long64_t nEvents = lastEvt - firstEvt;
1651 TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1652 TensorDataLoader_t testData(testTuple, nEvents, batchSize, n0, n1, n2, deepNet.GetOutputWidth(), 1);
1653
1654
1655 // Tensor_t xInput;
1656 // for (size_t i = 0; i < n0; ++i)
1657 // xInput.emplace_back(Matrix_t(n1,n2));
1658
1659 // create pointer to output matrix used for the predictions
1660 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1661
1662 // use timer
1663 Timer timer( nEvents, GetName(), kTRUE );
1664
1665 if (logProgress)
1666 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1667 << "Evaluation of " << GetMethodName() << " on "
1668 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1669 << " sample (" << nEvents << " events)" << Endl;
1670
1671
1672 // eventg loop
1673 std::vector<double> mvaValues(nEvents);
1674
1675
1676 for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1677
1678 Long64_t ievt_end = ievt + batchSize;
1679 // case of batch prediction for
1680 if (ievt_end <= lastEvt) {
1681
1682 if (ievt == firstEvt) {
1683 Data()->SetCurrentEvent(ievt);
1684 size_t nVariables = GetEvent()->GetNVariables();
1685
1686 if (n1 == batchSize && n0 == 1) {
1687 if (n2 != nVariables) {
1688 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1689 << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1690 << Endl;
1691 }
1692 } else {
1693 if (n1*n2 != nVariables || n0 != batchSize) {
1694 Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1695 << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1696 << Endl;
1697 }
1698 }
1699 }
1700
1701 auto batch = testData.GetTensorBatch();
1702 auto inputTensor = batch.GetInput();
1703
1704 auto xInput = batch.GetInput();
1705 // make the prediction
1706 deepNet.Prediction(yHat, xInput, fOutputFunction);
1707 for (size_t i = 0; i < batchSize; ++i) {
1708 double value = yHat(i,0);
1709 mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1710 }
1711 }
1712 else {
1713 // case of remaining events: compute prediction by single event !
1714 for (Long64_t i = ievt; i < lastEvt; ++i) {
1715 Data()->SetCurrentEvent(i);
1716 mvaValues[i] = GetMvaValue();
1717 }
1718 }
1719 }
1720
1721 if (logProgress) {
1722 Log() << kINFO
1723 << "Elapsed time for evaluation of " << nEvents << " events: "
1724 << timer.GetElapsedTime() << " " << Endl;
1725 }
1726
1727 return mvaValues;
1728}
1729
1730const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1731{
1732 size_t nVariables = GetEvent()->GetNVariables();
1733 MatrixImpl_t X(1, nVariables);
1734 std::vector<MatrixImpl_t> X_vec;
1735 const Event *ev = GetEvent();
1736 const std::vector<Float_t>& inputValues = ev->GetValues();
1737 for (size_t i = 0; i < nVariables; i++) {
1738 X(0,i) = inputValues[i];
1739 }
1740 X_vec.emplace_back(X);
1741 size_t nTargets = std::max(1u, ev->GetNTargets());
1742 MatrixImpl_t YHat(1, nTargets);
1743 std::vector<Float_t> output(nTargets);
1744 fNet->Prediction(YHat, X_vec, fOutputFunction);
1745
1746 for (size_t i = 0; i < nTargets; i++)
1747 output[i] = YHat(0, i);
1748
1749 if (fRegressionReturnVal == NULL) {
1750 fRegressionReturnVal = new std::vector<Float_t>();
1751 }
1752 fRegressionReturnVal->clear();
1753
1754 Event * evT = new Event(*ev);
1755 for (size_t i = 0; i < nTargets; ++i) {
1756 evT->SetTarget(i, output[i]);
1757 }
1758
1759 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1760 for (size_t i = 0; i < nTargets; ++i) {
1761 fRegressionReturnVal->push_back(evT2->GetTarget(i));
1762 }
1763 delete evT;
1764 return *fRegressionReturnVal;
1765}
1766
1767const std::vector<Float_t> & TMVA::MethodDL::GetMulticlassValues()
1768{
1769 size_t nVariables = GetEvent()->GetNVariables();
1770 MatrixImpl_t X(1, nVariables);
1771 std::vector<MatrixImpl_t> X_vec;
1772 MatrixImpl_t YHat(1, DataInfo().GetNClasses());
1773 if (fMulticlassReturnVal == NULL) {
1774 fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1775 }
1776
1777 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1778 for (size_t i = 0; i < nVariables; i++) {
1779 X(0,i) = inputValues[i];
1780 }
1781 X_vec.emplace_back(X);
1782 fNet->Prediction(YHat, X_vec, fOutputFunction);
1783 for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1784 (*fMulticlassReturnVal)[i] = YHat(0, i);
1785 }
1786 return *fMulticlassReturnVal;
1787}
1788
1789
1790////////////////////////////////////////////////////////////////////////////////
1791/// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1792////////////////////////////////////////////////////////////////////////////////
1793std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
1794{
1795
1796
1797 Long64_t nEvents = Data()->GetNEvents();
1798 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
1799 if (firstEvt < 0) firstEvt = 0;
1800 nEvents = lastEvt-firstEvt;
1801
1802 // use same batch size as for training (from first strategy)
1803 size_t batchSize = (fTrainingSettings.empty()) ? 1000 : fTrainingSettings.front().batchSize;
1804 if ( size_t(nEvents) < batchSize ) batchSize = nEvents;
1805
1806 // using for training same scalar type defined for the prediction
1807 if (this->GetArchitectureString() == "GPU") {
1808#ifdef R__HAS_TMVAGPU
1809 Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
1810 return PredictDeepNet<DNN::TCuda<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
1811#endif
1812 } else if (this->GetArchitectureString() == "CPU") {
1813#ifdef R__HAS_TMVACPU
1814 Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
1815 return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
1816#endif
1817 }
1818 Log() << kINFO << "Evaluate deep neural network on the STANDARD architecture using batches with size = " << batchSize
1819 << Endl << Endl;
1820 return PredictDeepNet<DNN::TReference<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
1821}
1822////////////////////////////////////////////////////////////////////////////////
1823void MethodDL::AddWeightsXMLTo(void * parent) const
1824{
1825 // Create the parent XML node with name "Weights"
1826 auto & xmlEngine = gTools().xmlengine();
1827 void* nn = xmlEngine.NewChild(parent, 0, "Weights");
1828
1829 /*! Get all necessary information, in order to be able to reconstruct the net
1830 * if we read the same XML file. */
1831
1832 // Deep Net specific info
1833 Int_t depth = fNet->GetDepth();
1834
1835 Int_t inputDepth = fNet->GetInputDepth();
1836 Int_t inputHeight = fNet->GetInputHeight();
1837 Int_t inputWidth = fNet->GetInputWidth();
1838
1839 Int_t batchSize = fNet->GetBatchSize();
1840
1841 Int_t batchDepth = fNet->GetBatchDepth();
1842 Int_t batchHeight = fNet->GetBatchHeight();
1843 Int_t batchWidth = fNet->GetBatchWidth();
1844
1845 char lossFunction = static_cast<char>(fNet->GetLossFunction());
1846 char initialization = static_cast<char>(fNet->GetInitialization());
1847 char regularization = static_cast<char>(fNet->GetRegularization());
1848
1849 Double_t weightDecay = fNet->GetWeightDecay();
1850
1851 // Method specific info (not sure these are needed)
1852 char outputFunction = static_cast<char>(this->GetOutputFunction());
1853 //char lossFunction = static_cast<char>(this->GetLossFunction());
1854
1855 // Add attributes to the parent node
1856 xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
1857
1858 xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
1859 xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
1860 xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
1861
1862 xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
1863 xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
1864 xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
1865 xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
1866
1867 xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1868 xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
1869 xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
1870 xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
1871
1872 gTools().AddAttr(nn, "WeightDecay", weightDecay);
1873
1874
1875 for (Int_t i = 0; i < depth; i++)
1876 {
1877 fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
1878 }
1879
1880
1881}
1882
1883////////////////////////////////////////////////////////////////////////////////
1885{
1886
1887 auto netXML = gTools().GetChild(rootXML, "Weights");
1888 if (!netXML){
1889 netXML = rootXML;
1890 }
1891
1892 size_t netDepth;
1893 gTools().ReadAttr(netXML, "NetDepth", netDepth);
1894
1895 size_t inputDepth, inputHeight, inputWidth;
1896 gTools().ReadAttr(netXML, "InputDepth", inputDepth);
1897 gTools().ReadAttr(netXML, "InputHeight", inputHeight);
1898 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1899
1900 size_t batchSize, batchDepth, batchHeight, batchWidth;
1901 gTools().ReadAttr(netXML, "BatchSize", batchSize);
1902 // use always batchsize = 1
1903 //batchSize = 1;
1904 gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
1905 gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
1906 gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
1907
1908 char lossFunctionChar;
1909 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1910 char initializationChar;
1911 gTools().ReadAttr(netXML, "Initialization", initializationChar);
1912 char regularizationChar;
1913 gTools().ReadAttr(netXML, "Regularization", regularizationChar);
1914 char outputFunctionChar;
1915 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1916 double weightDecay;
1917 gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
1918
1919 // create the net
1920
1921 // DeepNetCpu_t is defined in MethodDL.h
1922 this->SetInputDepth(inputDepth);
1923 this->SetInputHeight(inputHeight);
1924 this->SetInputWidth(inputWidth);
1925 this->SetBatchDepth(batchDepth);
1926 this->SetBatchHeight(batchHeight);
1927 this->SetBatchWidth(batchWidth);
1928
1929
1930
1931 fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
1932 batchHeight, batchWidth,
1933 static_cast<ELossFunction>(lossFunctionChar),
1934 static_cast<EInitialization>(initializationChar),
1935 static_cast<ERegularization>(regularizationChar),
1936 weightDecay));
1937
1938 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1939
1940
1941 //size_t previousWidth = inputWidth;
1942 auto layerXML = gTools().xmlengine().GetChild(netXML);
1943
1944 // loop on the layer and add them to the network
1945 for (size_t i = 0; i < netDepth; i++) {
1946
1947 TString layerName = gTools().xmlengine().GetNodeName(layerXML);
1948
1949 // case of dense layer
1950 if (layerName == "DenseLayer") {
1951
1952 // read width and activation function and then we can create the layer
1953 size_t width = 0;
1954 gTools().ReadAttr(layerXML, "Width", width);
1955
1956 // Read activation function.
1957 TString funcString;
1958 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
1959 EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
1960
1961
1962 fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
1963
1964 }
1965 // Convolutional Layer
1966 else if (layerName == "ConvLayer") {
1967
1968 // read width and activation function and then we can create the layer
1969 size_t depth = 0;
1970 gTools().ReadAttr(layerXML, "Depth", depth);
1971 size_t fltHeight, fltWidth = 0;
1972 size_t strideRows, strideCols = 0;
1973 size_t padHeight, padWidth = 0;
1974 gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
1975 gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
1976 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
1977 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
1978 gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
1979 gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
1980
1981 // Read activation function.
1982 TString funcString;
1983 gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
1984 EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
1985
1986
1987 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
1988 padHeight, padWidth, actFunction);
1989
1990 }
1991
1992 // MaxPool Layer
1993 else if (layerName == "MaxPoolLayer") {
1994
1995 // read maxpool layer info
1996 size_t filterHeight, filterWidth = 0;
1997 size_t strideRows, strideCols = 0;
1998 gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
1999 gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2000 gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2001 gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2002
2003 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2004 }
2005 else if (layerName == "ReshapeLayer") {
2006
2007 // read reshape layer info
2008 size_t depth, height, width = 0;
2009 gTools().ReadAttr(layerXML, "Depth", depth);
2010 gTools().ReadAttr(layerXML, "Height", height);
2011 gTools().ReadAttr(layerXML, "Width", width);
2012 int flattening = 0;
2013 gTools().ReadAttr(layerXML, "Flattening",flattening );
2014
2015 fNet->AddReshapeLayer(depth, height, width, flattening);
2016
2017 }
2018 else if (layerName == "RNNLayer") {
2019
2020 // read RNN layer info
2021 size_t stateSize,inputSize, timeSteps = 0;
2022 int rememberState= 0;
2023 gTools().ReadAttr(layerXML, "StateSize", stateSize);
2024 gTools().ReadAttr(layerXML, "InputSize", inputSize);
2025 gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2026 gTools().ReadAttr(layerXML, "RememberState", rememberState );
2027
2028 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
2029
2030 }
2031
2032
2033 // read eventually weights and biases
2034 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2035
2036 // read next layer
2037 layerXML = gTools().GetNextChild(layerXML);
2038 }
2039
2040 fBuildNet = false;
2041 // create now the input and output matrices
2042 int n1 = batchHeight;
2043 int n2 = batchWidth;
2044 // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2045 if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
2046 if (fXInput.size() > 0) fXInput.clear();
2047 fXInput.emplace_back(MatrixImpl_t(n1,n2));
2048 // create pointer to output matrix used for the predictions
2049 fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2050
2051
2052}
2053
2054
2055////////////////////////////////////////////////////////////////////////////////
2056void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
2057{
2058}
2059
2060////////////////////////////////////////////////////////////////////////////////
2062{
2063 // TODO
2064 return NULL;
2065}
2066
2067////////////////////////////////////////////////////////////////////////////////
2069{
2070 // TODO
2071}
2072
2073} // namespace TMVA
#define REGISTER_METHOD(CLASS)
for example
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
long long Long64_t
Definition: RtypesCore.h:69
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:363
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:96
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
The Formula class.
Definition: TFormula.h:84
Double_t Eval(Double_t x) const
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Definition: Configurable.h:168
MsgLogger & Log() const
Definition: Configurable.h:122
Adadelta Optimizer class.
Definition: Adadelta.h:44
Adagrad Optimizer class.
Definition: Adagrad.h:44
Adam Optimizer class.
Definition: Adam.h:44
Generic Deep Neural Network class.
Definition: DeepNet.h:74
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:615
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:446
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:400
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:485
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:648
Generic layer class.
Definition: DenseLayer.h:55
RMSProp Optimizer class.
Definition: RMSProp.h:44
static void CopyDiffArch(TMatrixT< Scalar_t > &A, const AMatrix_t &B)
Definition: Reference.h:569
Stochastic Batch Gradient Descent Optimizer class.
Definition: SGD.h:45
Generic General Layer class.
Definition: GeneralLayer.h:46
void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:373
Class that contains all the data information.
Definition: DataSetInfo.h:60
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:205
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:99
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:309
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:320
UInt_t GetClass() const
Definition: Event.h:81
std::vector< Float_t > & GetValues()
Definition: Event.h:89
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:97
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
const char * GetName() const
Definition: MethodBase.h:325
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:675
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
const TString & GetMethodName() const
Definition: MethodBase.h:322
const Event * GetEvent() const
Definition: MethodBase.h:740
DataSetInfo & DataInfo() const
Definition: MethodBase.h:401
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:584
UInt_t GetNvar() const
Definition: MethodBase.h:335
DataSet * Data() const
Definition: MethodBase.h:400
IPythonInteractive * fInteractive
Definition: MethodBase.h:437
void SetInputWidth(size_t inputWidth)
Definition: MethodDL.h:259
void GetHelpMessage() const
Definition: MethodDL.cxx:2068
DNN::ELossFunction fLossFunction
The loss function.
Definition: MethodDL.h:170
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDL.cxx:1767
TString fLayoutString
The string defining the layout of the deep net.
Definition: MethodDL.h:174
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:97
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1453
size_t GetBatchHeight() const
Definition: MethodDL.h:234
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
Definition: MethodDL.cxx:1793
void ParseRnnLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate rnn layer.
Definition: MethodDL.cxx:837
TString fWeightInitializationString
The string defining the weight initialization method.
Definition: MethodDL.h:177
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:715
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:166
TString fArchitectureString
The string defining the architecure: CPU or GPU.
Definition: MethodDL.h:178
void Init()
default initializations
Definition: MethodDL.cxx:392
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:917
void TrainDeepNet()
train of deep neural network using the defined architecture
Definition: MethodDL.cxx:1058
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:251
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:240
void ParseLstmLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate lstm layer.
Definition: MethodDL.cxx:898
void SetInputDepth(size_t inputDepth)
Setters.
Definition: MethodDL.h:257
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:539
UInt_t GetNumValidationSamples()
parce the validation string and return the number of event data used for validation
TString GetBatchLayoutString() const
Definition: MethodDL.h:244
void ProcessOptions()
Definition: MethodDL.cxx:219
size_t GetInputDepth() const
Definition: MethodDL.h:229
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDL.cxx:1730
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:98
TString GetInputLayoutString() const
Definition: MethodDL.h:243
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:262
std::vector< MatrixImpl_t > fXInput
Definition: MethodDL.h:96
size_t GetInputHeight() const
Definition: MethodDL.h:230
TString GetArchitectureString() const
Definition: MethodDL.h:249
void ParseBatchLayout()
Parse the input layout.
Definition: MethodDL.cxx:445
void ReadWeightsFromStream(std::istream &)
Definition: MethodDL.cxx:2056
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:1884
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition: MethodDL.h:179
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:81
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:169
DNN::EInitialization fWeightInitialization
The initialization method.
Definition: MethodDL.h:168
size_t GetBatchDepth() const
Definition: MethodDL.h:233
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:184
size_t GetInputWidth() const
Definition: MethodDL.h:231
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:241
TString fBatchLayoutString
The string defining the layout of the batch.
Definition: MethodDL.h:173
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:985
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:616
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Pases the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:776
TString fTrainingStrategyString
The string defining the training strategy.
Definition: MethodDL.h:176
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2061
void SetInputHeight(size_t inputHeight)
Definition: MethodDL.h:258
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:261
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:946
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:263
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1601
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:239
TString GetLayoutString() const
Definition: MethodDL.h:245
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
Definition: MethodDL.h:92
size_t GetBatchWidth() const
Definition: MethodDL.h:235
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:1823
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:93
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDL.cxx:1510
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:939
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:399
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:181
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling the ProcesOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:492
TString fErrorStrategy
The string defining the error strategy for training.
Definition: MethodDL.h:175
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:159
TString fInputLayoutString
The string defining the layout of the input.
Definition: MethodDL.h:172
EMsgType GetMinType() const
Definition: MsgLogger.h:71
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:134
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TXMLEngine & xmlengine()
Definition: Tools.h:270
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:337
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:355
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1235
Singleton class for Global types used by TMVA.
Definition: Types.h:73
EAnalysisType
Definition: Types.h:127
@ kMulticlass
Definition: Types.h:130
@ kClassification
Definition: Types.h:128
@ kRegression
Definition: Types.h:129
@ kTraining
Definition: Types.h:144
void Print(Option_t *name="") const
Print the matrix as a table of elements.
TMatrixT.
Definition: TMatrixT.h:39
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Definition: TNamed.cxx:128
An array of TObjects.
Definition: TObjArray.h:37
Collectable string class.
Definition: TObjString.h:28
const TString & GetString() const
Definition: TObjString.h:47
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:550
@ kTrailing
Definition: TString.h:262
@ kBoth
Definition: TString.h:262
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
Definition: TXMLEngine.cxx:707
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
#define I(x, y, z)
double T(double x)
Definition: ChebyshevPol.h:34
EInitialization
Definition: Functions.h:70
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:80
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:44
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:216
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:63
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:55
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
Abstract ClassifierFactory template that handles arbitrary types.
Tools & gTools()
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDL.cxx:67
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Bool_t IsNaN(Double_t x)
Definition: TMath.h:880
Double_t Log(Double_t x)
Definition: TMath.h:748
All of the options that can be specified in the training string.
Definition: MethodDL.h:62
DNN::EOptimizer optimizer
Definition: MethodDL.h:68
DNN::ERegularization regularization
Definition: MethodDL.h:67
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:72
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
auto * t1
Definition: textangle.C:20