MethodDNN.cxx
1// @(#)root/tmva $Id$
2// Author: Peter Speckmayer
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodDNN *
8 * *
9 * *
10 * Description: *
11 * A neural network implementation *
12 * *
13 * Authors (alphabetical): *
14 * Simon Pfreundschuh <s.pfreundschuh@gmail.com> - CERN, Switzerland *
15 * Peter Speckmayer <peter.speckmayer@gmx.ch> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (see tmva/doc/LICENSE) *
26 **********************************************************************************/
27
28/*! \class TMVA::MethodDNN
29\ingroup TMVA
30Deep Neural Network Implementation.
31*/
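// A minimal booking sketch (illustrative only, not part of this file; the
// factory/dataloader objects and the option values are assumptions):
//
//    TMVA::Factory factory("TMVAClassification", outputFile,
//                          "AnalysisType=Classification");
//    factory.BookMethod(dataloader, TMVA::Types::kDNN, "DNN",
//                       "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR:"
//                       "ErrorStrategy=CROSSENTROPY:Architecture=CPU");
//
// Note that this class is deprecated in favour of MethodDL (TMVA::kDL), as the
// warning issued in Init() states.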
32
33#include "TMVA/MethodDNN.h"
34
35#include "TString.h"
36#include "TFormula.h"
37#include "TObjString.h"
38
39#include "TMVA/ClassifierFactory.h"
40#include "TMVA/Configurable.h"
41#include "TMVA/IMethod.h"
42#include "TMVA/MsgLogger.h"
43#include "TMVA/MethodBase.h"
44#include "TMVA/Timer.h"
45#include "TMVA/Types.h"
46#include "TMVA/Tools.h"
47#include "TMVA/Config.h"
48#include "TMVA/Ranking.h"
49
50#include "TMVA/DNN/Net.h"
51#include "TMVA/DNN/Architectures/Reference.h"
52
53#include "TMVA/NeuralNet.h"
54#include "TMVA/Monitoring.h"
55
56#ifdef R__HAS_TMVACPU
57#include "TMVA/DNN/Architectures/Cpu.h"
58#endif
59#ifdef R__HAS_TMVAGPU
60#include "TMVA/DNN/Architectures/Cuda.h"
61#endif
62
63#include <algorithm>
64#include <iostream>
65#include <string>
66#include <iomanip>
67#include <chrono>
68
69REGISTER_METHOD(DNN)
70
71ClassImp(TMVA::MethodDNN);
72
73namespace TMVA
74{
75 using namespace DNN;
76
77 ////////////////////////////////////////////////////////////////////////////////
78 /// standard constructor
79
80 TMVA::MethodDNN::MethodDNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData,
81 const TString &theOption)
82 : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption), fWeightInitialization(), fOutputFunction(),
83 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
84 fArchitectureString(), fTrainingSettings(), fResume(false), fSettings()
85 {
86}
87
88////////////////////////////////////////////////////////////////////////////////
89/// constructor from a weight file
90
91TMVA::MethodDNN::MethodDNN(DataSetInfo& theData,
92 const TString& theWeightFile)
93 : MethodBase( Types::kDNN, theData, theWeightFile),
94 fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
95 fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
96 fTrainingSettings(), fResume(false), fSettings()
97{
98 fWeightInitialization = DNN::EInitialization::kGauss;
99 fOutputFunction = DNN::EOutputFunction::kSigmoid;
100}
101
102////////////////////////////////////////////////////////////////////////////////
103/// destructor
104
105TMVA::MethodDNN::~MethodDNN()
106{
109}
110
111////////////////////////////////////////////////////////////////////////////////
112/// MLP can handle classification with 2 classes and regression with
113/// one regression-target
114
115Bool_t TMVA::MethodDNN::HasAnalysisType(Types::EAnalysisType type,
116 UInt_t numberClasses,
117 UInt_t /*numberTargets*/ )
118{
119 if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
120 if (type == Types::kMulticlass ) return kTRUE;
121 if (type == Types::kRegression ) return kTRUE;
122
123 return kFALSE;
124}
125
126////////////////////////////////////////////////////////////////////////////////
127/// default initializations
128
129void TMVA::MethodDNN::Init() {
130 Log() << kWARNING
131 << "MethodDNN is deprecated and it will be removed in future ROOT version. "
132 "Please use MethodDL ( TMVA::kDL)"
133 << Endl;
134
135}
136
137////////////////////////////////////////////////////////////////////////////////
138/// Options to be set in the option string:
139///
140/// - LearningRate <float> DNN learning rate parameter.
141/// - DecayRate <float> Decay rate for learning parameter.
142/// - TestRate <int> Period of validation set error computation.
143/// - BatchSize <int> Number of events per batch.
144///
145/// - ValidationSize <string> How many events to use for validation. "0.2"
146/// or "20%" indicates that a fifth of the
147/// training data should be used. "100"
148/// indicates that 100 events should be used.
149
150void TMVA::MethodDNN::DeclareOptions()
151{
152
153 DeclareOptionRef(fLayoutString="SOFTSIGN|(N+100)*2,LINEAR",
154 "Layout",
155 "Layout of the network.");
156
157 DeclareOptionRef(fValidationSize = "20%", "ValidationSize",
158 "Part of the training data to use for "
159 "validation. Specify as 0.2 or 20% to use a "
160 "fifth of the data set as validation set. "
161 "Specify as 100 to use exactly 100 events. "
162 "(Default: 20%)");
163
164 DeclareOptionRef(fErrorStrategy="CROSSENTROPY",
165 "ErrorStrategy",
166 "Loss function: Mean squared error (regression)"
167 " or cross entropy (binary classification).");
168 AddPreDefVal(TString("CROSSENTROPY"));
169 AddPreDefVal(TString("SUMOFSQUARES"));
170 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
171
172 DeclareOptionRef(fWeightInitializationString="XAVIER",
173 "WeightInitialization",
174 "Weight initialization strategy");
175 AddPreDefVal(TString("XAVIER"));
176 AddPreDefVal(TString("XAVIERUNIFORM"));
177
178 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
179 AddPreDefVal(TString("STANDARD"));
180 AddPreDefVal(TString("CPU"));
181 AddPreDefVal(TString("GPU"));
182 AddPreDefVal(TString("OPENCL"));
183
184 DeclareOptionRef(
185 fTrainingStrategyString = "LearningRate=1e-1,"
186 "Momentum=0.3,"
187 "Repetitions=3,"
188 "ConvergenceSteps=50,"
189 "BatchSize=30,"
190 "TestRepetitions=7,"
191 "WeightDecay=0.0,"
192 "Renormalize=L2,"
193 "DropConfig=0.0,"
194 "DropRepetitions=5|LearningRate=1e-4,"
195 "Momentum=0.3,"
196 "Repetitions=3,"
197 "ConvergenceSteps=50,"
198 "BatchSize=20,"
199 "TestRepetitions=7,"
200 "WeightDecay=0.001,"
201 "Renormalize=L2,"
202 "DropConfig=0.0+0.5+0.5,"
203 "DropRepetitions=5,"
204 "Multithreading=True",
205 "TrainingStrategy",
206 "Defines the training strategies.");
207}
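// Illustrative only: an option string exercising the options declared above
// might look like the following (values are assumptions, not recommendations):
//
//    "Layout=SOFTSIGN|(N+100)*2,LINEAR:ValidationSize=20%:"
//    "ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:Architecture=CPU:"
//    "TrainingStrategy=LearningRate=1e-1,Momentum=0.3,BatchSize=30,"
//    "ConvergenceSteps=50,TestRepetitions=7|LearningRate=1e-4,BatchSize=20"
//
// Training-strategy blocks are separated by '|', key=value pairs within a
// block by ','.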
208
209////////////////////////////////////////////////////////////////////////////////
210/// parse layout specification string and return a vector, each entry
211/// containing the number of neurons to go in each successive layer
212
213auto TMVA::MethodDNN::ParseLayoutString(TString layerSpec)
214 -> LayoutVector_t
215{
216 LayoutVector_t layout;
217 const TString layerDelimiter(",");
218 const TString subDelimiter("|");
219
220 const size_t inputSize = GetNvar();
221
222 TObjArray* layerStrings = layerSpec.Tokenize(layerDelimiter);
223 TIter nextLayer (layerStrings);
224 TObjString* layerString = (TObjString*)nextLayer ();
225
226 for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
227 int numNodes = 0;
228 EActivationFunction activationFunction = EActivationFunction::kTanh;
229
230 TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
231 TIter nextToken (subStrings);
232 TObjString* token = (TObjString *) nextToken ();
233 int idxToken = 0;
234 for (; token != nullptr; token = (TObjString *) nextToken()) {
235 switch (idxToken)
236 {
237 case 0:
238 {
239 TString strActFnc (token->GetString ());
240 if (strActFnc == "RELU") {
241 activationFunction = DNN::EActivationFunction::kRelu;
242 } else if (strActFnc == "TANH") {
243 activationFunction = DNN::EActivationFunction::kTanh;
244 } else if (strActFnc == "SYMMRELU") {
245 activationFunction = DNN::EActivationFunction::kSymmRelu;
246 } else if (strActFnc == "SOFTSIGN") {
247 activationFunction = DNN::EActivationFunction::kSoftSign;
248 } else if (strActFnc == "SIGMOID") {
249 activationFunction = DNN::EActivationFunction::kSigmoid;
250 } else if (strActFnc == "LINEAR") {
251 activationFunction = DNN::EActivationFunction::kIdentity;
252 } else if (strActFnc == "GAUSS") {
253 activationFunction = DNN::EActivationFunction::kGauss;
254 }
255 }
256 break;
257 case 1: // number of nodes
258 {
259 TString strNumNodes (token->GetString ());
260 TString strN ("x");
261 strNumNodes.ReplaceAll ("N", strN);
262 strNumNodes.ReplaceAll ("n", strN);
263 TFormula fml ("tmp",strNumNodes);
264 numNodes = fml.Eval (inputSize);
265 }
266 break;
267 }
268 ++idxToken;
269 }
270 layout.push_back(std::make_pair(numNodes, activationFunction));
271 }
272 return layout;
273}
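// Sketch of the parsing above, assuming 10 input variables (so N = 10): the
// layout string "TANH|(N+30)*2,TANH|N,LINEAR" tokenizes into three layers and
// yields roughly { {80, kTanh}, {10, kTanh}, {0, kIdentity} }. The activation
// comes from the token before '|', the node count from the TFormula evaluated
// at x = number of inputs; a layer given without a node count keeps
// numNodes = 0, and the last entry is replaced by the output-layer width in
// ProcessOptions().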
274
275////////////////////////////////////////////////////////////////////////////////
276/// parse key value pairs in blocks -> return vector of blocks with map of key value pairs
277
278auto TMVA::MethodDNN::ParseKeyValueString(TString parseString,
279 TString blockDelim,
280 TString tokenDelim)
281 -> KeyValueVector_t
282{
283 KeyValueVector_t blockKeyValues;
284 const TString keyValueDelim ("=");
285
286 TObjArray* blockStrings = parseString.Tokenize (blockDelim);
287 TIter nextBlock (blockStrings);
288 TObjString* blockString = (TObjString *) nextBlock ();
289
290 for (; blockString != nullptr; blockString = (TObjString *) nextBlock())
291 {
292 blockKeyValues.push_back (std::map<TString,TString>());
293 std::map<TString,TString>& currentBlock = blockKeyValues.back ();
294
295 TObjArray* subStrings = blockString->GetString ().Tokenize (tokenDelim);
296 TIter nextToken (subStrings);
297 TObjString* token = (TObjString*)nextToken ();
298
299 for (; token != nullptr; token = (TObjString *)nextToken())
300 {
301 TString strKeyValue (token->GetString ());
302 int delimPos = strKeyValue.First (keyValueDelim.Data ());
303 if (delimPos <= 0)
304 continue;
305
306 TString strKey = TString (strKeyValue (0, delimPos));
307 strKey.ToUpper();
308 TString strValue = TString (strKeyValue (delimPos+1, strKeyValue.Length ()));
309
310 strKey.Strip (TString::kBoth, ' ');
311 strValue.Strip (TString::kBoth, ' ');
312
313 currentBlock.insert (std::make_pair (strKey, strValue));
314 }
315 }
316 return blockKeyValues;
317}
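// Sketch: with blockDelim = "|" and tokenDelim = ",", the string
//    "LearningRate=1e-1,BatchSize=30|LearningRate=1e-4,BatchSize=20"
// parses into two maps,
//    { {"LEARNINGRATE","1e-1"}, {"BATCHSIZE","30"} }
//    { {"LEARNINGRATE","1e-4"}, {"BATCHSIZE","20"} }
// keys are upper-cased while values are kept verbatim.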
318
319////////////////////////////////////////////////////////////////////////////////
320
321TString fetchValue (const std::map<TString, TString>& keyValueMap, TString key)
322{
323 key.ToUpper ();
324 std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
325 if (it == keyValueMap.end()) {
326 return TString ("");
327 }
328 return it->second;
329}
330
331////////////////////////////////////////////////////////////////////////////////
332
333template <typename T>
334T fetchValue(const std::map<TString,TString>& keyValueMap,
335 TString key,
336 T defaultValue);
337
338////////////////////////////////////////////////////////////////////////////////
339
340template <>
341int fetchValue(const std::map<TString,TString>& keyValueMap,
342 TString key,
343 int defaultValue)
344{
345 TString value (fetchValue (keyValueMap, key));
346 if (value == "") {
347 return defaultValue;
348 }
349 return value.Atoi ();
350}
351
352////////////////////////////////////////////////////////////////////////////////
353
354template <>
355double fetchValue (const std::map<TString,TString>& keyValueMap,
356 TString key, double defaultValue)
357{
358 TString value (fetchValue (keyValueMap, key));
359 if (value == "") {
360 return defaultValue;
361 }
362 return value.Atof ();
363}
364
365////////////////////////////////////////////////////////////////////////////////
366
367template <>
368TString fetchValue (const std::map<TString,TString>& keyValueMap,
369 TString key, TString defaultValue)
370{
371 TString value (fetchValue (keyValueMap, key));
372 if (value == "") {
373 return defaultValue;
374 }
375 return value;
376}
377
378////////////////////////////////////////////////////////////////////////////////
379
380template <>
381bool fetchValue (const std::map<TString,TString>& keyValueMap,
382 TString key, bool defaultValue)
383{
384 TString value (fetchValue (keyValueMap, key));
385 if (value == "") {
386 return defaultValue;
387 }
388 value.ToUpper ();
389 if (value == "TRUE" || value == "T" || value == "1") {
390 return true;
391 }
392 return false;
393}
394
395////////////////////////////////////////////////////////////////////////////////
396
397template <>
398std::vector<double> fetchValue(const std::map<TString, TString> & keyValueMap,
399 TString key,
400 std::vector<double> defaultValue)
401{
402 TString parseString (fetchValue (keyValueMap, key));
403 if (parseString == "") {
404 return defaultValue;
405 }
406 parseString.ToUpper ();
407 std::vector<double> values;
408
409 const TString tokenDelim ("+");
410 TObjArray* tokenStrings = parseString.Tokenize (tokenDelim);
411 TIter nextToken (tokenStrings);
412 TObjString* tokenString = (TObjString*)nextToken ();
413 for (; tokenString != NULL; tokenString = (TObjString*)nextToken ()) {
414 std::stringstream sstr;
415 double currentValue;
416 sstr << tokenString->GetString ().Data ();
417 sstr >> currentValue;
418 values.push_back (currentValue);
419 }
420 return values;
421}
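// Sketch: this specialization handles '+'-separated lists; fetching
// "DropConfig" from a block containing DropConfig=0.0+0.5+0.5 would return
// {0.0, 0.5, 0.5}, and a missing key falls back to the supplied default.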
422
423////////////////////////////////////////////////////////////////////////////////
424
425void TMVA::MethodDNN::ProcessOptions()
426{
427 if (IgnoreEventsWithNegWeightsInTraining()) {
428 Log() << kINFO
429 << "Will ignore negative events in training!"
430 << Endl;
431 }
432
433 if (fArchitectureString == "STANDARD") {
434 Log() << kERROR << "The STANDARD architecture has been deprecated. "
435 "Please use Architecture=CPU or Architecture=CPU."
436 "See the TMVA Users' Guide for instructions if you "
437 "encounter problems."
438 << Endl;
439 Log() << kFATAL << "The STANDARD architecture has been deprecated. "
440 "Please use Architecture=CPU or Architecture=CPU."
441 "See the TMVA Users' Guide for instructions if you "
442 "encounter problems."
443 << Endl;
444 }
445
446 if (fArchitectureString == "OPENCL") {
447 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
448 "Please use Architecture=CPU or Architecture=CPU for the "
449 "time being. See the TMVA Users' Guide for instructions "
450 "if you encounter problems."
451 << Endl;
452 Log() << kFATAL << "The OPENCL architecture has not been implemented yet. "
453 "Please use Architecture=CPU or Architecture=CPU for the "
454 "time being. See the TMVA Users' Guide for instructions "
455 "if you encounter problems."
456 << Endl;
457 }
458
459 if (fArchitectureString == "GPU") {
460#ifndef DNNCUDA // Included only if DNNCUDA flag is _not_ set.
461 Log() << kERROR << "CUDA backend not enabled. Please make sure "
462 "you have CUDA installed and it was successfully "
463 "detected by CMAKE."
464 << Endl;
465 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
466 "you have CUDA installed and it was successfully "
467 "detected by CMAKE."
468 << Endl;
469#endif // DNNCUDA
470 }
471
472 if (fArchitectureString == "CPU") {
473#ifndef DNNCPU // Included only if DNNCPU flag is _not_ set.
474 Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
475 "you have a BLAS implementation and it was successfully "
476 "detected by CMake as well that the imt CMake flag is set."
477 << Endl;
478 Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
479 "you have a BLAS implementation and it was successfully "
480 "detected by CMake as well that the imt CMake flag is set."
481 << Endl;
482#endif // DNNCPU
483 }
484
485 //
486 // Set network structure.
487 //
488
489 fLayout = TMVA::MethodDNN::ParseLayoutString (fLayoutString);
490 size_t inputSize = GetNVariables ();
491 size_t outputSize = 1;
492 if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
493 outputSize = GetNTargets();
494 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
495 outputSize = DataInfo().GetNClasses();
496 }
497
498 fNet.SetBatchSize(1);
499 fNet.SetInputWidth(inputSize);
500
501 auto itLayout = std::begin (fLayout);
502 auto itLayoutEnd = std::end (fLayout)-1;
503 for ( ; itLayout != itLayoutEnd; ++itLayout) {
504 fNet.AddLayer((*itLayout).first, (*itLayout).second);
505 }
506 fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
507
508 //
509 // Loss function and output.
510 //
511
512 fOutputFunction = EOutputFunction::kSigmoid;
513 if (fAnalysisType == Types::kClassification)
514 {
515 if (fErrorStrategy == "SUMOFSQUARES") {
516 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
517 }
518 if (fErrorStrategy == "CROSSENTROPY") {
519 fNet.SetLossFunction(ELossFunction::kCrossEntropy);
520 }
521 fOutputFunction = EOutputFunction::kSigmoid;
522 } else if (fAnalysisType == Types::kRegression) {
523 if (fErrorStrategy != "SUMOFSQUARES") {
524 Log () << kWARNING << "For regression only SUMOFSQUARES is a valid "
525 << " neural net error function. Setting error function to "
526 << " SUMOFSQUARES now." << Endl;
527 }
528 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
529 fOutputFunction = EOutputFunction::kIdentity;
530 } else if (fAnalysisType == Types::kMulticlass) {
531 if (fErrorStrategy == "SUMOFSQUARES") {
532 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
533 }
534 if (fErrorStrategy == "CROSSENTROPY") {
535 fNet.SetLossFunction(ELossFunction::kCrossEntropy);
536 }
537 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
538 fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
539 }
540 fOutputFunction = EOutputFunction::kSoftmax;
541 }
542
543 //
544 // Initialization
545 //
546
547 if (fWeightInitializationString == "XAVIER") {
548 fWeightInitialization = DNN::EInitialization::kGauss;
549 }
550 else if (fWeightInitializationString == "XAVIERUNIFORM") {
551 fWeightInitialization = DNN::EInitialization::kUniform;
552 }
553 else {
554 fWeightInitialization = DNN::EInitialization::kGauss;
555 }
556
557 //
558 // Training settings.
559 //
560
561 // Force validation of the ValidationSize option
562 GetNumValidationSamples();
563
564 KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
565 TString ("|"),
566 TString (","));
567
568 std::cout << "Parsed Training DNN string " << fTrainingStrategyString << std::endl;
569 std::cout << "String has size " << strategyKeyValues.size() << std::endl;
570 for (auto& block : strategyKeyValues) {
571 TTrainingSettings settings;
572
573 settings.convergenceSteps = fetchValue(block, "ConvergenceSteps", 100);
574 settings.batchSize = fetchValue(block, "BatchSize", 30);
575 settings.testInterval = fetchValue(block, "TestRepetitions", 7);
576 settings.weightDecay = fetchValue(block, "WeightDecay", 0.0);
577 settings.learningRate = fetchValue(block, "LearningRate", 1e-5);
578 settings.momentum = fetchValue(block, "Momentum", 0.3);
579 settings.dropoutProbabilities = fetchValue(block, "DropConfig",
580 std::vector<Double_t>());
581
582 TString regularization = fetchValue(block, "Regularization",
583 TString ("NONE"));
584 if (regularization == "L1") {
585 settings.regularization = DNN::ERegularization::kL1;
586 } else if (regularization == "L2") {
587 settings.regularization = DNN::ERegularization::kL2;
588 } else {
589 settings.regularization = DNN::ERegularization::kNone;
590 }
591
592 TString strMultithreading = fetchValue(block, "Multithreading",
593 TString ("True"));
594 if (strMultithreading.BeginsWith ("T")) {
595 settings.multithreading = true;
596 } else {
597 settings.multithreading = false;
598 }
599
600 fTrainingSettings.push_back(settings);
601 }
602}
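// Sketch of the mapping above: a strategy block such as
//    LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,
//    TestRepetitions=7,WeightDecay=0.0,Regularization=L2,DropConfig=0.0+0.5
// fills one TTrainingSettings entry with learningRate = 0.1, momentum = 0.3,
// convergenceSteps = 50, batchSize = 30, testInterval = 7, weightDecay = 0.0,
// regularization = kL2 and dropoutProbabilities = {0.0, 0.5}; keys that are
// absent simply take the defaults passed to fetchValue.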
603
604////////////////////////////////////////////////////////////////////////////////
605/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
606/// 100 etc.
607/// - 20% and 0.2 selects 20% of the training set as validation data.
608/// - 100 selects 100 events as the validation data.
609///
610/// @return number of samples in validation set
611///
612
613UInt_t TMVA::MethodDNN::GetNumValidationSamples()
614{
615 Int_t nValidationSamples = 0;
616 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
617
618 // Parsing + Validation
619 // --------------------
620 if (fValidationSize.EndsWith("%")) {
621 // Relative spec. format 20%
622 TString intValStr = TString(fValidationSize.Strip(TString::kTrailing, '%'));
623
624 if (intValStr.IsFloat()) {
625 Double_t valSizeAsDouble = fValidationSize.Atof() / 100.0;
626 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
627 } else {
628 Log() << kFATAL << "Cannot parse number \"" << fValidationSize
629 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
630 }
631 } else if (fValidationSize.IsFloat()) {
632 Double_t valSizeAsDouble = fValidationSize.Atof();
633
634 if (valSizeAsDouble < 1.0) {
635 // Relative spec. format 0.2
636 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
637 } else {
638 // Absolute spec format 100 or 100.0
639 nValidationSamples = valSizeAsDouble;
640 }
641 } else {
642 Log() << kFATAL << "Cannot parse number \"" << fValidationSize << "\". Expected string like \"0.2\" or \"100\"."
643 << Endl;
644 }
645
646 // Value validation
647 // ----------------
648 if (nValidationSamples < 0) {
649 Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is negative." << Endl;
650 }
651
652 if (nValidationSamples == 0) {
653 Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is zero." << Endl;
654 }
655
656 if (nValidationSamples >= (Int_t) trainingSetSize) {
657 Log() << kFATAL << "Validation size \"" << fValidationSize
658 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
659 }
660
661 return nValidationSamples;
662}
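// Worked example: with 10000 training events, ValidationSize="20%" or "0.2"
// both give 2000 validation samples, while "100" gives exactly 100; values
// that parse to zero, negative numbers, or anything >= the training-set size
// are rejected with a fatal error above.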
663
664////////////////////////////////////////////////////////////////////////////////
665
666void TMVA::MethodDNN::Train()
667{
668 if (fInteractive && fInteractive->NotInitialized()){
669 std::vector<TString> titles = {"Error on training set", "Error on test set"};
670 fInteractive->Init(titles);
671 // JsMVA progress bar maximum (100%)
672 fIPyMaxIter = 100;
673 }
674
675 for (TTrainingSettings & settings : fTrainingSettings) {
676 size_t nValidationSamples = GetNumValidationSamples();
677 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
678 size_t nTestSamples = nValidationSamples;
679
680 if (nTrainingSamples < settings.batchSize ||
681 nValidationSamples < settings.batchSize ||
682 nTestSamples < settings.batchSize) {
683 Log() << kFATAL << "Number of samples in the datasets are train: "
684 << nTrainingSamples << " valid: " << nValidationSamples
685 << " test: " << nTestSamples << ". "
686 << "One of these is smaller than the batch size of "
687 << settings.batchSize << ". Please increase the batch"
688 << " size to be at least the same size as the smallest"
689 << " of these values." << Endl;
690 }
691 }
692
693 if (fArchitectureString == "GPU") {
694 TrainGpu();
695 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
696 ExitFromTraining();
697 return;
698 } else if (fArchitectureString == "OpenCL") {
699 Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
700 return;
701 } else if (fArchitectureString == "CPU") {
702 TrainCpu();
703 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
704 ExitFromTraining();
705 return;
706 }
707
708 Log() << kINFO << "Using Standard Implementation.";
709
710 std::vector<Pattern> trainPattern;
711 std::vector<Pattern> testPattern;
712
713 size_t nValidationSamples = GetNumValidationSamples();
714 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
715
716 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
717 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
718 const std::vector<TMVA::Event *> eventCollectionTesting{allData.begin() + nTrainingSamples, allData.end()};
719
720 for (auto &event : eventCollectionTraining) {
721 const std::vector<Float_t>& values = event->GetValues();
722 if (fAnalysisType == Types::kClassification) {
723 double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
724 trainPattern.push_back(Pattern (values.begin(),
725 values.end(),
726 outputValue,
727 event->GetWeight()));
728 trainPattern.back().addInput(1.0);
729 } else if (fAnalysisType == Types::kMulticlass) {
730 std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
731 oneHot[event->GetClass()] = 1.0;
732 trainPattern.push_back(Pattern (values.begin(), values.end(),
733 oneHot.cbegin(), oneHot.cend(),
734 event->GetWeight()));
735 trainPattern.back().addInput(1.0);
736 } else {
737 const std::vector<Float_t>& targets = event->GetTargets ();
738 trainPattern.push_back(Pattern(values.begin(),
739 values.end(),
740 targets.begin(),
741 targets.end(),
742 event->GetWeight ()));
743 trainPattern.back ().addInput (1.0); // bias node
744 }
745 }
746
747 for (auto &event : eventCollectionTesting) {
748 const std::vector<Float_t>& values = event->GetValues();
749 if (fAnalysisType == Types::kClassification) {
750 double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
751 testPattern.push_back(Pattern (values.begin(),
752 values.end(),
753 outputValue,
754 event->GetWeight()));
755 testPattern.back().addInput(1.0);
756 } else if (fAnalysisType == Types::kMulticlass) {
757 std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
758 oneHot[event->GetClass()] = 1.0;
759 testPattern.push_back(Pattern (values.begin(), values.end(),
760 oneHot.cbegin(), oneHot.cend(),
761 event->GetWeight()));
762 testPattern.back().addInput(1.0);
763 } else {
764 const std::vector<Float_t>& targets = event->GetTargets ();
765 testPattern.push_back(Pattern(values.begin(),
766 values.end(),
767 targets.begin(),
768 targets.end(),
769 event->GetWeight ()));
770 testPattern.back ().addInput (1.0); // bias node
771 }
772 }
773
774 TMVA::DNN::Net net;
775 std::vector<double> weights;
776
777 net.SetIpythonInteractive(fInteractive, &fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);
778
779 net.setInputSize(fNet.GetInputWidth() + 1);
780 net.setOutputSize(fNet.GetOutputWidth() + 1);
781
782 for (size_t i = 0; i < fNet.GetDepth(); i++) {
783 EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();
784 EnumFunction g = EnumFunction::LINEAR;
785 switch(f) {
786 case EActivationFunction::kIdentity: g = EnumFunction::LINEAR; break;
787 case EActivationFunction::kRelu: g = EnumFunction::RELU; break;
788 case EActivationFunction::kSigmoid: g = EnumFunction::SIGMOID; break;
789 case EActivationFunction::kTanh: g = EnumFunction::TANH; break;
790 case EActivationFunction::kFastTanh: g = EnumFunction::TANH; break;
791 case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
792 case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
793 case EActivationFunction::kGauss: g = EnumFunction::GAUSS; break;
794 }
795 if (i < fNet.GetDepth() - 1) {
796 net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
797 } else {
798 ModeOutputValues h = ModeOutputValues::DIRECT;
799 switch(fOutputFunction) {
800 case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT; break;
801 case EOutputFunction::kSigmoid: h = ModeOutputValues::SIGMOID; break;
802 case EOutputFunction::kSoftmax: h = ModeOutputValues::SOFTMAX; break;
803 }
804 net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
805 }
806 }
807
808 switch(fNet.GetLossFunction()) {
809 case ELossFunction::kMeanSquaredError:
810 net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
811 break;
812 case ELossFunction::kCrossEntropy:
813 net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
814 break;
815 case ELossFunction::kSoftmaxCrossEntropy:
816 net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
817 break;
818 }
819
820 switch(fWeightInitialization) {
821 case EInitialization::kGauss:
822 net.initializeWeights(WeightInitializationStrategy::XAVIER,
823 std::back_inserter(weights));
824 break;
825 case EInitialization::kUniform:
826 net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
827 std::back_inserter(weights));
828 break;
829 default:
830 net.initializeWeights(WeightInitializationStrategy::XAVIER,
831 std::back_inserter(weights));
832 break;
833 }
834
835 int idxSetting = 0;
836 for (auto s : fTrainingSettings) {
837
838 EnumRegularization r = EnumRegularization::NONE;
839 switch(s.regularization) {
840 case ERegularization::kNone: r = EnumRegularization::NONE; break;
841 case ERegularization::kL1: r = EnumRegularization::L1; break;
842 case ERegularization::kL2: r = EnumRegularization::L2; break;
843 }
844
845 Settings * settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
846 s.testInterval, s.weightDecay, r,
847 MinimizerType::fSteepest, s.learningRate,
848 s.momentum, 1, s.multithreading);
849 std::shared_ptr<Settings> ptrSettings(settings);
850 ptrSettings->setMonitoring (0);
851 Log() << kINFO
852 << "Training with learning rate = " << ptrSettings->learningRate ()
853 << ", momentum = " << ptrSettings->momentum ()
854 << ", repetitions = " << ptrSettings->repetitions ()
855 << Endl;
856
857 ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
858 (idxSetting+1)*100.0/(fSettings.size ()));
859
860 const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
861 if (!dropConfig.empty ()) {
862 Log () << kINFO << "Drop configuration" << Endl
863 << " drop repetitions = " << ptrSettings->dropRepetitions()
864 << Endl;
865 }
866
867 int idx = 0;
868 for (auto f : dropConfig) {
869 Log () << kINFO << " Layer " << idx << " = " << f << Endl;
870 ++idx;
871 }
872 Log () << kINFO << Endl;
873
874 DNN::Steepest minimizer(ptrSettings->learningRate(),
875 ptrSettings->momentum(),
876 ptrSettings->repetitions());
877 net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
878 ptrSettings.reset();
879 Log () << kINFO << Endl;
880 idxSetting++;
881 }
882 size_t weightIndex = 0;
883 for (size_t l = 0; l < fNet.GetDepth(); l++) {
884 auto & layerWeights = fNet.GetLayer(l).GetWeights();
885 for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
886 for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
887 layerWeights(i,j) = weights[weightIndex];
888 weightIndex++;
889 }
890 }
891 auto & layerBiases = fNet.GetLayer(l).GetBiases();
892 if (l == 0) {
893 for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
894 layerBiases(i,0) = weights[weightIndex];
895 weightIndex++;
896 }
897 } else {
898 for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
899 layerBiases(i,0) = 0.0;
900 }
901 }
902 }
903 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
904 ExitFromTraining();
905}
906
907////////////////////////////////////////////////////////////////////////////////
908
909void TMVA::MethodDNN::TrainGpu()
910{
911
912#ifdef DNNCUDA // Included only if DNNCUDA flag is set.
913 Log() << kINFO << "Start of neural network training on GPU." << Endl << Endl;
914
915 size_t nValidationSamples = GetNumValidationSamples();
916 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
917 size_t nTestSamples = nValidationSamples;
918
919 Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
920 Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;
921
922 size_t trainingPhase = 1;
923 fNet.Initialize(fWeightInitialization);
924 for (TTrainingSettings & settings : fTrainingSettings) {
925
926 if (fInteractive){
927 fInteractive->ClearGraphs();
928 }
929
930 TNet<TCuda<>> net(settings.batchSize, fNet);
931 net.SetWeightDecay(settings.weightDecay);
932 net.SetRegularization(settings.regularization);
933
934 // Need to convert dropoutprobabilities to conventions used
935 // by backend implementation.
936 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
937 for (auto & p : dropoutVector) {
938 p = 1.0 - p;
939 }
940 net.SetDropoutProbabilities(dropoutVector);
941
942 net.InitializeGradients();
943 auto testNet = net.CreateClone(settings.batchSize);
944
945 Log() << kINFO << "Training phase " << trainingPhase << " of "
946 << fTrainingSettings.size() << ":" << Endl;
947 trainingPhase++;
948
949 using DataLoader_t = TDataLoader<TMVAInput_t, TCuda<>>;
950
951 // Split training data into training and validation set
952 const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
953 const std::vector<Event *> trainingInputData =
954 std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
955 const std::vector<Event *> testInputData =
956 std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
957
958 if (trainingInputData.size() != nTrainingSamples) {
959 Log() << kFATAL << "Inconsistent training sample size" << Endl;
960 }
961 if (testInputData.size() != nTestSamples) {
962 Log() << kFATAL << "Inconsistent test sample size" << Endl;
963 }
964
965 size_t nThreads = 1;
966 TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
967 TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
968 DataLoader_t trainingData(trainingTuple, nTrainingSamples,
969 net.GetBatchSize(), net.GetInputWidth(),
970 net.GetOutputWidth(), nThreads);
971 DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
972 net.GetInputWidth(), net.GetOutputWidth(),
973 nThreads);
974 DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
975 settings.convergenceSteps,
976 settings.testInterval);
977
978 std::vector<TNet<TCuda<>>> nets{};
979 std::vector<TBatch<TCuda<>>> batches{};
980 nets.reserve(nThreads);
981 for (size_t i = 0; i < nThreads; i++) {
982 nets.push_back(net);
983 for (size_t j = 0; j < net.GetDepth(); j++)
984 {
985 auto &masterLayer = net.GetLayer(j);
986 auto &layer = nets.back().GetLayer(j);
987 TCuda<>::Copy(layer.GetWeights(),
988 masterLayer.GetWeights());
989 TCuda<>::Copy(layer.GetBiases(),
990 masterLayer.GetBiases());
991 }
992 }
993
994 bool converged = false;
995 size_t stepCount = 0;
996 size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
997
998 std::chrono::time_point<std::chrono::system_clock> start, end;
999 start = std::chrono::system_clock::now();
1000
1001 if (!fInteractive) {
1002 Log() << std::setw(10) << "Epoch" << " | "
1003 << std::setw(12) << "Train Err."
1004 << std::setw(12) << "Test Err."
1005 << std::setw(12) << "GFLOP/s"
1006 << std::setw(12) << "Conv. Steps" << Endl;
1007 std::string separator(62, '-');
1008 Log() << separator << Endl;
1009 }
1010
1011 while (!converged)
1012 {
1013 stepCount++;
1014
1015 // Perform minimization steps for a full epoch.
1016 trainingData.Shuffle();
1017 for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1018 batches.clear();
1019 for (size_t j = 0; j < nThreads; j++) {
1020 batches.reserve(nThreads);
1021 batches.push_back(trainingData.GetBatch());
1022 }
1023 if (settings.momentum > 0.0) {
1024 minimizer.StepMomentum(net, nets, batches, settings.momentum);
1025 } else {
1026 minimizer.Step(net, nets, batches);
1027 }
1028 }
1029
1030 if ((stepCount % minimizer.GetTestInterval()) == 0) {
1031
1032 // Compute test error.
1033 Double_t testError = 0.0;
1034 for (auto batch : testData) {
1035 auto inputMatrix = batch.GetInput();
1036 auto outputMatrix = batch.GetOutput();
1037 testError += testNet.Loss(inputMatrix, outputMatrix);
1038 }
1039 testError /= (Double_t) (nTestSamples / settings.batchSize);
1040
1041 //Log the loss value
1042 fTrainHistory.AddValue("testError",stepCount,testError);
1043
1044 end = std::chrono::system_clock::now();
1045
1046 // Compute training error.
1047 Double_t trainingError = 0.0;
1048 for (auto batch : trainingData) {
1049 auto inputMatrix = batch.GetInput();
1050 auto outputMatrix = batch.GetOutput();
1051 trainingError += net.Loss(inputMatrix, outputMatrix);
1052 }
1053 trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1054 //Log the loss value
1055 fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1056
1057 // Compute numerical throughput.
1058 std::chrono::duration<double> elapsed_seconds = end - start;
1059 double seconds = elapsed_seconds.count();
1060 double nFlops = (double) (settings.testInterval * batchesInEpoch);
1061 nFlops *= net.GetNFlops() * 1e-9;
1062
1063 converged = minimizer.HasConverged(testError);
1064 start = std::chrono::system_clock::now();
1065
1066 if (fInteractive) {
1067 fInteractive->AddPoint(stepCount, trainingError, testError);
1068 fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1069 / minimizer.GetConvergenceSteps ();
1070 if (fExitFromTraining) break;
1071 } else {
1072 Log() << std::setw(10) << stepCount << " | "
1073 << std::setw(12) << trainingError
1074 << std::setw(12) << testError
1075 << std::setw(12) << nFlops / seconds
1076 << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1077 if (converged) {
1078 Log() << Endl;
1079 }
1080 }
1081 }
1082 }
1083 for (size_t l = 0; l < net.GetDepth(); l++) {
1084 fNet.GetLayer(l).GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1085 fNet.GetLayer(l).GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1086 }
1087 }
1088
1089#else // DNNCUDA flag not set.
1090
1091 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1092 "you have CUDA installed and it was successfully "
1093 "detected by CMAKE." << Endl;
1094#endif // DNNCUDA
1095}
1096
1097////////////////////////////////////////////////////////////////////////////////
1098
1099void TMVA::MethodDNN::TrainCpu()
1100{
1101
1102#ifdef DNNCPU // Included only if DNNCPU flag is set.
1103 Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;
1104
1105 size_t nValidationSamples = GetNumValidationSamples();
1106 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1107 size_t nTestSamples = nValidationSamples;
1108
1109 Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
1110 Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;
1111
1112 fNet.Initialize(fWeightInitialization);
1113
1114 size_t trainingPhase = 1;
1115 for (TTrainingSettings & settings : fTrainingSettings) {
1116
1117 if (fInteractive){
1118 fInteractive->ClearGraphs();
1119 }
1120
1121 Log() << "Training phase " << trainingPhase << " of "
1122 << fTrainingSettings.size() << ":" << Endl;
1123 trainingPhase++;
1124
1125 TNet<TCpu<>> net(settings.batchSize, fNet);
1126 net.SetWeightDecay(settings.weightDecay);
1127 net.SetRegularization(settings.regularization);
1128 // Need to convert dropoutprobabilities to conventions used
1129 // by backend implementation.
1130 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1131 for (auto & p : dropoutVector) {
1132 p = 1.0 - p;
1133 }
1134 net.SetDropoutProbabilities(dropoutVector);
1135 net.InitializeGradients();
1136 auto testNet = net.CreateClone(settings.batchSize);
1137
1138 using DataLoader_t = TDataLoader<TMVAInput_t, TCpu<>>;
1139
1140 // Split training data into training and validation set
1141 const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
1142 const std::vector<Event *> trainingInputData =
1143 std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
1144 const std::vector<Event *> testInputData =
1145 std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
1146
1147 if (trainingInputData.size() != nTrainingSamples) {
1148 Log() << kFATAL << "Inconsistent training sample size" << Endl;
1149 }
1150 if (testInputData.size() != nTestSamples) {
1151 Log() << kFATAL << "Inconsistent test sample size" << Endl;
1152 }
1153
1154 size_t nThreads = 1;
1155 TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
1156 TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
1157 DataLoader_t trainingData(trainingTuple, nTrainingSamples,
1158 net.GetBatchSize(), net.GetInputWidth(),
1159 net.GetOutputWidth(), nThreads);
1160 DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
1161 net.GetInputWidth(), net.GetOutputWidth(),
1162 nThreads);
1163 DNN::TGradientDescent<TCpu<>> minimizer(settings.learningRate,
1164 settings.convergenceSteps,
1165 settings.testInterval);
1166
1167 std::vector<TNet<TCpu<>>> nets{};
1168 std::vector<TBatch<TCpu<>>> batches{};
1169 nets.reserve(nThreads);
1170 for (size_t i = 0; i < nThreads; i++) {
1171 nets.push_back(net);
1172 for (size_t j = 0; j < net.GetDepth(); j++)
1173 {
1174 auto &masterLayer = net.GetLayer(j);
1175 auto &layer = nets.back().GetLayer(j);
1176 TCpu<>::Copy(layer.GetWeights(),
1177 masterLayer.GetWeights());
1178 TCpu<>::Copy(layer.GetBiases(),
1179 masterLayer.GetBiases());
1180 }
1181 }
1182
1183 bool converged = false;
1184 size_t stepCount = 0;
1185 size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
1186
1187 std::chrono::time_point<std::chrono::system_clock> start, end;
1188 start = std::chrono::system_clock::now();
1189
1190 if (!fInteractive) {
1191 Log() << std::setw(10) << "Epoch" << " | "
1192 << std::setw(12) << "Train Err."
1193 << std::setw(12) << "Test Err."
1194 << std::setw(12) << "GFLOP/s"
1195 << std::setw(12) << "Conv. Steps" << Endl;
1196 std::string separator(62, '-');
1197 Log() << separator << Endl;
1198 }
1199
1200 while (!converged)
1201 {
1202 stepCount++;
1203 // Perform minimization steps for a full epoch.
1204 trainingData.Shuffle();
1205 for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1206 batches.clear();
1207 for (size_t j = 0; j < nThreads; j++) {
1208 batches.reserve(nThreads);
1209 batches.push_back(trainingData.GetBatch());
1210 }
1211 if (settings.momentum > 0.0) {
1212 minimizer.StepMomentum(net, nets, batches, settings.momentum);
1213 } else {
1214 minimizer.Step(net, nets, batches);
1215 }
1216 }
1217
1218 if ((stepCount % minimizer.GetTestInterval()) == 0) {
1219
1220 // Compute test error.
1221 Double_t testError = 0.0;
1222 for (auto batch : testData) {
1223 auto inputMatrix = batch.GetInput();
1224 auto outputMatrix = batch.GetOutput();
1225 auto weightMatrix = batch.GetWeights();
1226 testError += testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
1227 }
1228 testError /= (Double_t) (nTestSamples / settings.batchSize);
1229
1230 //Log the loss value
1231 fTrainHistory.AddValue("testError",stepCount,testError);
1232
1233 end = std::chrono::system_clock::now();
1234
1235 // Compute training error.
1236 Double_t trainingError = 0.0;
1237 for (auto batch : trainingData) {
1238 auto inputMatrix = batch.GetInput();
1239 auto outputMatrix = batch.GetOutput();
1240 auto weightMatrix = batch.GetWeights();
1241 trainingError += net.Loss(inputMatrix, outputMatrix, weightMatrix);
1242 }
1243 trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1244
1245 //Log the loss value
1246 fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1247
1248 if (fInteractive){
1249 fInteractive->AddPoint(stepCount, trainingError, testError);
1250 fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
1251 if (fExitFromTraining) break;
1252 }
1253
1254 // Compute numerical throughput.
1255 std::chrono::duration<double> elapsed_seconds = end - start;
1256 double seconds = elapsed_seconds.count();
1257 double nFlops = (double) (settings.testInterval * batchesInEpoch);
1258 nFlops *= net.GetNFlops() * 1e-9;
1259
1260 converged = minimizer.HasConverged(testError);
1261 start = std::chrono::system_clock::now();
1262
1263 if (fInteractive) {
1264 fInteractive->AddPoint(stepCount, trainingError, testError);
1265 fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1266 / minimizer.GetConvergenceSteps ();
1267 if (fExitFromTraining) break;
1268 } else {
1269 Log() << std::setw(10) << stepCount << " | "
1270 << std::setw(12) << trainingError
1271 << std::setw(12) << testError
1272 << std::setw(12) << nFlops / seconds
1273 << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1274 if (converged) {
1275 Log() << Endl;
1276 }
1277 }
1278 }
1279 }
1280
1281
1282 for (size_t l = 0; l < net.GetDepth(); l++) {
1283 auto & layer = fNet.GetLayer(l);
1284 layer.GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1285 layer.GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1286 }
1287 }
1288
1289#else // DNNCPU flag not set.
1290 Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1291 "you have a BLAS implementation and it was successfully "
1292 "detected by CMake as well that the imt CMake flag is set." << Endl;
1293#endif // DNNCPU
1294}
1295
1296////////////////////////////////////////////////////////////////////////////////
1297
1298Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUpper*/ )
1299{
1300 size_t nVariables = GetEvent()->GetNVariables();
1301 Matrix_t X(1, nVariables);
1302 Matrix_t YHat(1, 1);
1303
1304 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1305 for (size_t i = 0; i < nVariables; i++) {
1306 X(0,i) = inputValues[i];
1307 }
1308
1309 fNet.Prediction(YHat, X, fOutputFunction);
1310 return YHat(0,0);
1311}
1312
1313////////////////////////////////////////////////////////////////////////////////
1314
1315const std::vector<Float_t> & TMVA::MethodDNN::GetRegressionValues()
1316{
1317 size_t nVariables = GetEvent()->GetNVariables();
1318 Matrix_t X(1, nVariables);
1319
1320 const Event *ev = GetEvent();
1321 const std::vector<Float_t>& inputValues = ev->GetValues();
1322 for (size_t i = 0; i < nVariables; i++) {
1323 X(0,i) = inputValues[i];
1324 }
1325
1326 size_t nTargets = std::max(1u, ev->GetNTargets());
1327 Matrix_t YHat(1, nTargets);
1328 std::vector<Float_t> output(nTargets);
1329 auto net = fNet.CreateClone(1);
1330 net.Prediction(YHat, X, fOutputFunction);
1331
1332 for (size_t i = 0; i < nTargets; i++)
1333 output[i] = YHat(0, i);
1334
1335 if (fRegressionReturnVal == NULL) {
1336 fRegressionReturnVal = new std::vector<Float_t>();
1337 }
1338 fRegressionReturnVal->clear();
1339
1340 Event * evT = new Event(*ev);
1341 for (size_t i = 0; i < nTargets; ++i) {
1342 evT->SetTarget(i, output[i]);
1343 }
1344
1345 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1346 for (size_t i = 0; i < nTargets; ++i) {
1347 fRegressionReturnVal->push_back(evT2->GetTarget(i));
1348 }
1349 delete evT;
1350 return *fRegressionReturnVal;
1351}
1352
1353const std::vector<Float_t> & TMVA::MethodDNN::GetMulticlassValues()
1354{
1355 size_t nVariables = GetEvent()->GetNVariables();
1356 Matrix_t X(1, nVariables);
1357 Matrix_t YHat(1, DataInfo().GetNClasses());
1358 if (fMulticlassReturnVal == NULL) {
1359 fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1360 }
1361
1362 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1363 for (size_t i = 0; i < nVariables; i++) {
1364 X(0,i) = inputValues[i];
1365 }
1366
1367 fNet.Prediction(YHat, X, fOutputFunction);
1368 for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1369 (*fMulticlassReturnVal)[i] = YHat(0, i);
1370 }
1371 return *fMulticlassReturnVal;
1372}
1373
1374////////////////////////////////////////////////////////////////////////////////
1375
1376void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const
1377{
1378 void* nn = gTools().xmlengine().NewChild(parent, nullptr, "Weights");
1379 Int_t inputWidth = fNet.GetInputWidth();
1380 Int_t depth = fNet.GetDepth();
1381 char lossFunction = static_cast<char>(fNet.GetLossFunction());
1382 gTools().xmlengine().NewAttr(nn, nullptr, "InputWidth",
1383 gTools().StringFromInt(inputWidth));
1384 gTools().xmlengine().NewAttr(nn, nullptr, "Depth", gTools().StringFromInt(depth));
1385 gTools().xmlengine().NewAttr(nn, nullptr, "LossFunction", TString(lossFunction));
1386 gTools().xmlengine().NewAttr(nn, nullptr, "OutputFunction",
1387 TString(static_cast<char>(fOutputFunction)));
1388
1389 for (Int_t i = 0; i < depth; i++) {
1390 const auto& layer = fNet.GetLayer(i);
1391 auto layerxml = gTools().xmlengine().NewChild(nn, nullptr, "Layer");
1392 int activationFunction = static_cast<int>(layer.GetActivationFunction());
1393 gTools().xmlengine().NewAttr(layerxml, nullptr, "ActivationFunction",
1394 TString::Itoa(activationFunction, 10));
1395 WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
1396 WriteMatrixXML(layerxml, "Biases", layer.GetBiases());
1397 }
1398}
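// The XML fragment written above has roughly this shape (a sketch; attribute
// values are illustrative and the matrix payload format is defined by
// WriteMatrixXML):
//
//    <Weights InputWidth="4" Depth="3" LossFunction="C" OutputFunction="S">
//      <Layer ActivationFunction="2">
//        <Weights rows="..." cols="..."> ... </Weights>
//        <Biases  rows="..." cols="..."> ... </Biases>
//      </Layer>
//      ...
//    </Weights>
//
// Loss and output functions are stored as single characters and the
// activation function as its integer enum value, matching what
// ReadWeightsFromXML() below expects.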
1399
1400////////////////////////////////////////////////////////////////////////////////
1401
1402void TMVA::MethodDNN::ReadWeightsFromXML(void* rootXML)
1403{
1404 auto netXML = gTools().GetChild(rootXML, "Weights");
1405 if (!netXML){
1406 netXML = rootXML;
1407 }
1408
1409 fNet.Clear();
1410 fNet.SetBatchSize(1);
1411
1412 size_t inputWidth, depth;
1413 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1414 gTools().ReadAttr(netXML, "Depth", depth);
1415 char lossFunctionChar;
1416 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1417 char outputFunctionChar;
1418 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1419
1420 fNet.SetInputWidth(inputWidth);
1421 fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
1422 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1423
1424 size_t previousWidth = inputWidth;
1425 auto layerXML = gTools().xmlengine().GetChild(netXML, "Layer");
1426 for (size_t i = 0; i < depth; i++) {
1427 TString fString;
1428 EActivationFunction f;
1429
1430 // Read activation function.
1431 gTools().ReadAttr(layerXML, "ActivationFunction", fString);
1432 f = static_cast<EActivationFunction>(fString.Atoi());
1433
1434 // Read number of neurons.
1435 size_t width;
1436 auto matrixXML = gTools().GetChild(layerXML, "Weights");
1437 gTools().ReadAttr(matrixXML, "rows", width);
1438
1439 fNet.AddLayer(width, f);
1440 TMatrixT<Double_t> weights(width, previousWidth);
1441 TMatrixT<Double_t> biases(width, 1);
1442 ReadMatrixXML(layerXML, "Weights", weights);
1443 ReadMatrixXML(layerXML, "Biases", biases);
1444 fNet.GetLayer(i).GetWeights() = weights;
1445 fNet.GetLayer(i).GetBiases() = biases;
1446
1447 previousWidth = width;
1448 layerXML = gTools().GetNextChild(layerXML);
1449 }
1450}
1451
1452////////////////////////////////////////////////////////////////////////////////
1453
1454void TMVA::MethodDNN::ReadWeightsFromStream( std::istream & /*istr*/)
1455{
1456}
1457
1458////////////////////////////////////////////////////////////////////////////////
1459
1460const TMVA::Ranking* TMVA::MethodDNN::CreateRanking()
1461{
1462 fRanking = new Ranking( GetName(), "Importance" );
1463 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1464 fRanking->AddRank( Rank( GetInputLabel(ivar), 1.0));
1465 }
1466 return fRanking;
1467}
1468
1469////////////////////////////////////////////////////////////////////////////////
1470
1471void TMVA::MethodDNN::MakeClassSpecific( std::ostream& /*fout*/,
1472 const TString& /*className*/ ) const
1473{
1474}
1475
1476////////////////////////////////////////////////////////////////////////////////
1477
1478void TMVA::MethodDNN::GetHelpMessage() const
1479{
1480 // get help message text
1481 //
1482 // typical length of text line:
1483 // "|--------------------------------------------------------------|"
1484 TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1485 TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1486
1487 Log() << Endl;
1488 Log() << col << "--- Short description:" << colres << Endl;
1489 Log() << Endl;
1490 Log() << "The DNN neural network is a feedforward" << Endl;
1491 Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
1492 Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
1493 Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1494 Log() << "signal and one background, regression or multiclass). " << Endl;
1495 Log() << Endl;
1496 Log() << col << "--- Performance optimisation:" << colres << Endl;
1497 Log() << Endl;
1498
1499 const char* txt = "The DNN supports various options to improve performance in terms of training speed and \n \
1500reduction of overfitting: \n \
1501\n \
1502 - different training settings can be stacked. Such that the initial training \n\
1503 is done with a large learning rate and a large drop out fraction whilst \n \
1504 in a later stage learning rate and drop out can be reduced. \n \
1505 - drop out \n \
1506 [recommended: \n \
1507 initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
1508 later training stage: 0.1 or 0.0 for all layers \n \
1509 final training stage: 0.0] \n \
1510 Drop out is a technique where at each training cycle a fraction of arbitrary \n \
1511 nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
1512 - L1 and L2 regularization are available \n \
1513 - Minibatches \n \
1514 [recommended 10 - 150] \n \
1515 Arbitrary mini-batch sizes can be chosen. \n \
1516 - Multithreading \n \
1517 [recommended: True] \n \
1518 Multithreading can be turned on. The minibatches are distributed to the available \n \
1519 cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
1520 \n \
1521 Options: \n \
1522 \"Layout\": \n \
1523 - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
1524 - meaning: \n \
1525 . two hidden layers (separated by \",\") \n \
1526 . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
1527 . the activation function for the output layer is LINEAR \n \
1528 . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
1529 . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
1530 . the number of nodes in the output layer is determined by the number of output nodes \n \
1531 and can therefore not be chosen freely. \n \
1532 \n \
1533 \"ErrorStrategy\": \n \
1534 - SUMOFSQUARES \n \
1535 The error of the neural net is determined by a sum-of-squares error function \n \
1536 For regression, this is the only possible choice. \n \
1537 - CROSSENTROPY \n \
1538 The error of the neural net is determined by a cross entropy function. The \n \
1539 output values are automatically (internally) transformed into probabilities \n \
1540 using a sigmoid function. \n \
1541 For signal/background classification this is the default choice. \n \
1542 For multiclass using cross entropy more than one or no output classes \n \
1543 can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n \
1544 A and C is true, Event 2: C is true, ...) \n \
1545 - MUTUALEXCLUSIVE \n \
1546 In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
1547 \n \
1548 \"WeightInitialization\" \n \
1549 - XAVIER \n \
1550 [recommended] \n \
1551 \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
1552 such that the variance of the values of the nodes is preserved for each layer. \n \
1553 - XAVIERUNIFORM \n \
1554 The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
1555 - LAYERSIZE \n \
1556 Random values scaled by the layer size \n \
1557 \n \
1558 \"TrainingStrategy\" \n \
1559 - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFraction=0.0,DropRepetitions=5\" \n \
1560 - explanation: two stacked training settings separated by \"|\" \n \
1561 . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1562 . second training setting : \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFractions=0.0,DropRepetitions=5\" \n \
1563 . LearningRate : \n \
1564 - recommended for classification: 0.1 initially, 1e-4 later \n \
1565 - recommended for regression: 1e-4 and less \n \
1566 . Momentum : \n \
1567 preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
1568 . Repetitions : \n \
1569 train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
1570 . ConvergenceSteps : \n \
1571 Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n \
1572 of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n \
1573 cycle the test samples are evaluated and thus the convergence is checked) \n \
1574 . BatchSize \n \
1575 Size of the mini-batches. \n \
1576 . TestRepetitions \n \
1577 Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n \
1578 . WeightDecay \n \
1579 If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
1580 . Renormalize \n \
1581 NONE, L1 (|w|) or L2 (w^2) \n \
1582 . DropConfig \n \
1583 Drop a fraction of arbitrary nodes of each of the layers according to the values given \n \
1584 in the DropConfig. \n \
1585 [example: DropConfig=0.0+0.5+0.3 \n \
1586 meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
1587 in layer 2 \n \
1588 recommended: leave all the nodes turned on for the input layer (layer 0) \n \
1589 turn off half of the nodes in later layers for the initial training; leave all nodes \n \
1590 turned on (0.0) in later training stages] \n \
1591 . DropRepetitions \n \
1592 Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n \
1593 [recommended : 1] \n \
1594 . Multithreading \n \
1595 turn on multithreading [recommended: True] \n \
1596 \n";
1597 Log () << txt << Endl;
1598}
1599
1600} // namespace TMVA