MethodDNN.cxx
1// @(#)root/tmva $Id$
2// Author: Peter Speckmayer
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodDNN *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * A neural network implementation *
12 * *
13 * Authors (alphabetical): *
14 * Simon Pfreundschuh <s.pfreundschuh@gmail.com> - CERN, Switzerland *
15 * Peter Speckmayer <peter.speckmayer@gmx.ch> - CERN, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28/*! \class TMVA::MethodDNN
29\ingroup TMVA
30Deep Neural Network Implementation.
31*/
32
33#include "TMVA/MethodDNN.h"
34
35#include "TString.h"
36#include "TTree.h"
37#include "TFile.h"
38#include "TFormula.h"
39#include "TObjString.h"
40
42#include "TMVA/Configurable.h"
43#include "TMVA/IMethod.h"
44#include "TMVA/MsgLogger.h"
45#include "TMVA/MethodBase.h"
46#include "TMVA/Timer.h"
47#include "TMVA/Types.h"
48#include "TMVA/Tools.h"
49#include "TMVA/Config.h"
50#include "TMVA/Ranking.h"
51
52#include "TMVA/DNN/Net.h"
54
55#include "TMVA/NeuralNet.h"
56#include "TMVA/Monitoring.h"
57
58#include <algorithm>
59#include <iostream>
60#include <string>
61#include <iomanip>
62
63REGISTER_METHOD(DNN)
64
65ClassImp(TMVA::MethodDNN);
66
67namespace TMVA
68{
69 using namespace DNN;
70
71 ////////////////////////////////////////////////////////////////////////////////
72 /// standard constructor
73
74 TMVA::MethodDNN::MethodDNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData,
75 const TString &theOption)
76 : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption), fWeightInitialization(), fOutputFunction(),
77 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
78 fArchitectureString(), fTrainingSettings(), fResume(false), fSettings()
79 {
80}
81
82////////////////////////////////////////////////////////////////////////////////
83/// constructor from a weight file
84
85TMVA::MethodDNN::MethodDNN(DataSetInfo& theData,
86 const TString& theWeightFile)
87 : MethodBase( Types::kDNN, theData, theWeightFile),
88 fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
89 fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
90 fTrainingSettings(), fResume(false), fSettings()
91{
92 fWeightInitialization = DNN::EInitialization::kGauss;
93 fOutputFunction = DNN::EOutputFunction::kSigmoid;
94}
95
96////////////////////////////////////////////////////////////////////////////////
97/// destructor
98
99TMVA::MethodDNN::~MethodDNN()
100{
103}
104
105////////////////////////////////////////////////////////////////////////////////
106/// MethodDNN can handle binary classification, multiclass classification,
107/// and regression (with one or more targets)
108
109Bool_t TMVA::MethodDNN::HasAnalysisType(Types::EAnalysisType type,
110 UInt_t numberClasses,
111 UInt_t /*numberTargets*/ )
112{
113 if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
114 if (type == Types::kMulticlass ) return kTRUE;
115 if (type == Types::kRegression ) return kTRUE;
116
117 return kFALSE;
118}
119
120////////////////////////////////////////////////////////////////////////////////
121/// default initializations
122
123void TMVA::MethodDNN::Init() {
124 Log() << kWARNING
125 << "MethodDNN is deprecated and will be removed in a future ROOT version. "
126 "Please use MethodDL (TMVA::kDL) instead."
127 << Endl;
128
129}
130
131////////////////////////////////////////////////////////////////////////////////
132/// Options to be set in the option string:
133///
134/// - LearningRate <float> DNN learning rate parameter.
135/// - DecayRate <float> Decay rate for learning parameter.
136/// - TestRate <int> Period of validation set error computation.
137/// - BatchSize <int> Number of events per batch.
138///
139/// - ValidationSize <string> How many events to use for validation. "0.2"
140/// or "20%" indicates that a fifth of the
141/// training data should be used. "100"
142/// indicates that 100 events should be used.
143
144void TMVA::MethodDNN::DeclareOptions()
145{
146
147 DeclareOptionRef(fLayoutString="SOFTSIGN|(N+100)*2,LINEAR",
148 "Layout",
149 "Layout of the network.");
150
151 DeclareOptionRef(fValidationSize = "20%", "ValidationSize",
152 "Part of the training data to use for "
153 "validation. Specify as 0.2 or 20% to use a "
154 "fifth of the data set as validation set. "
155 "Specify as 100 to use exactly 100 events. "
156 "(Default: 20%)");
157
158 DeclareOptionRef(fErrorStrategy="CROSSENTROPY",
159 "ErrorStrategy",
160 "Loss function: Mean squared error (regression)"
161 " or cross entropy (binary classification).");
162 AddPreDefVal(TString("CROSSENTROPY"));
163 AddPreDefVal(TString("SUMOFSQUARES"));
164 AddPreDefVal(TString("MUTUALEXCLUSIVE"));
165
166 DeclareOptionRef(fWeightInitializationString="XAVIER",
167 "WeightInitialization",
168 "Weight initialization strategy");
169 AddPreDefVal(TString("XAVIER"));
170 AddPreDefVal(TString("XAVIERUNIFORM"));
171
172 DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
173 AddPreDefVal(TString("STANDARD"));
174 AddPreDefVal(TString("CPU"));
175 AddPreDefVal(TString("GPU"));
176 AddPreDefVal(TString("OPENCL"));
177
178 DeclareOptionRef(
179 fTrainingStrategyString = "LearningRate=1e-1,"
180 "Momentum=0.3,"
181 "Repetitions=3,"
182 "ConvergenceSteps=50,"
183 "BatchSize=30,"
184 "TestRepetitions=7,"
185 "WeightDecay=0.0,"
186 "Renormalize=L2,"
187 "DropConfig=0.0,"
188 "DropRepetitions=5|LearningRate=1e-4,"
189 "Momentum=0.3,"
190 "Repetitions=3,"
191 "ConvergenceSteps=50,"
192 "BatchSize=20,"
193 "TestRepetitions=7,"
194 "WeightDecay=0.001,"
195 "Renormalize=L2,"
196 "DropConfig=0.0+0.5+0.5,"
197 "DropRepetitions=5,"
198 "Multithreading=True",
199 "TrainingStrategy",
200 "Defines the training strategies.");
201}
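// Illustrative usage (not part of this file): the options declared above are
// passed in through the usual TMVA booking string; the option names match the
// DeclareOptionRef calls above, while the concrete values are only examples.
//
//    factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN",
//       "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR:"
//       "ErrorStrategy=CROSSENTROPY:WeightInitialization=XAVIER:Architecture=CPU:"
//       "TrainingStrategy=LearningRate=1e-1,Momentum=0.3,BatchSize=30,"
//       "ConvergenceSteps=50,TestRepetitions=7|LearningRate=1e-4,Momentum=0.3");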
202
203////////////////////////////////////////////////////////////////////////////////
204/// parse layout specification string and return a vector, each entry
205/// containing the number of neurons to go in each successive layer
206
207auto TMVA::MethodDNN::ParseLayoutString(TString layoutString)
208 -> LayoutVector_t
209{
210 LayoutVector_t layout;
211 const TString layerDelimiter(",");
212 const TString subDelimiter("|");
213
214 const size_t inputSize = GetNvar();
215
216 TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
217 TIter nextLayer (layerStrings);
218 TObjString* layerString = (TObjString*)nextLayer ();
219
220 for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
221 int numNodes = 0;
222 EActivationFunction activationFunction = EActivationFunction::kTanh;
223
224 TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
225 TIter nextToken (subStrings);
226 TObjString* token = (TObjString *) nextToken();
227 int idxToken = 0;
228 for (; token != nullptr; token = (TObjString *) nextToken()) {
229 switch (idxToken)
230 {
231 case 0:
232 {
233 TString strActFnc (token->GetString ());
234 if (strActFnc == "RELU") {
235 activationFunction = DNN::EActivationFunction::kRelu;
236 } else if (strActFnc == "TANH") {
237 activationFunction = DNN::EActivationFunction::kTanh;
238 } else if (strActFnc == "SYMMRELU") {
239 activationFunction = DNN::EActivationFunction::kSymmRelu;
240 } else if (strActFnc == "SOFTSIGN") {
241 activationFunction = DNN::EActivationFunction::kSoftSign;
242 } else if (strActFnc == "SIGMOID") {
243 activationFunction = DNN::EActivationFunction::kSigmoid;
244 } else if (strActFnc == "LINEAR") {
245 activationFunction = DNN::EActivationFunction::kIdentity;
246 } else if (strActFnc == "GAUSS") {
247 activationFunction = DNN::EActivationFunction::kGauss;
248 }
249 }
250 break;
251 case 1: // number of nodes
252 {
253 TString strNumNodes (token->GetString ());
254 TString strN ("x");
255 strNumNodes.ReplaceAll ("N", strN);
256 strNumNodes.ReplaceAll ("n", strN);
257 TFormula fml ("tmp",strNumNodes);
258 numNodes = fml.Eval (inputSize);
259 }
260 break;
261 }
262 ++idxToken;
263 }
264 layout.push_back(std::make_pair(numNodes, activationFunction));
265 }
266 return layout;
267}
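// Example (illustrative): for an input of 10 variables, the default layout
// string "SOFTSIGN|(N+100)*2,LINEAR" declared in DeclareOptions is parsed into
// two entries: a soft-sign layer with (10+100)*2 = 220 nodes, and a final
// "LINEAR" entry whose node count stays 0 here, since the width of the output
// layer is fixed later in ProcessOptions.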
268
269////////////////////////////////////////////////////////////////////////////////
270/// parse key value pairs in blocks -> return vector of blocks with map of key value pairs
271
272auto TMVA::MethodDNN::ParseKeyValueString(TString parseString,
273 TString blockDelim,
274 TString tokenDelim)
275 -> KeyValueVector_t
276{
277 KeyValueVector_t blockKeyValues;
278 const TString keyValueDelim ("=");
279
280 TObjArray* blockStrings = parseString.Tokenize (blockDelim);
281 TIter nextBlock (blockStrings);
282 TObjString* blockString = (TObjString *) nextBlock();
283
284 for (; blockString != nullptr; blockString = (TObjString *) nextBlock())
285 {
286 blockKeyValues.push_back (std::map<TString,TString>());
287 std::map<TString,TString>& currentBlock = blockKeyValues.back ();
288
289 TObjArray* subStrings = blockString->GetString ().Tokenize (tokenDelim);
290 TIter nextToken (subStrings);
291 TObjString* token = (TObjString*)nextToken ();
292
293 for (; token != nullptr; token = (TObjString *)nextToken())
294 {
295 TString strKeyValue (token->GetString ());
296 int delimPos = strKeyValue.First (keyValueDelim.Data ());
297 if (delimPos <= 0)
298 continue;
299
300 TString strKey = TString (strKeyValue (0, delimPos));
301 strKey.ToUpper();
302 TString strValue = TString (strKeyValue (delimPos+1, strKeyValue.Length ()));
303
304 strKey.Strip (TString::kBoth, ' ');
305 strValue.Strip (TString::kBoth, ' ');
306
307 currentBlock.insert (std::make_pair (strKey, strValue));
308 }
309 }
310 return blockKeyValues;
311}
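// Example (illustrative): with blockDelim "|" and tokenDelim ",", the string
// "LearningRate=1e-1,BatchSize=30|LearningRate=1e-4,BatchSize=20" is parsed
// into two blocks, {"LEARNINGRATE"->"1e-1", "BATCHSIZE"->"30"} and
// {"LEARNINGRATE"->"1e-4", "BATCHSIZE"->"20"}; keys are upper-cased, values
// are kept verbatim.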
312
313////////////////////////////////////////////////////////////////////////////////
314
315TString fetchValue (const std::map<TString, TString>& keyValueMap, TString key)
316{
317 key.ToUpper ();
318 std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
319 if (it == keyValueMap.end()) {
320 return TString ("");
321 }
322 return it->second;
323}
324
325////////////////////////////////////////////////////////////////////////////////
326
327template <typename T>
328T fetchValue(const std::map<TString,TString>& keyValueMap,
329 TString key,
330 T defaultValue);
331
332////////////////////////////////////////////////////////////////////////////////
333
334template <>
335int fetchValue(const std::map<TString,TString>& keyValueMap,
336 TString key,
337 int defaultValue)
338{
339 TString value (fetchValue (keyValueMap, key));
340 if (value == "") {
341 return defaultValue;
342 }
343 return value.Atoi ();
344}
345
346////////////////////////////////////////////////////////////////////////////////
347
348template <>
349double fetchValue (const std::map<TString,TString>& keyValueMap,
350 TString key, double defaultValue)
351{
352 TString value (fetchValue (keyValueMap, key));
353 if (value == "") {
354 return defaultValue;
355 }
356 return value.Atof ();
357}
358
359////////////////////////////////////////////////////////////////////////////////
360
361template <>
362TString fetchValue (const std::map<TString,TString>& keyValueMap,
363 TString key, TString defaultValue)
364{
365 TString value (fetchValue (keyValueMap, key));
366 if (value == "") {
367 return defaultValue;
368 }
369 return value;
370}
371
372////////////////////////////////////////////////////////////////////////////////
373
374template <>
375bool fetchValue (const std::map<TString,TString>& keyValueMap,
376 TString key, bool defaultValue)
377{
378 TString value (fetchValue (keyValueMap, key));
379 if (value == "") {
380 return defaultValue;
381 }
382 value.ToUpper ();
383 if (value == "TRUE" || value == "T" || value == "1") {
384 return true;
385 }
386 return false;
387}
388
389////////////////////////////////////////////////////////////////////////////////
390
391template <>
392std::vector<double> fetchValue(const std::map<TString, TString> & keyValueMap,
393 TString key,
394 std::vector<double> defaultValue)
395{
396 TString parseString (fetchValue (keyValueMap, key));
397 if (parseString == "") {
398 return defaultValue;
399 }
400 parseString.ToUpper ();
401 std::vector<double> values;
402
403 const TString tokenDelim ("+");
404 TObjArray* tokenStrings = parseString.Tokenize (tokenDelim);
405 TIter nextToken (tokenStrings);
406 TObjString* tokenString = (TObjString*)nextToken ();
407 for (; tokenString != NULL; tokenString = (TObjString*)nextToken ()) {
408 std::stringstream sstr;
409 double currentValue;
410 sstr << tokenString->GetString ().Data ();
411 sstr >> currentValue;
412 values.push_back (currentValue);
413 }
414 return values;
415}
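// Example (illustrative): fetchValue(block, "DropConfig", std::vector<double>())
// applied to a block containing DropConfig=0.0+0.5+0.5 returns {0.0, 0.5, 0.5};
// if the key is absent, the supplied default vector is returned unchanged.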
416
417////////////////////////////////////////////////////////////////////////////////
418
419void TMVA::MethodDNN::ProcessOptions()
420{
421 if (IgnoreEventsWithNegWeightsInTraining()) {
422 Log() << kINFO
423 << "Will ignore negative events in training!"
424 << Endl;
425 }
426
427 if (fArchitectureString == "STANDARD") {
428 Log() << kERROR << "The STANDARD architecture has been deprecated. "
429 "Please use Architecture=CPU or Architecture=CPU."
430 "See the TMVA Users' Guide for instructions if you "
431 "encounter problems."
432 << Endl;
433 Log() << kFATAL << "The STANDARD architecture has been deprecated. "
434 "Please use Architecture=CPU or Architecture=CPU."
435 "See the TMVA Users' Guide for instructions if you "
436 "encounter problems."
437 << Endl;
438 }
439
440 if (fArchitectureString == "OPENCL") {
441 Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
442 "Please use Architecture=CPU or Architecture=CPU for the "
443 "time being. See the TMVA Users' Guide for instructions "
444 "if you encounter problems."
445 << Endl;
446 Log() << kFATAL << "The OPENCL architecture has not been implemented yet. "
447 "Please use Architecture=CPU or Architecture=CPU for the "
448 "time being. See the TMVA Users' Guide for instructions "
449 "if you encounter problems."
450 << Endl;
451 }
452
453 if (fArchitectureString == "GPU") {
454#ifndef DNNCUDA // Included only if DNNCUDA flag is _not_ set.
455 Log() << kERROR << "CUDA backend not enabled. Please make sure "
456 "you have CUDA installed and it was successfully "
457 "detected by CMAKE."
458 << Endl;
459 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
460 "you have CUDA installed and it was successfully "
461 "detected by CMAKE."
462 << Endl;
463#endif // DNNCUDA
464 }
465
466 if (fArchitectureString == "CPU") {
467#ifndef DNNCPU // Included only if DNNCPU flag is _not_ set.
468 Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
469 "you have a BLAS implementation and it was successfully "
470 "detected by CMake as well that the imt CMake flag is set."
471 << Endl;
472 Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
473 "you have a BLAS implementation and it was successfully "
474 "detected by CMake as well that the imt CMake flag is set."
475 << Endl;
476#endif // DNNCPU
477 }
478
479 //
480 // Set network structure.
481 //
482
483 fLayout = TMVA::MethodDNN::ParseLayoutString (fLayoutString);
484 size_t inputSize = GetNVariables ();
485 size_t outputSize = 1;
486 if (fAnalysisType == Types::kRegression && GetNTargets() != 0) {
487 outputSize = GetNTargets();
488 } else if (fAnalysisType == Types::kMulticlass && DataInfo().GetNClasses() >= 2) {
489 outputSize = DataInfo().GetNClasses();
490 }
491
492 fNet.SetBatchSize(1);
493 fNet.SetInputWidth(inputSize);
494
495 auto itLayout = std::begin (fLayout);
496 auto itLayoutEnd = std::end (fLayout)-1;
497 for ( ; itLayout != itLayoutEnd; ++itLayout) {
498 fNet.AddLayer((*itLayout).first, (*itLayout).second);
499 }
500 fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
501
502 //
503 // Loss function and output.
504 //
505
506 fOutputFunction = EOutputFunction::kSigmoid;
507 if (fAnalysisType == Types::kClassification)
508 {
509 if (fErrorStrategy == "SUMOFSQUARES") {
510 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
511 }
512 if (fErrorStrategy == "CROSSENTROPY") {
513 fNet.SetLossFunction(ELossFunction::kCrossEntropy);
514 }
515 fOutputFunction = EOutputFunction::kSigmoid;
516 } else if (fAnalysisType == Types::kRegression) {
517 if (fErrorStrategy != "SUMOFSQUARES") {
518 Log () << kWARNING << "For regression only SUMOFSQUARES is a valid "
519 << " neural net error function. Setting error function to "
520 << " SUMOFSQUARES now." << Endl;
521 }
522 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
523 fOutputFunction = EOutputFunction::kIdentity;
524 } else if (fAnalysisType == Types::kMulticlass) {
525 if (fErrorStrategy == "SUMOFSQUARES") {
526 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
527 }
528 if (fErrorStrategy == "CROSSENTROPY") {
529 fNet.SetLossFunction(ELossFunction::kCrossEntropy);
530 }
531 if (fErrorStrategy == "MUTUALEXCLUSIVE") {
532 fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
533 }
534 fOutputFunction = EOutputFunction::kSoftmax;
535 }
536
537 //
538 // Initialization
539 //
540
541 if (fWeightInitializationString == "XAVIER") {
542 fWeightInitialization = DNN::EInitialization::kGauss;
543 }
544 else if (fWeightInitializationString == "XAVIERUNIFORM") {
545 fWeightInitialization = DNN::EInitialization::kUniform;
546 }
547 else {
548 fWeightInitialization = DNN::EInitialization::kGauss;
549 }
550
551 //
552 // Training settings.
553 //
554
555 // Force validation of the ValidationSize option
556 GetNumValidationSamples();
557
558 KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
559 TString ("|"),
560 TString (","));
561
562 std::cout << "Parsed Training DNN string " << fTrainingStrategyString << std::endl;
563 std::cout << "STring has size " << strategyKeyValues.size() << std::endl;
564 for (auto& block : strategyKeyValues) {
565 TTrainingSettings settings;
566
567 settings.convergenceSteps = fetchValue(block, "ConvergenceSteps", 100);
568 settings.batchSize = fetchValue(block, "BatchSize", 30);
569 settings.testInterval = fetchValue(block, "TestRepetitions", 7);
570 settings.weightDecay = fetchValue(block, "WeightDecay", 0.0);
571 settings.learningRate = fetchValue(block, "LearningRate", 1e-5);
572 settings.momentum = fetchValue(block, "Momentum", 0.3);
573 settings.dropoutProbabilities = fetchValue(block, "DropConfig",
574 std::vector<Double_t>());
575
576 TString regularization = fetchValue(block, "Regularization",
577 TString ("NONE"));
578 if (regularization == "L1") {
580 } else if (regularization == "L2") {
582 } else {
584 }
585
586 TString strMultithreading = fetchValue(block, "Multithreading",
587 TString ("True"));
588 if (strMultithreading.BeginsWith ("T")) {
589 settings.multithreading = true;
590 } else {
591 settings.multithreading = false;
592 }
593
594 fTrainingSettings.push_back(settings);
595 }
596}
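// Example (illustrative): the default TrainingStrategy string declared in
// DeclareOptions yields two TTrainingSettings entries. The first has
// learningRate = 1e-1, batchSize = 30, convergenceSteps = 50, testInterval = 7,
// weightDecay = 0.0 and dropoutProbabilities = {0.0}; the second has
// learningRate = 1e-4, batchSize = 20, weightDecay = 0.001 and
// dropoutProbabilities = {0.0, 0.5, 0.5}. Note that the code above reads the
// key "Regularization", so the "Renormalize=L2" token of the default string is
// ignored and the regularization stays at its default of none.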
597
598////////////////////////////////////////////////////////////////////////////////
599/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
600/// 100 etc.
601/// - 20% and 0.2 selects 20% of the training set as validation data.
602/// - 100 selects 100 events as the validation data.
603///
604/// @return number of samples in validation set
605///
606
607UInt_t TMVA::MethodDNN::GetNumValidationSamples()
608{
609 Int_t nValidationSamples = 0;
610 UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
611
612 // Parsing + Validation
613 // --------------------
614 if (fValidationSize.EndsWith("%")) {
615 // Relative spec. format 20%
616 TString intValStr = TString(fValidationSize.Strip(TString::kTrailing, '%'));
617
618 if (intValStr.IsFloat()) {
619 Double_t valSizeAsDouble = fValidationSize.Atof() / 100.0;
620 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
621 } else {
622 Log() << kFATAL << "Cannot parse number \"" << fValidationSize
623 << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
624 }
625 } else if (fValidationSize.IsFloat()) {
626 Double_t valSizeAsDouble = fValidationSize.Atof();
627
628 if (valSizeAsDouble < 1.0) {
629 // Relative spec. format 0.2
630 nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
631 } else {
632 // Absolute spec format 100 or 100.0
633 nValidationSamples = valSizeAsDouble;
634 }
635 } else {
636 Log() << kFATAL << "Cannot parse number \"" << fValidationSize << "\". Expected string like \"0.2\" or \"100\"."
637 << Endl;
638 }
639
640 // Value validation
641 // ----------------
642 if (nValidationSamples < 0) {
643 Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is negative." << Endl;
644 }
645
646 if (nValidationSamples == 0) {
647 Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is zero." << Endl;
648 }
649
650 if (nValidationSamples >= (Int_t)trainingSetSize) {
651 Log() << kFATAL << "Validation size \"" << fValidationSize
652 << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
653 }
654
655 return nValidationSamples;
656}
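// Example (illustrative): with 10000 training events, ValidationSize="20%" or
// "0.2" gives 2000 validation samples and ValidationSize="100" gives exactly
// 100; a value of zero, a negative value, or a value at least as large as the
// training set is rejected with a fatal error by the checks above.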
657
658////////////////////////////////////////////////////////////////////////////////
659
660void TMVA::MethodDNN::Train()
661{
662 if (fInteractive && fInteractive->NotInitialized()){
663 std::vector<TString> titles = {"Error on training set", "Error on test set"};
664 fInteractive->Init(titles);
665 // JsMVA progress bar maximum (100%)
666 fIPyMaxIter = 100;
667 }
668
669 for (TTrainingSettings & settings : fTrainingSettings) {
670 size_t nValidationSamples = GetNumValidationSamples();
671 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
672 size_t nTestSamples = nValidationSamples;
673
674 if (nTrainingSamples < settings.batchSize ||
675 nValidationSamples < settings.batchSize ||
676 nTestSamples < settings.batchSize) {
677 Log() << kFATAL << "Number of samples in the datasets are train: "
678 << nTrainingSamples << " valid: " << nValidationSamples
679 << " test: " << nTestSamples << ". "
680 << "One of these is smaller than the batch size of "
681 << settings.batchSize << ". Please increase the batch"
682 << " size to be at least the same size as the smallest"
683 << " of these values." << Endl;
684 }
685 }
686
687 if (fArchitectureString == "GPU") {
688 TrainGpu();
689 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
690 ExitFromTraining();
691 return;
692 } else if (fArchitectureString == "OpenCL") {
693 Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
694 return;
695 } else if (fArchitectureString == "CPU") {
696 TrainCpu();
697 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
698 ExitFromTraining();
699 return;
700 }
701
702 Log() << kINFO << "Using Standard Implementation.";
703
704 std::vector<Pattern> trainPattern;
705 std::vector<Pattern> testPattern;
706
707 size_t nValidationSamples = GetNumValidationSamples();
708 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
709
710 const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
711 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
712 const std::vector<TMVA::Event *> eventCollectionTesting{allData.begin() + nTrainingSamples, allData.end()};
713
714 for (auto &event : eventCollectionTraining) {
715 const std::vector<Float_t>& values = event->GetValues();
716 if (fAnalysisType == Types::kClassification) {
717 double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
718 trainPattern.push_back(Pattern (values.begin(),
719 values.end(),
720 outputValue,
721 event->GetWeight()));
722 trainPattern.back().addInput(1.0);
723 } else if (fAnalysisType == Types::kMulticlass) {
724 std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
725 oneHot[event->GetClass()] = 1.0;
726 trainPattern.push_back(Pattern (values.begin(), values.end(),
727 oneHot.cbegin(), oneHot.cend(),
728 event->GetWeight()));
729 trainPattern.back().addInput(1.0);
730 } else {
731 const std::vector<Float_t>& targets = event->GetTargets ();
732 trainPattern.push_back(Pattern(values.begin(),
733 values.end(),
734 targets.begin(),
735 targets.end(),
736 event->GetWeight ()));
737 trainPattern.back ().addInput (1.0); // bias node
738 }
739 }
740
741 for (auto &event : eventCollectionTesting) {
742 const std::vector<Float_t>& values = event->GetValues();
743 if (fAnalysisType == Types::kClassification) {
744 double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
745 testPattern.push_back(Pattern (values.begin(),
746 values.end(),
747 outputValue,
748 event->GetWeight()));
749 testPattern.back().addInput(1.0);
750 } else if (fAnalysisType == Types::kMulticlass) {
751 std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
752 oneHot[event->GetClass()] = 1.0;
753 testPattern.push_back(Pattern (values.begin(), values.end(),
754 oneHot.cbegin(), oneHot.cend(),
755 event->GetWeight()));
756 testPattern.back().addInput(1.0);
757 } else {
758 const std::vector<Float_t>& targets = event->GetTargets ();
759 testPattern.push_back(Pattern(values.begin(),
760 values.end(),
761 targets.begin(),
762 targets.end(),
763 event->GetWeight ()));
764 testPattern.back ().addInput (1.0); // bias node
765 }
766 }
767
768 TMVA::DNN::Net net;
769 std::vector<double> weights;
770
771 net.SetIpythonInteractive(fInteractive, &fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);
772
773 net.setInputSize(fNet.GetInputWidth() + 1);
774 net.setOutputSize(fNet.GetOutputWidth() + 1);
775
776 for (size_t i = 0; i < fNet.GetDepth(); i++) {
777 EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();
778 EnumFunction g = EnumFunction::LINEAR;
779 switch(f) {
780 case EActivationFunction::kIdentity: g = EnumFunction::LINEAR; break;
781 case EActivationFunction::kRelu: g = EnumFunction::RELU; break;
782 case EActivationFunction::kSigmoid: g = EnumFunction::SIGMOID; break;
783 case EActivationFunction::kTanh: g = EnumFunction::TANH; break;
784 case EActivationFunction::kFastTanh: g = EnumFunction::TANH; break;
785 case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
786 case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
787 case EActivationFunction::kGauss: g = EnumFunction::GAUSS; break;
788 }
789 if (i < fNet.GetDepth() - 1) {
790 net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
791 } else {
792 ModeOutputValues h = ModeOutputValues::DIRECT;
793 switch(fOutputFunction) {
794 case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT; break;
795 case EOutputFunction::kSigmoid: h = ModeOutputValues::SIGMOID; break;
796 case EOutputFunction::kSoftmax: h = ModeOutputValues::SOFTMAX; break;
797 }
798 net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
799 }
800 }
801
802 switch(fNet.GetLossFunction()) {
803 case ELossFunction::kMeanSquaredError:
804 net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
805 break;
806 case ELossFunction::kCrossEntropy:
807 net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
808 break;
809 case ELossFunction::kSoftmaxCrossEntropy:
810 net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
811 break;
812 }
813
814 switch(fWeightInitialization) {
815 case DNN::EInitialization::kGauss:
816 net.initializeWeights(WeightInitializationStrategy::XAVIER,
817 std::back_inserter(weights));
818 break;
819 case DNN::EInitialization::kUniform:
820 net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
821 std::back_inserter(weights));
822 break;
823 default:
824 net.initializeWeights(WeightInitializationStrategy::XAVIER,
825 std::back_inserter(weights));
826 break;
827 }
828
829 int idxSetting = 0;
830 for (auto s : fTrainingSettings) {
831
832 EnumRegularization r = EnumRegularization::NONE;
833 switch(s.regularization) {
834 case ERegularization::kNone: r = EnumRegularization::NONE; break;
835 case ERegularization::kL1: r = EnumRegularization::L1; break;
836 case ERegularization::kL2: r = EnumRegularization::L2; break;
837 }
838
839 Settings * settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
840 s.testInterval, s.weightDecay, r,
841 MinimizerType::fSteepest, s.learningRate,
842 s.momentum, 1, s.multithreading);
843 std::shared_ptr<Settings> ptrSettings(settings);
844 ptrSettings->setMonitoring (0);
845 Log() << kINFO
846 << "Training with learning rate = " << ptrSettings->learningRate ()
847 << ", momentum = " << ptrSettings->momentum ()
848 << ", repetitions = " << ptrSettings->repetitions ()
849 << Endl;
850
851 ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
852 (idxSetting+1)*100.0/(fSettings.size ()));
853
854 const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
855 if (!dropConfig.empty ()) {
856 Log () << kINFO << "Drop configuration" << Endl
857 << " drop repetitions = " << ptrSettings->dropRepetitions()
858 << Endl;
859 }
860
861 int idx = 0;
862 for (auto f : dropConfig) {
863 Log () << kINFO << " Layer " << idx << " = " << f << Endl;
864 ++idx;
865 }
866 Log () << kINFO << Endl;
867
868 DNN::Steepest minimizer(ptrSettings->learningRate(),
869 ptrSettings->momentum(),
870 ptrSettings->repetitions());
871 net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
872 ptrSettings.reset();
873 Log () << kINFO << Endl;
874 idxSetting++;
875 }
876 size_t weightIndex = 0;
877 for (size_t l = 0; l < fNet.GetDepth(); l++) {
878 auto & layerWeights = fNet.GetLayer(l).GetWeights();
879 for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
880 for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
881 layerWeights(i,j) = weights[weightIndex];
882 weightIndex++;
883 }
884 }
885 auto & layerBiases = fNet.GetLayer(l).GetBiases();
886 if (l == 0) {
887 for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
888 layerBiases(i,0) = weights[weightIndex];
889 weightIndex++;
890 }
891 } else {
892 for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
893 layerBiases(i,0) = 0.0;
894 }
895 }
896 }
897 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
898 ExitFromTraining();
899}
900
901////////////////////////////////////////////////////////////////////////////////
902
903void TMVA::MethodDNN::TrainGpu()
904{
905
906#ifdef DNNCUDA // Included only if DNNCUDA flag is set.
907 Log() << kINFO << "Start of neural network training on GPU." << Endl << Endl;
908
909 size_t nValidationSamples = GetNumValidationSamples();
910 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
911 size_t nTestSamples = nValidationSamples;
912
913 Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
914 Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;
915
916 size_t trainingPhase = 1;
917 fNet.Initialize(fWeightInitialization);
918 for (TTrainingSettings & settings : fTrainingSettings) {
919
920 if (fInteractive){
921 fInteractive->ClearGraphs();
922 }
923
924 TNet<TCuda<>> net(settings.batchSize, fNet);
925 net.SetWeightDecay(settings.weightDecay);
926 net.SetRegularization(settings.regularization);
927
928 // Need to convert dropoutprobabilities to conventions used
929 // by backend implementation.
930 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
931 for (auto & p : dropoutVector) {
932 p = 1.0 - p;
933 }
934 net.SetDropoutProbabilities(dropoutVector);
935
936 net.InitializeGradients();
937 auto testNet = net.CreateClone(settings.batchSize);
938
939 Log() << kINFO << "Training phase " << trainingPhase << " of "
940 << fTrainingSettings.size() << ":" << Endl;
941 trainingPhase++;
942
943 using DataLoader_t = TDataLoader<TMVAInput_t, TCuda<>>;
944
945 // Split training data into training and validation set
946 const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
947 const std::vector<Event *> trainingInputData =
948 std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
949 const std::vector<Event *> testInputData =
950 std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
951
952 if (trainingInputData.size() != nTrainingSamples) {
953 Log() << kFATAL << "Inconsistent training sample size" << Endl;
954 }
955 if (testInputData.size() != nTestSamples) {
956 Log() << kFATAL << "Inconsistent test sample size" << Endl;
957 }
958
959 size_t nThreads = 1;
960 TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
961 TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
962 DataLoader_t trainingData(trainingTuple, nTrainingSamples,
963 net.GetBatchSize(), net.GetInputWidth(),
964 net.GetOutputWidth(), nThreads);
965 DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
966 net.GetInputWidth(), net.GetOutputWidth(),
967 nThreads);
968 DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
969 settings.convergenceSteps,
970 settings.testInterval);
971
972 std::vector<TNet<TCuda<>>> nets{};
973 std::vector<TBatch<TCuda<>>> batches{};
974 nets.reserve(nThreads);
975 for (size_t i = 0; i < nThreads; i++) {
976 nets.push_back(net);
977 for (size_t j = 0; j < net.GetDepth(); j++)
978 {
979 auto &masterLayer = net.GetLayer(j);
980 auto &layer = nets.back().GetLayer(j);
981 TCuda<>::Copy(layer.GetWeights(),
982 masterLayer.GetWeights());
983 TCuda<>::Copy(layer.GetBiases(),
984 masterLayer.GetBiases());
985 }
986 }
987
988 bool converged = false;
989 size_t stepCount = 0;
990 size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
991
992 std::chrono::time_point<std::chrono::system_clock> start, end;
993 start = std::chrono::system_clock::now();
994
995 if (!fInteractive) {
996 Log() << std::setw(10) << "Epoch" << " | "
997 << std::setw(12) << "Train Err."
998 << std::setw(12) << "Test Err."
999 << std::setw(12) << "GFLOP/s"
1000 << std::setw(12) << "Conv. Steps" << Endl;
1001 std::string separator(62, '-');
1002 Log() << separator << Endl;
1003 }
1004
1005 while (!converged)
1006 {
1007 stepCount++;
1008
1009 // Perform minimization steps for a full epoch.
1010 trainingData.Shuffle();
1011 for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1012 batches.clear();
1013 for (size_t j = 0; j < nThreads; j++) {
1014 batches.reserve(nThreads);
1015 batches.push_back(trainingData.GetBatch());
1016 }
1017 if (settings.momentum > 0.0) {
1018 minimizer.StepMomentum(net, nets, batches, settings.momentum);
1019 } else {
1020 minimizer.Step(net, nets, batches);
1021 }
1022 }
1023
1024 if ((stepCount % minimizer.GetTestInterval()) == 0) {
1025
1026 // Compute test error.
1027 Double_t testError = 0.0;
1028 for (auto batch : testData) {
1029 auto inputMatrix = batch.GetInput();
1030 auto outputMatrix = batch.GetOutput();
1031 testError += testNet.Loss(inputMatrix, outputMatrix);
1032 }
1033 testError /= (Double_t) (nTestSamples / settings.batchSize);
1034
1035 //Log the loss value
1036 fTrainHistory.AddValue("testError",stepCount,testError);
1037
1038 end = std::chrono::system_clock::now();
1039
1040 // Compute training error.
1041 Double_t trainingError = 0.0;
1042 for (auto batch : trainingData) {
1043 auto inputMatrix = batch.GetInput();
1044 auto outputMatrix = batch.GetOutput();
1045 trainingError += net.Loss(inputMatrix, outputMatrix);
1046 }
1047 trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1048 //Log the loss value
1049 fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1050
1051 // Compute numerical throughput.
1052 std::chrono::duration<double> elapsed_seconds = end - start;
1053 double seconds = elapsed_seconds.count();
1054 double nFlops = (double) (settings.testInterval * batchesInEpoch);
1055 nFlops *= net.GetNFlops() * 1e-9;
1056
1057 converged = minimizer.HasConverged(testError);
1058 start = std::chrono::system_clock::now();
1059
1060 if (fInteractive) {
1061 fInteractive->AddPoint(stepCount, trainingError, testError);
1062 fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1063 / minimizer.GetConvergenceSteps ();
1064 if (fExitFromTraining) break;
1065 } else {
1066 Log() << std::setw(10) << stepCount << " | "
1067 << std::setw(12) << trainingError
1068 << std::setw(12) << testError
1069 << std::setw(12) << nFlops / seconds
1070 << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1071 if (converged) {
1072 Log() << Endl;
1073 }
1074 }
1075 }
1076 }
1077 for (size_t l = 0; l < net.GetDepth(); l++) {
1078 fNet.GetLayer(l).GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1079 fNet.GetLayer(l).GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1080 }
1081 }
1082
1083#else // DNNCUDA flag not set.
1084
1085 Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1086 "you have CUDA installed and it was successfully "
1087 "detected by CMAKE." << Endl;
1088#endif // DNNCUDA
1089}
1090
1091////////////////////////////////////////////////////////////////////////////////
1092
1093void TMVA::MethodDNN::TrainCpu()
1094{
1095
1096#ifdef DNNCPU // Included only if DNNCPU flag is set.
1097 Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;
1098
1099 size_t nValidationSamples = GetNumValidationSamples();
1100 size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
1101 size_t nTestSamples = nValidationSamples;
1102
1103 Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
1104 Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;
1105
1106 fNet.Initialize(fWeightInitialization);
1107
1108 size_t trainingPhase = 1;
1109 for (TTrainingSettings & settings : fTrainingSettings) {
1110
1111 if (fInteractive){
1112 fInteractive->ClearGraphs();
1113 }
1114
1115 Log() << "Training phase " << trainingPhase << " of "
1116 << fTrainingSettings.size() << ":" << Endl;
1117 trainingPhase++;
1118
1119 TNet<TCpu<>> net(settings.batchSize, fNet);
1120 net.SetWeightDecay(settings.weightDecay);
1121 net.SetRegularization(settings.regularization);
1122 // Need to convert dropoutprobabilities to conventions used
1123 // by backend implementation.
1124 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1125 for (auto & p : dropoutVector) {
1126 p = 1.0 - p;
1127 }
1128 net.SetDropoutProbabilities(dropoutVector);
1129 net.InitializeGradients();
1130 auto testNet = net.CreateClone(settings.batchSize);
1131
1132 using DataLoader_t = TDataLoader<TMVAInput_t, TCpu<>>;
1133
1134 // Split training data into training and validation set
1135 const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
1136 const std::vector<Event *> trainingInputData =
1137 std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
1138 const std::vector<Event *> testInputData =
1139 std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());
1140
1141 if (trainingInputData.size() != nTrainingSamples) {
1142 Log() << kFATAL << "Inconsistent training sample size" << Endl;
1143 }
1144 if (testInputData.size() != nTestSamples) {
1145 Log() << kFATAL << "Inconsistent test sample size" << Endl;
1146 }
1147
1148 size_t nThreads = 1;
1149 TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
1150 TMVAInput_t testTuple = std::tie(testInputData, DataInfo());
1151 DataLoader_t trainingData(trainingTuple, nTrainingSamples,
1152 net.GetBatchSize(), net.GetInputWidth(),
1153 net.GetOutputWidth(), nThreads);
1154 DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
1155 net.GetInputWidth(), net.GetOutputWidth(),
1156 nThreads);
1157 DNN::TGradientDescent<TCpu<>> minimizer(settings.learningRate,
1158 settings.convergenceSteps,
1159 settings.testInterval);
1160
1161 std::vector<TNet<TCpu<>>> nets{};
1162 std::vector<TBatch<TCpu<>>> batches{};
1163 nets.reserve(nThreads);
1164 for (size_t i = 0; i < nThreads; i++) {
1165 nets.push_back(net);
1166 for (size_t j = 0; j < net.GetDepth(); j++)
1167 {
1168 auto &masterLayer = net.GetLayer(j);
1169 auto &layer = nets.back().GetLayer(j);
1170 TCpu<>::Copy(layer.GetWeights(),
1171 masterLayer.GetWeights());
1172 TCpu<>::Copy(layer.GetBiases(),
1173 masterLayer.GetBiases());
1174 }
1175 }
1176
1177 bool converged = false;
1178 size_t stepCount = 0;
1179 size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
1180
1181 std::chrono::time_point<std::chrono::system_clock> start, end;
1182 start = std::chrono::system_clock::now();
1183
1184 if (!fInteractive) {
1185 Log() << std::setw(10) << "Epoch" << " | "
1186 << std::setw(12) << "Train Err."
1187 << std::setw(12) << "Test Err."
1188 << std::setw(12) << "GFLOP/s"
1189 << std::setw(12) << "Conv. Steps" << Endl;
1190 std::string separator(62, '-');
1191 Log() << separator << Endl;
1192 }
1193
1194 while (!converged)
1195 {
1196 stepCount++;
1197 // Perform minimization steps for a full epoch.
1198 trainingData.Shuffle();
1199 for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
1200 batches.clear();
1201 for (size_t j = 0; j < nThreads; j++) {
1202 batches.reserve(nThreads);
1203 batches.push_back(trainingData.GetBatch());
1204 }
1205 if (settings.momentum > 0.0) {
1206 minimizer.StepMomentum(net, nets, batches, settings.momentum);
1207 } else {
1208 minimizer.Step(net, nets, batches);
1209 }
1210 }
1211
1212 if ((stepCount % minimizer.GetTestInterval()) == 0) {
1213
1214 // Compute test error.
1215 Double_t testError = 0.0;
1216 for (auto batch : testData) {
1217 auto inputMatrix = batch.GetInput();
1218 auto outputMatrix = batch.GetOutput();
1219 auto weightMatrix = batch.GetWeights();
1220 testError += testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
1221 }
1222 testError /= (Double_t) (nTestSamples / settings.batchSize);
1223
1224 //Log the loss value
1225 fTrainHistory.AddValue("testError",stepCount,testError);
1226
1227 end = std::chrono::system_clock::now();
1228
1229 // Compute training error.
1230 Double_t trainingError = 0.0;
1231 for (auto batch : trainingData) {
1232 auto inputMatrix = batch.GetInput();
1233 auto outputMatrix = batch.GetOutput();
1234 auto weightMatrix = batch.GetWeights();
1235 trainingError += net.Loss(inputMatrix, outputMatrix, weightMatrix);
1236 }
1237 trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);
1238
1239 //Log the loss value
1240 fTrainHistory.AddValue("trainingError",stepCount,trainingError);
1241
1242 if (fInteractive){
1243 fInteractive->AddPoint(stepCount, trainingError, testError);
1244 fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
1245 if (fExitFromTraining) break;
1246 }
1247
1248 // Compute numerical throughput.
1249 std::chrono::duration<double> elapsed_seconds = end - start;
1250 double seconds = elapsed_seconds.count();
1251 double nFlops = (double) (settings.testInterval * batchesInEpoch);
1252 nFlops *= net.GetNFlops() * 1e-9;
1253
1254 converged = minimizer.HasConverged(testError);
1255 start = std::chrono::system_clock::now();
1256
1257 if (fInteractive) {
1258 fInteractive->AddPoint(stepCount, trainingError, testError);
1259 fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1260 / minimizer.GetConvergenceSteps ();
1261 if (fExitFromTraining) break;
1262 } else {
1263 Log() << std::setw(10) << stepCount << " | "
1264 << std::setw(12) << trainingError
1265 << std::setw(12) << testError
1266 << std::setw(12) << nFlops / seconds
1267 << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
1268 if (converged) {
1269 Log() << Endl;
1270 }
1271 }
1272 }
1273 }
1274
1275
1276 for (size_t l = 0; l < net.GetDepth(); l++) {
1277 auto & layer = fNet.GetLayer(l);
1278 layer.GetWeights() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetWeights();
1279 layer.GetBiases() = (TMatrixT<Scalar_t>) net.GetLayer(l).GetBiases();
1280 }
1281 }
1282
1283#else // DNNCPU flag not set.
1284 Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1285 "you have a BLAS implementation and it was successfully "
1286 "detected by CMake as well that the imt CMake flag is set." << Endl;
1287#endif // DNNCPU
1288}
1289
1290////////////////////////////////////////////////////////////////////////////////
1291
1292Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUpper*/ )
1293{
1294 size_t nVariables = GetEvent()->GetNVariables();
1295 Matrix_t X(1, nVariables);
1296 Matrix_t YHat(1, 1);
1297
1298 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1299 for (size_t i = 0; i < nVariables; i++) {
1300 X(0,i) = inputValues[i];
1301 }
1302
1303 fNet.Prediction(YHat, X, fOutputFunction);
1304 return YHat(0,0);
1305}
1306
1307////////////////////////////////////////////////////////////////////////////////
1308
1309const std::vector<Float_t> & TMVA::MethodDNN::GetRegressionValues()
1310{
1311 size_t nVariables = GetEvent()->GetNVariables();
1312 Matrix_t X(1, nVariables);
1313
1314 const Event *ev = GetEvent();
1315 const std::vector<Float_t>& inputValues = ev->GetValues();
1316 for (size_t i = 0; i < nVariables; i++) {
1317 X(0,i) = inputValues[i];
1318 }
1319
1320 size_t nTargets = std::max(1u, ev->GetNTargets());
1321 Matrix_t YHat(1, nTargets);
1322 std::vector<Float_t> output(nTargets);
1323 auto net = fNet.CreateClone(1);
1324 net.Prediction(YHat, X, fOutputFunction);
1325
1326 for (size_t i = 0; i < nTargets; i++)
1327 output[i] = YHat(0, i);
1328
1329 if (fRegressionReturnVal == NULL) {
1330 fRegressionReturnVal = new std::vector<Float_t>();
1331 }
1332 fRegressionReturnVal->clear();
1333
1334 Event * evT = new Event(*ev);
1335 for (size_t i = 0; i < nTargets; ++i) {
1336 evT->SetTarget(i, output[i]);
1337 }
1338
1339 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1340 for (size_t i = 0; i < nTargets; ++i) {
1341 fRegressionReturnVal->push_back(evT2->GetTarget(i));
1342 }
1343 delete evT;
1344 return *fRegressionReturnVal;
1345}
1346
1347const std::vector<Float_t> & TMVA::MethodDNN::GetMulticlassValues()
1348{
1349 size_t nVariables = GetEvent()->GetNVariables();
1350 Matrix_t X(1, nVariables);
1351 Matrix_t YHat(1, DataInfo().GetNClasses());
1352 if (fMulticlassReturnVal == NULL) {
1353 fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1354 }
1355
1356 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1357 for (size_t i = 0; i < nVariables; i++) {
1358 X(0,i) = inputValues[i];
1359 }
1360
1361 fNet.Prediction(YHat, X, fOutputFunction);
1362 for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1363 (*fMulticlassReturnVal)[i] = YHat(0, i);
1364 }
1365 return *fMulticlassReturnVal;
1366}
1367
1368////////////////////////////////////////////////////////////////////////////////
1369
1370void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const
1371{
1372 void* nn = gTools().xmlengine().NewChild(parent, 0, "Weights");
1373 Int_t inputWidth = fNet.GetInputWidth();
1374 Int_t depth = fNet.GetDepth();
1375 char lossFunction = static_cast<char>(fNet.GetLossFunction());
1376 gTools().xmlengine().NewAttr(nn, 0, "InputWidth",
1377 gTools().StringFromInt(inputWidth));
1378 gTools().xmlengine().NewAttr(nn, 0, "Depth", gTools().StringFromInt(depth));
1379 gTools().xmlengine().NewAttr(nn, 0, "LossFunction", TString(lossFunction));
1380 gTools().xmlengine().NewAttr(nn, 0, "OutputFunction",
1381 TString(static_cast<char>(fOutputFunction)));
1382
1383 for (Int_t i = 0; i < depth; i++) {
1384 const auto& layer = fNet.GetLayer(i);
1385 auto layerxml = gTools().xmlengine().NewChild(nn, 0, "Layer");
1386 int activationFunction = static_cast<int>(layer.GetActivationFunction());
1387 gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
1388 TString::Itoa(activationFunction, 10));
1389 WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
1390 WriteMatrixXML(layerxml, "Biases", layer.GetBiases());
1391 }
1392}
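// Sketch (illustrative) of the XML structure written above; the element and
// attribute names are the ones used in this function, while the matrix payload
// comes from WriteMatrixXML and is only indicated here:
//
//    <Weights InputWidth="..." Depth="..." LossFunction="C" OutputFunction="S">
//       <Layer ActivationFunction="...">
//          <Weights rows="..."> ... </Weights>
//          <Biases rows="..."> ... </Biases>
//       </Layer>
//       ...
//    </Weights>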
1393
1394////////////////////////////////////////////////////////////////////////////////
1395
1396void TMVA::MethodDNN::ReadWeightsFromXML(void* rootXML)
1397{
1398 auto netXML = gTools().GetChild(rootXML, "Weights");
1399 if (!netXML){
1400 netXML = rootXML;
1401 }
1402
1403 fNet.Clear();
1404 fNet.SetBatchSize(1);
1405
1406 size_t inputWidth, depth;
1407 gTools().ReadAttr(netXML, "InputWidth", inputWidth);
1408 gTools().ReadAttr(netXML, "Depth", depth);
1409 char lossFunctionChar;
1410 gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
1411 char outputFunctionChar;
1412 gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
1413
1414 fNet.SetInputWidth(inputWidth);
1415 fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
1416 fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
1417
1418 size_t previousWidth = inputWidth;
1419 auto layerXML = gTools().xmlengine().GetChild(netXML, "Layer");
1420 for (size_t i = 0; i < depth; i++) {
1421 TString fString;
1422 EActivationFunction f;
1423
1424 // Read activation function.
1425 gTools().ReadAttr(layerXML, "ActivationFunction", fString);
1426 f = static_cast<EActivationFunction>(fString.Atoi());
1427
1428 // Read number of neurons.
1429 size_t width;
1430 auto matrixXML = gTools().GetChild(layerXML, "Weights");
1431 gTools().ReadAttr(matrixXML, "rows", width);
1432
1433 fNet.AddLayer(width, f);
1434 TMatrixT<Double_t> weights(width, previousWidth);
1435 TMatrixT<Double_t> biases(width, 1);
1436 ReadMatrixXML(layerXML, "Weights", weights);
1437 ReadMatrixXML(layerXML, "Biases", biases);
1438 fNet.GetLayer(i).GetWeights() = weights;
1439 fNet.GetLayer(i).GetBiases() = biases;
1440
1441 layerXML = gTools().GetNextChild(layerXML);
1442 previousWidth = width;
1443 }
1444}
1445
1446////////////////////////////////////////////////////////////////////////////////
1447
1448void TMVA::MethodDNN::ReadWeightsFromStream( std::istream & /*istr*/)
1449{
1450}
1451
1452////////////////////////////////////////////////////////////////////////////////
1453
1454const Ranking* TMVA::MethodDNN::CreateRanking()
1455{
1456 fRanking = new Ranking( GetName(), "Importance" );
1457 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1458 fRanking->AddRank( Rank( GetInputLabel(ivar), 1.0));
1459 }
1460 return fRanking;
1461}
1462
1463////////////////////////////////////////////////////////////////////////////////
1464
1465void TMVA::MethodDNN::MakeClassSpecific( std::ostream& /*fout*/,
1466 const TString& /*className*/ ) const
1467{
1468}
1469
1470////////////////////////////////////////////////////////////////////////////////
1471
1472void TMVA::MethodDNN::GetHelpMessage() const
1473{
1474 // get help message text
1475 //
1476 // typical length of text line:
1477 // "|--------------------------------------------------------------|"
1478 TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1479 TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1480
1481 Log() << Endl;
1482 Log() << col << "--- Short description:" << colres << Endl;
1483 Log() << Endl;
1484 Log() << "The DNN neural network is a feedforward" << Endl;
1485 Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
1486 Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
1487 Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1488 Log() << "signal and one background, regression or multiclass). " << Endl;
1489 Log() << Endl;
1490 Log() << col << "--- Performance optimisation:" << colres << Endl;
1491 Log() << Endl;
1492
1493 const char* txt = "The DNN supports various options to improve performance in terms of training speed and \n \
1494reduction of overfitting: \n \
1495\n \
1496 - different training settings can be stacked, such that the initial training \n\
1497 is done with a large learning rate and a large drop-out fraction, whilst \n \
1498 in a later stage learning rate and drop-out can be reduced. \n \
1499 - drop out \n \
1500 [recommended: \n \
1501 initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
1502 later training stage: 0.1 or 0.0 for all layers \n \
1503 final training stage: 0.0] \n \
1504 Drop-out is a technique where at each training cycle a randomly chosen fraction of \n \
1505 nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
1506 - L1 and L2 regularization are available \n \
1507 - Minibatches \n \
1508 [recommended 10 - 150] \n \
1509 Arbitrary mini-batch sizes can be chosen. \n \
1510 - Multithreading \n \
1511 [recommended: True] \n \
1512 Multithreading can be turned on. The minibatches are distributed to the available \n \
1513 cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
1514 \n \
1515 Options: \n \
1516 \"Layout\": \n \
1517 - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
1518 - meaning: \n \
1519 . two hidden layers (separated by \",\") \n \
1520 . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
1521 . the activation function for the output layer is LINEAR \n \
1522 . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
1523 . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
1524 . the number of nodes in the output layer is determined by the number of output nodes \n \
1525 and can therefore not be chosen freely. \n \
1526 \n \
1527 \"ErrorStrategy\": \n \
1528 - SUMOFSQUARES \n \
1529 The error of the neural net is determined by a sum-of-squares error function \n \
1530 For regression, this is the only possible choice. \n \
1531 - CROSSENTROPY \n \
1532 The error of the neural net is determined by a cross entropy function. The \n \
1533 output values are automatically (internally) transformed into probabilities \n \
1534 using a sigmoid function. \n \
1535 For signal/background classification this is the default choice. \n \
1536 For multiclass, when using cross entropy, more than one (or no) output class \n \
1537 can be true at the same time (e.g. Event 0: A and B are true, Event 1: \n \
1538 A and C are true, Event 2: C is true, ...) \n \
1539 - MUTUALEXCLUSIVE \n \
1540 In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
1541 \n \
1542 \"WeightInitialization\" \n \
1543 - XAVIER \n \
1544 [recommended] \n \
1545 \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
1546 such that the variance of the values of the nodes is preserved for each layer. \n \
1547 - XAVIERUNIFORM \n \
1548 The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
1549 - LAYERSIZE \n \
1550 Random values scaled by the layer size \n \
1551 \n \
1552 \"TrainingStrategy\" \n \
1553 - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFraction=0.0,DropRepetitions=5\" \n \
1554 - explanation: two stacked training settings separated by \"|\" \n \
1555 . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1556 . second training setting : \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFractions=0.0,DropRepetitions=5\" \n \
1557 . LearningRate : \n \
1558 - recommended for classification: 0.1 initially, 1e-4 later \n \
1559 - recommended for regression: 1e-4 and less \n \
1560 . Momentum : \n \
1561 preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
1562 . Repetitions : \n \
1563 train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
1564 . ConvergenceSteps : \n \
1565 Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n \
1566 of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n \
1567 cycle the test samples are evaluated and thus the convergence is checked) \n \
1568 . BatchSize \n \
1569 Size of the mini-batches. \n \
1570 . TestRepetitions \n \
1571 Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n \
1572 . WeightDecay \n \
1573 If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
1574 . Renormalize \n \
1575 NONE, L1 (|w|) or L2 (w^2) \n \
1576 . DropConfig \n \
1577 Drop a fraction of arbitrary nodes of each of the layers according to the values given \n \
1578 in the DropConfig. \n \
1579 [example: DropConfig=0.0+0.5+0.3 \n \
1580 meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
1581 in layer 2 \n \
1582 recommended: leave all the nodes turned on for the input layer (layer 0) \n \
1583 turn off half of the nodes in later layers for the initial training; leave all nodes \n \
1584 turned on (0.0) in later training stages] \n \
1585 . DropRepetitions \n \
1586 Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n \
1587 [recommended : 1] \n \
1588 . Multithreading \n \
1589 turn on multithreading [recommended: True] \n \
1590 \n";
1591 Log () << txt << Endl;
1592}
1593
1594} // namespace TMVA
Definition: Net.h:212
Scalar_t GetNFlops()
Definition: Net.h:347
size_t GetBatchSize() const
Definition: Net.h:138
size_t GetDepth() const
Definition: Net.h:137
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
Definition: Net.h:378
size_t GetInputWidth() const
Definition: Net.h:143
Layer_t & GetLayer(size_t i)
Definition: Net.h:139
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:359
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:319
std::vector< Float_t > & GetValues()
Definition: Event.h:94
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
Deep Neural Network Implementation.
Definition: MethodDNN.h:73
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDNN.cxx:1347
UInt_t GetNumValidationSamples()
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDNN.cxx:1396
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDNN.h:83
typename Architecture_t::Matrix_t Matrix_t
Definition: MethodDNN.h:78
void ReadWeightsFromStream(std::istream &i)
Definition: MethodDNN.cxx:1448
LayoutVector_t ParseLayoutString(TString layerSpec)
void MakeClassSpecific(std::ostream &, const TString &) const
Definition: MethodDNN.cxx:1465
MethodDNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
void ProcessOptions()
Definition: MethodDNN.cxx:419
virtual ~MethodDNN()
DNN::EInitialization fWeightInitialization
Definition: MethodDNN.h:108
void DeclareOptions()
virtual Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDNN.cxx:1292
const Ranking * CreateRanking()
Definition: MethodDNN.cxx:1454
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
DNN::EOutputFunction fOutputFunction
Definition: MethodDNN.h:109
void AddWeightsXMLTo(void *parent) const
Definition: MethodDNN.cxx:1370
void GetHelpMessage() const
Definition: MethodDNN.cxx:1472
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDNN.cxx:1309
Ranking for variables in method (implementation)
Definition: Ranking.h:48
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1173
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
TXMLEngine & xmlengine()
Definition: Tools.h:268
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
EAnalysisType
Definition: Types.h:127
@ kMulticlass
Definition: Types.h:130
@ kClassification
Definition: Types.h:128
@ kRegression
Definition: Types.h:129
@ kTraining
Definition: Types.h:144
TMatrixT.
Definition: TMatrixT.h:39
An array of TObjects.
Definition: TObjArray.h:37
Collectable string class.
Definition: TObjString.h:28
const TString & GetString() const
Definition: TObjString.h:46
Basic string class.
Definition: TString.h:131
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1921
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1106
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
const char * Data() const
Definition: TString.h:364
@ kTrailing
Definition: TString.h:262
@ kBoth
Definition: TString.h:262
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1138
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition: TString.cxx:2197
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
static TString Itoa(Int_t value, Int_t base)
Converts an Int_t to a TString with respect to the base specified (2-36).
Definition: TString.cxx:2025
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:709
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
Definition: TXMLEngine.cxx:580
constexpr size_t block
Definition: BatchHelpers.h:29
double T(double x)
Definition: ChebyshevPol.h:34
static const std::string separator("@@@")
static constexpr double s
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
EnumRegularization
Definition: NeuralNet.h:172
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:238
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:57
@ fSteepest
SGD.
Definition: NeuralNet.h:321
ModeOutputValues
Definition: NeuralNet.h:178
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
create variable transformations
Config & gConfig()
Tools & gTools()
TString fetchValue(const std::map< TString, TString > &keyValueMap, TString key)
Definition: MethodDNN.cxx:315
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:750
DNN::ERegularization regularization
Definition: MethodDNN.h:90
std::vector< Double_t > dropoutProbabilities
Definition: MethodDNN.h:94
auto * l
Definition: textangle.C:4
static void output(int code)
Definition: gifencode.c:226