Factory.cxx
// @(#)Root/tmva $Id$
// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
// Updated by: Omar Zapata, Kim Albertsson
/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : Factory                                                               *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation (see header for description)                               *
 *                                                                                *
 * Authors :                                                                      *
 *      Andreas Hoecker   <Andreas.Hocker@cern.ch>   - CERN, Switzerland          *
 *      Joerg Stelzer     <stelzer@cern.ch>          - DESY, Germany              *
 *      Peter Speckmayer  <peter.speckmayer@cern.ch> - CERN, Switzerland          *
 *      Jan Therhaag      <Jan.Therhaag@cern.ch>     - U of Bonn, Germany         *
 *      Eckhard v. Toerne <evt@uni-bonn.de>          - U of Bonn, Germany         *
 *      Helge Voss        <Helge.Voss@cern.ch>       - MPI-K Heidelberg, Germany  *
 *      Kai Voss          <Kai.Voss@cern.ch>         - U. of Victoria, Canada     *
 *      Omar Zapata       <Omar.Zapata@cern.ch>      - UdeA/ITM Colombia          *
 *      Lorenzo Moneta    <Lorenzo.Moneta@cern.ch>   - CERN, Switzerland          *
 *      Sergei Gleyzer    <Sergei.Gleyzer@cern.ch>   - U of Florida & CERN        *
 *      Kim Albertsson    <kim.albertsson@cern.ch>   - LTU & CERN                 *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *      UdeA/ITM, Colombia                                                        *
 *      U. of Florida, USA                                                        *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

/*! \class TMVA::Factory
\ingroup TMVA

This is the main MVA steering class.
It creates all MVA methods, and guides them through the training, testing and
evaluation phases.
*/
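
// A minimal end-to-end usage sketch of this class (illustrative only: the
// input file, tree names, variable names and option strings below are
// hypothetical examples, not prescriptions):
//
//    TFile *input  = TFile::Open("input.root");
//    TFile *output = TFile::Open("TMVA.root", "RECREATE");
//
//    TMVA::Factory factory("TMVAClassification", output,
//                          "!V:!Silent:Color:DrawProgressBar:AnalysisType=Auto");
//    TMVA::DataLoader loader("dataset");
//    loader.AddVariable("var1", 'F');
//    loader.AddVariable("var2", 'F');
//    loader.AddSignalTree((TTree*)input->Get("TreeS"));
//    loader.AddBackgroundTree((TTree*)input->Get("TreeB"));
//    loader.PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents");
//
//    factory.BookMethod(&loader, TMVA::Types::kBDT, "BDT", "NTrees=400");
//
//    factory.TrainAllMethods();
//    factory.TestAllMethods();
//    factory.EvaluateAllMethods();
//    output->Close();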

#include "TMVA/Factory.h"

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/Configurable.h"
#include "TMVA/Tools.h"
#include "TMVA/Ranking.h"
#include "TMVA/DataSet.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"
#include "TMVA/DataInputHandler.h"
#include "TMVA/DataSetManager.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/DataLoader.h"
#include "TMVA/MethodBoost.h"
#include "TMVA/MethodCategory.h"
#include "TMVA/ROCCalc.h"
#include "TMVA/ROCCurve.h"
#include "TMVA/MsgLogger.h"

#include "TMVA/VariableInfo.h"
#include "TMVA/VariableTransform.h"

#include "TMVA/Results.h"
#include "TMVA/ResultsClassification.h"
#include "TMVA/ResultsRegression.h"
#include "TMVA/ResultsMulticlass.h"
#include <list>
#include <bitset>
#include <set>

#include "TMVA/Types.h"

80 #include "TROOT.h"
81 #include "TFile.h"
82 #include "TTree.h"
83 #include "TLeaf.h"
84 #include "TEventList.h"
85 #include "TH2.h"
86 #include "TText.h"
87 #include "TLegend.h"
88 #include "TGraph.h"
89 #include "TStyle.h"
90 #include "TMatrixF.h"
91 #include "TMatrixDSym.h"
92 #include "TMultiGraph.h"
93 #include "TPrincipal.h"
94 #include "TMath.h"
95 #include "TSystem.h"
96 #include "TCanvas.h"

const Int_t MinNoTrainingEvents = 10;
//const Int_t MinNoTestEvents  = 1;

TFile* TMVA::Factory::fgTargetFile = nullptr;

#define READXML kTRUE

// number of bits for bitset
#define VIBITS 32


////////////////////////////////////////////////////////////////////////////////
/// Standard constructor.
///
///   - jobname       : this name will appear in all weight file names produced by the MVAs
///   - theTargetFile : output ROOT file; the test tree and all evaluation plots
///                     will be stored here
///   - theOption     : option string; currently: "V" for verbose
TMVA::Factory::Factory( TString jobName, TFile* theTargetFile, TString theOption )
: Configurable      ( theOption ),
  fTransformations  ( "I" ),
  fVerbose          ( kFALSE ),
  fVerboseLevel     ( kINFO ),
  fCorrelations     ( kFALSE ),
  fROC              ( kTRUE ),
  fSilentFile       ( theTargetFile == nullptr ),
  fJobName          ( jobName ),
  fAnalysisType     ( Types::kClassification ),
  fModelPersistence ( kTRUE )
{
   fName = "Factory";
   fgTargetFile = theTargetFile;

   // render silent
   if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure it is silent if wanted


   // init configurable
   SetConfigDescription( "Configuration options for Factory running" );
   SetConfigName( GetName() );

   // histograms are not automatically associated with the current
   // directory and hence don't go out of scope when closing the file
   // TH1::AddDirectory(kFALSE);
   Bool_t silent = kFALSE;
#ifdef WIN32
   // under Windows, switch progress bar and color off by default, as the typical Windows shell doesn't handle these (would need different escape sequences)
   Bool_t color = kFALSE;
   Bool_t drawProgressBar = kFALSE;
#else
   Bool_t color = !gROOT->IsBatch();
   Bool_t drawProgressBar = kTRUE;
#endif
   DeclareOptionRef( fVerbose, "V", "Verbose flag" );
   DeclareOptionRef( fVerboseLevel=TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)" );
   AddPreDefVal(TString("Debug"));
   AddPreDefVal(TString("Verbose"));
   AddPreDefVal(TString("Info"));
   DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
   DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
   DeclareOptionRef( fCorrelations, "Correlations", "Boolean to show correlations in output" );
   DeclareOptionRef( fROC, "ROC", "Boolean to show ROC in output" );
   DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
   DeclareOptionRef( drawProgressBar,
                     "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
   DeclareOptionRef( fModelPersistence,
                     "ModelPersistence",
                     "Option to save the trained model in an XML file or using serialization");

   TString analysisType("Auto");
   DeclareOptionRef( analysisType,
                     "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
   AddPreDefVal(TString("Classification"));
   AddPreDefVal(TString("Regression"));
   AddPreDefVal(TString("Multiclass"));
   AddPreDefVal(TString("Auto"));

   ParseOptions();
   CheckForUnusedOptions();

   if (Verbose()) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Debug") == 0)   fLogger->SetMinType( kDEBUG );
   if (fVerboseLevel.CompareTo("Verbose") == 0) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Info") == 0)    fLogger->SetMinType( kINFO );

   // global settings
   gConfig().SetUseColor( color );
   gConfig().SetSilent( silent );
   gConfig().SetDrawProgressBar( drawProgressBar );

   analysisType.ToLower();
   if      ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
   else if ( analysisType == "regression" )     fAnalysisType = Types::kRegression;
   else if ( analysisType == "multiclass" )     fAnalysisType = Types::kMulticlass;
   else if ( analysisType == "auto" )           fAnalysisType = Types::kNoAnalysisType;

//   Greetings();
}
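
// For illustration, a couple of option strings this constructor accepts
// (built from the options declared above; outputFile is a hypothetical open
// TFile):
//
//    TMVA::Factory f1("myJob", outputFile,
//                     "V:Color:DrawProgressBar:AnalysisType=Classification");
//    TMVA::Factory f2("myJob", outputFile, "!V:Silent:AnalysisType=Auto");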

////////////////////////////////////////////////////////////////////////////////
/// Constructor.

TMVA::Factory::Factory( TString jobName, TString theOption )
: Configurable      ( theOption ),
  fTransformations  ( "I" ),
  fVerbose          ( kFALSE ),
  fCorrelations     ( kFALSE ),
  fROC              ( kTRUE ),
  fSilentFile       ( kTRUE ),
  fJobName          ( jobName ),
  fAnalysisType     ( Types::kClassification ),
  fModelPersistence ( kTRUE )
{
   fName = "Factory";
   fgTargetFile = nullptr;


   // render silent
   if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure it is silent if wanted


   // init configurable
   SetConfigDescription( "Configuration options for Factory running" );
   SetConfigName( GetName() );

   // histograms are not automatically associated with the current
   // directory and hence don't go out of scope when closing the file
   Bool_t silent = kFALSE;
#ifdef WIN32
   // under Windows, switch progress bar and color off by default, as the typical Windows shell doesn't handle these (would need different escape sequences)
   Bool_t color = kFALSE;
   Bool_t drawProgressBar = kFALSE;
#else
   Bool_t color = !gROOT->IsBatch();
   Bool_t drawProgressBar = kTRUE;
#endif
   DeclareOptionRef( fVerbose, "V", "Verbose flag" );
   DeclareOptionRef( fVerboseLevel=TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)" );
   AddPreDefVal(TString("Debug"));
   AddPreDefVal(TString("Verbose"));
   AddPreDefVal(TString("Info"));
   DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
   DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
   DeclareOptionRef( fCorrelations, "Correlations", "Boolean to show correlations in output" );
   DeclareOptionRef( fROC, "ROC", "Boolean to show ROC in output" );
   DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
   DeclareOptionRef( drawProgressBar,
                     "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
   DeclareOptionRef( fModelPersistence,
                     "ModelPersistence",
                     "Option to save the trained model in an XML file or using serialization");

   TString analysisType("Auto");
   DeclareOptionRef( analysisType,
                     "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
   AddPreDefVal(TString("Classification"));
   AddPreDefVal(TString("Regression"));
   AddPreDefVal(TString("Multiclass"));
   AddPreDefVal(TString("Auto"));

   ParseOptions();
   CheckForUnusedOptions();

   if (Verbose()) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Debug") == 0)   fLogger->SetMinType( kDEBUG );
   if (fVerboseLevel.CompareTo("Verbose") == 0) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Info") == 0)    fLogger->SetMinType( kINFO );

   // global settings
   gConfig().SetUseColor( color );
   gConfig().SetSilent( silent );
   gConfig().SetDrawProgressBar( drawProgressBar );

   analysisType.ToLower();
   if      ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
   else if ( analysisType == "regression" )     fAnalysisType = Types::kRegression;
   else if ( analysisType == "multiclass" )     fAnalysisType = Types::kMulticlass;
   else if ( analysisType == "auto" )           fAnalysisType = Types::kNoAnalysisType;

   Greetings();
}

////////////////////////////////////////////////////////////////////////////////
/// Print welcome message.
/// Options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg

void TMVA::Factory::Greetings()
{
   gTools().ROOTVersionMessage( Log() );
   gTools().TMVAWelcomeMessage( Log(), gTools().kLogoWelcomeMsg );
   gTools().TMVAVersionMessage( Log() ); Log() << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// Destructor.

TMVA::Factory::~Factory( void )
{
   std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
   for (; trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);

   this->DeleteAllMethods();


   // problem with call of REGISTER_METHOD macro ...
   // ClassifierFactory::DestroyInstance();
   // Types::DestroyInstance();
   //Tools::DestroyInstance();
   //Config::DestroyInstance();
}

////////////////////////////////////////////////////////////////////////////////
/// Delete methods.

void TMVA::Factory::DeleteAllMethods( void )
{
   std::map<TString,MVector*>::iterator itrMap;

   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap)
   {
      MVector *methods = itrMap->second;
      // delete methods
      MVector::iterator itrMethod = methods->begin();
      for (; itrMethod != methods->end(); ++itrMethod) {
         Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
         delete (*itrMethod);
      }
      methods->clear();
      delete methods;
   }
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::Factory::SetVerbose( Bool_t v )
{
   fVerbose = v;
}

////////////////////////////////////////////////////////////////////////////////
/// Book a classifier or regression method.

TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption )
{
   if (fModelPersistence) gSystem->MakeDirectory(loader->GetName()); // creating directory for DataLoader output

   TString datasetname = loader->GetName();

   if ( fAnalysisType == Types::kNoAnalysisType ) {
      if ( loader->GetDataSetInfo().GetNClasses() == 2
           && loader->GetDataSetInfo().GetClassInfo("Signal") != NULL
           && loader->GetDataSetInfo().GetClassInfo("Background") != NULL
           ) {
         fAnalysisType = Types::kClassification; // default is classification
      } else if ( loader->GetDataSetInfo().GetNClasses() >= 2 ) {
         fAnalysisType = Types::kMulticlass;     // two or more classes, but not named "Signal" and "Background"
      } else
         Log() << kFATAL << "No analysis type for " << loader->GetDataSetInfo().GetNClasses() << " classes and "
               << loader->GetDataSetInfo().GetNTargets() << " regression targets." << Endl;
   }

   // booking via name; the names are translated into enums and the
   // corresponding overloaded BookMethod is called

   if (fMethodsMap.find(datasetname) != fMethodsMap.end())
   {
      if (GetMethod( datasetname, methodTitle ) != 0) {
         Log() << kFATAL << "Booking failed since method with title <"
               << methodTitle << "> already exists with DataSet Name <" << loader->GetName() << "> "
               << Endl;
      }
   }


   Log() << kHEADER << "Booking method: " << gTools().Color("bold") << methodTitle
         // << gTools().Color("reset")<<" DataSet Name: "<<gTools().Color("bold")<<loader->GetName()
         << gTools().Color("reset") << Endl << Endl;

   // interpret option string with respect to a request for boosting (i.e., Boost_num > 0)
   Int_t boostNum = 0;
   TMVA::Configurable* conf = new TMVA::Configurable( theOption );
   conf->DeclareOptionRef( boostNum = 0, "Boost_num",
                           "Number of times the classifier will be boosted" );
   conf->ParseOptions();
   delete conf;
   // this is the name of the weight file directory
   TString fileDir;
   if (fModelPersistence)
   {
      // find prefix in fWeightFileDir;
      TString prefix = gConfig().GetIONames().fWeightFileDirPrefix;
      fileDir = prefix;
      if (!prefix.IsNull())
         if (fileDir[fileDir.Length()-1] != '/') fileDir += "/";
      fileDir += loader->GetName();
      fileDir += "/" + gConfig().GetIONames().fWeightFileDir;
   }
   // initialize methods
   IMethod* im;
   if (!boostNum) {
      im = ClassifierFactory::Instance().Create(theMethodName.Data(), fJobName, methodTitle,
                                                loader->GetDataSetInfo(), theOption);
   }
   else {
      // boosted classifier, requires a specific definition, making it transparent for the user
      Log() << kDEBUG << "Boost number is " << boostNum << " > 0: train boosted classifier" << Endl;
      im = ClassifierFactory::Instance().Create("Boost", fJobName, methodTitle, loader->GetDataSetInfo(), theOption);
      MethodBoost *methBoost = dynamic_cast<MethodBoost *>(im); // DSMTEST divided into two lines
      if (!methBoost) { // DSMTEST
         Log() << kFATAL << "Method with type kBoost cannot be cast to MethodBoost. /Factory" << Endl; // DSMTEST
         return nullptr;
      }
      if (fModelPersistence) methBoost->SetWeightFileDir(fileDir);
      methBoost->SetModelPersistence(fModelPersistence);
      methBoost->SetBoostedMethodName(theMethodName); // DSMTEST divided into two lines
      methBoost->fDataSetManager = loader->GetDataSetInfo().GetDataSetManager(); // DSMTEST
      methBoost->SetFile(fgTargetFile);
      methBoost->SetSilentFile(IsSilentFile());
   }

   MethodBase *method = dynamic_cast<MethodBase*>(im);
   if (method == 0) return 0; // could not create method

   // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
   if (method->GetMethodType() == Types::kCategory) { // DSMTEST
      MethodCategory *methCat = (dynamic_cast<MethodCategory*>(im)); // DSMTEST
      if (!methCat) { // DSMTEST
         Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl; // DSMTEST
         return nullptr;
      }
      if (fModelPersistence) methCat->SetWeightFileDir(fileDir);
      methCat->SetModelPersistence(fModelPersistence);
      methCat->fDataSetManager = loader->GetDataSetInfo().GetDataSetManager(); // DSMTEST
      methCat->SetFile(fgTargetFile);
      methCat->SetSilentFile(IsSilentFile());
   } // DSMTEST


   if (!method->HasAnalysisType( fAnalysisType,
                                 loader->GetDataSetInfo().GetNClasses(),
                                 loader->GetDataSetInfo().GetNTargets() )) {
      Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling ";
      if (fAnalysisType == Types::kRegression) {
         Log() << "regression with " << loader->GetDataSetInfo().GetNTargets() << " targets." << Endl;
      }
      else if (fAnalysisType == Types::kMulticlass ) {
         Log() << "multiclass classification with " << loader->GetDataSetInfo().GetNClasses() << " classes." << Endl;
      }
      else {
         Log() << "classification with " << loader->GetDataSetInfo().GetNClasses() << " classes." << Endl;
      }
      return 0;
   }

   if (fModelPersistence) method->SetWeightFileDir(fileDir);
   method->SetModelPersistence(fModelPersistence);
   method->SetAnalysisType( fAnalysisType );
   method->SetupMethod();
   method->ParseOptions();
   method->ProcessSetup();
   method->SetFile(fgTargetFile);
   method->SetSilentFile(IsSilentFile());

   // check-for-unused-options is performed; may be overridden by derived classes
   method->CheckSetup();

   if (fMethodsMap.find(datasetname) == fMethodsMap.end())
   {
      MVector *mvector = new MVector;
      fMethodsMap[datasetname] = mvector;
   }
   fMethodsMap[datasetname]->push_back( method );
   return method;
}
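
// Illustrative booking calls (factory and loader are hypothetical objects;
// the method option strings are examples only, see the documentation of each
// method for its actual options):
//
//    factory.BookMethod(loader, TMVA::Types::kBDT, "BDT",
//                       "NTrees=400:MaxDepth=3:BoostType=AdaBoost");
//
//    // boosting an arbitrary classifier via the generic Boost wrapper,
//    // triggered by Boost_num > 0 as parsed above:
//    factory.BookMethod(loader, TMVA::Types::kFisher, "BoostedFisher",
//                       "Boost_num=10:Boost_Type=AdaBoost");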

////////////////////////////////////////////////////////////////////////////////
/// Books an MVA method. The option configuration string is custom for each MVA;
/// the TString field "theNameAppendix" serves to define (and distinguish)
/// several instances of a given MVA, e.g., when one wants to compare the
/// performance of various configurations.

TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, Types::EMVA theMethod, TString methodTitle, TString theOption )
{
   return BookMethod(loader, Types::Instance().GetMethodName( theMethod ), methodTitle, theOption );
}

////////////////////////////////////////////////////////////////////////////////
/// Adds an already constructed method to be managed by this factory.
///
/// \note Private.
/// \note Know what you are doing when using this method. The method that you
/// are loading could be trained already.
///

TMVA::MethodBase* TMVA::Factory::BookMethodWeightfile(DataLoader *loader, TMVA::Types::EMVA methodType, const TString &weightfile)
{
   TString datasetname = loader->GetName();
   std::string methodTypeName = std::string(Types::Instance().GetMethodName(methodType).Data());
   DataSetInfo &dsi = loader->GetDataSetInfo();

   IMethod *im = ClassifierFactory::Instance().Create(methodTypeName, dsi, weightfile );
   MethodBase *method = (dynamic_cast<MethodBase*>(im));

   if (method == nullptr) return nullptr;

   if ( method->GetMethodType() == Types::kCategory ) {
      Log() << kERROR << "Cannot handle category methods for now." << Endl;
   }

   TString fileDir;
   if (fModelPersistence) {
      // find prefix in fWeightFileDir;
      TString prefix = gConfig().GetIONames().fWeightFileDirPrefix;
      fileDir = prefix;
      if (!prefix.IsNull())
         if (fileDir[fileDir.Length() - 1] != '/')
            fileDir += "/";
      fileDir += loader->GetName();
      fileDir += "/" + gConfig().GetIONames().fWeightFileDir;
   }

   if (fModelPersistence) method->SetWeightFileDir(fileDir);
   method->SetModelPersistence(fModelPersistence);
   method->SetAnalysisType( fAnalysisType );
   method->SetupMethod();
   method->SetFile(fgTargetFile);
   method->SetSilentFile(IsSilentFile());

   method->DeclareCompatibilityOptions();

   // read weight file
   method->ReadStateFromFile();

   //method->CheckSetup();

   TString methodTitle = method->GetName();
   if (HasMethod(datasetname, methodTitle)) {
      Log() << kFATAL << "Booking failed since method with title <"
            << methodTitle << "> already exists with DataSet Name <" << loader->GetName() << "> "
            << Endl;
   }

   Log() << kINFO << "Booked classifier \"" << method->GetMethodName()
         << "\" of type: \"" << method->GetMethodTypeName() << "\"" << Endl;

   if (fMethodsMap.count(datasetname) == 0) {
      MVector *mvector = new MVector;
      fMethodsMap[datasetname] = mvector;
   }

   fMethodsMap[datasetname]->push_back( method );

   return method;
}

////////////////////////////////////////////////////////////////////////////////
/// Returns pointer to MVA that corresponds to given method title.

TMVA::IMethod* TMVA::Factory::GetMethod(const TString& datasetname, const TString &methodTitle ) const
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) return 0;

   MVector *methods = fMethodsMap.find(datasetname)->second;

   MVector::const_iterator itrMethod;
   //
   for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
      MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
      if ( (mva->GetMethodName()) == methodTitle ) return mva;
   }
   return 0;
}

////////////////////////////////////////////////////////////////////////////////
/// Checks whether a given method name is defined for a given dataset.

Bool_t TMVA::Factory::HasMethod(const TString& datasetname, const TString &methodTitle ) const
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) return kFALSE;

   std::string methodName = methodTitle.Data();
   auto isEqualToMethodName = [&methodName](TMVA::IMethod * m) {
      return ( 0 == methodName.compare( m->GetName() ) );
   };

   TMVA::Factory::MVector * methods = this->fMethodsMap.at(datasetname);
   Bool_t isMethodNameExisting = std::any_of( methods->begin(), methods->end(), isEqualToMethodName);

   return isMethodNameExisting;
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::Factory::WriteDataInformation(DataSetInfo& fDataSetInfo)
{
   RootBaseDir()->cd();

   if (!RootBaseDir()->GetDirectory(fDataSetInfo.GetName())) RootBaseDir()->mkdir(fDataSetInfo.GetName());
   else return; // loader is now in the output file, we don't need to save it again

   RootBaseDir()->cd(fDataSetInfo.GetName());
   fDataSetInfo.GetDataSet(); // builds dataset (including calculation of correlation matrix)


   // correlation matrix of the default DS
   const TMatrixD* m(0);
   const TH2* h(0);

   if (fAnalysisType == Types::kMulticlass) {
      for (UInt_t cls = 0; cls < fDataSetInfo.GetNClasses(); cls++) {
         m = fDataSetInfo.CorrelationMatrix(fDataSetInfo.GetClassInfo(cls)->GetName());
         h = fDataSetInfo.CreateCorrelationMatrixHist(m, TString("CorrelationMatrix")+fDataSetInfo.GetClassInfo(cls)->GetName(),
                                                      TString("Correlation Matrix (")+ fDataSetInfo.GetClassInfo(cls)->GetName() +TString(")"));
         if (h != 0) {
            h->Write();
            delete h;
         }
      }
   }
   else {
      m = fDataSetInfo.CorrelationMatrix( "Signal" );
      h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixS", "Correlation Matrix (signal)");
      if (h != 0) {
         h->Write();
         delete h;
      }

      m = fDataSetInfo.CorrelationMatrix( "Background" );
      h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixB", "Correlation Matrix (background)");
      if (h != 0) {
         h->Write();
         delete h;
      }

      m = fDataSetInfo.CorrelationMatrix( "Regression" );
      h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrix", "Correlation Matrix");
      if (h != 0) {
         h->Write();
         delete h;
      }
   }

   // some default transformations to evaluate
   // NOTE: all transformations are destroyed after this test
   TString processTrfs = "I"; //"I;N;D;P;U;G,D;"

   // plus some user defined transformations
   processTrfs = fTransformations;

   // remove any trace of identity transform - if given (avoid applying it twice)
   std::vector<TMVA::TransformationHandler*> trfs;
   TransformationHandler* identityTrHandler = 0;

   std::vector<TString> trfsDef = gTools().SplitString(processTrfs,';');
   std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
   for (; trfsDefIt != trfsDef.end(); ++trfsDefIt) {
      trfs.push_back(new TMVA::TransformationHandler(fDataSetInfo, "Factory"));
      TString trfS = (*trfsDefIt);

      //Log() << kINFO << Endl;
      Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
      TMVA::CreateVariableTransforms( trfS,
                                      fDataSetInfo,
                                      *(trfs.back()),
                                      Log() );

      if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();
   }

   const std::vector<Event*>& inputEvents = fDataSetInfo.GetDataSet()->GetEventCollection();

   // apply all transformations
   std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();

   for (; trfIt != trfs.end(); ++trfIt) {
      // setting a Root dir causes the variables distributions to be saved to the root file
      (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.GetName())); // every dataloader has its own directory
      (*trfIt)->CalcTransformations(inputEvents);
   }
   if (identityTrHandler) identityTrHandler->PrintVariableRanking();

   // clean up
   for (trfIt = trfs.begin(); trfIt != trfs.end(); ++trfIt) delete *trfIt;
}

////////////////////////////////////////////////////////////////////////////////
/// Iterates through all booked methods and checks whether they use parameter
/// tuning. If so, calls Method::Train() for the different parameter settings
/// and keeps track of the optimal settings, which are the ones that will later
/// be used in the main training loop.

std::map<TString,Double_t> TMVA::Factory::OptimizeAllMethods(TString fomType, TString fitType)
{

   std::map<TString,MVector*>::iterator itrMap;
   std::map<TString,Double_t> TunedParameters;
   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap)
   {
      MVector *methods = itrMap->second;

      MVector::iterator itrMethod;

      // iterate over methods and optimize
      for ( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
         MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
         if (!mva) {
            Log() << kFATAL << "Dynamic cast to MethodBase failed" << Endl;
            return TunedParameters;
         }

         if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
            Log() << kWARNING << "Method " << mva->GetMethodName()
                  << " not trained (training tree has fewer entries ["
                  << mva->Data()->GetNTrainingEvents()
                  << "] than required [" << MinNoTrainingEvents << "])" << Endl;
            continue;
         }

         Log() << kINFO << "Optimize method: " << mva->GetMethodName() << " for "
               << (fAnalysisType == Types::kRegression ? "Regression" :
                   (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl;

         TunedParameters = mva->OptimizeTuningParameters(fomType,fitType);
         Log() << kINFO << "Optimization of tuning parameters finished for method: " << mva->GetName() << Endl;
      }
   }

   return TunedParameters;

}
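
// For example (the figure-of-merit and fitter names below are illustrative
// argument values, not an exhaustive list):
//
//    std::map<TString, Double_t> tuned =
//       factory.OptimizeAllMethods("ROCIntegral", "FitGA");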

////////////////////////////////////////////////////////////////////////////////
/// Private method to generate a ROCCurve instance for a given method.
/// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
/// understands.
///
/// \note You own the returned pointer.
///

TMVA::ROCCurve *TMVA::Factory::GetROC(TMVA::DataLoader *loader, TString theMethodName, UInt_t iClass,
                                      Types::ETreeType type)
{
   return GetROC((TString)loader->GetName(), theMethodName, iClass, type);
}

////////////////////////////////////////////////////////////////////////////////
/// Private method to generate a ROCCurve instance for a given method.
/// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
/// understands.
///
/// \note You own the returned pointer.
///

TMVA::ROCCurve *TMVA::Factory::GetROC(TString datasetname, TString theMethodName, UInt_t iClass, Types::ETreeType type)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return nullptr;
   }

   if (!this->HasMethod(datasetname, theMethodName)) {
      Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
            << Endl;
      return nullptr;
   }

   std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
   if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
      Log() << kERROR << Form("Can only generate ROC curves for analysis types kClassification and kMulticlass.")
            << Endl;
      return nullptr;
   }

   TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(this->GetMethod(datasetname, theMethodName));
   TMVA::DataSet *dataset = method->Data();
   dataset->SetCurrentType(type);
   TMVA::Results *results = dataset->GetResults(theMethodName, type, this->fAnalysisType);

   UInt_t nClasses = method->DataInfo().GetNClasses();
   if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
      Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
                              iClass, nClasses)
            << Endl;
      return nullptr;
   }

   TMVA::ROCCurve *rocCurve = nullptr;
   if (this->fAnalysisType == Types::kClassification) {

      std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
      std::vector<Bool_t> *mvaResTypes = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
      std::vector<Float_t> mvaResWeights;

      auto eventCollection = dataset->GetEventCollection(type);
      mvaResWeights.reserve(eventCollection.size());
      for (auto ev : eventCollection) {
         mvaResWeights.push_back(ev->GetWeight());
      }

      rocCurve = new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);

   } else if (this->fAnalysisType == Types::kMulticlass) {
      std::vector<Float_t> mvaRes;
      std::vector<Bool_t> mvaResTypes;
      std::vector<Float_t> mvaResWeights;

      std::vector<std::vector<Float_t>> *rawMvaRes = dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();

      // Vector transpose due to values being stored as
      // [ [0, 1, 2], [0, 1, 2], ... ]
      // in ResultsMulticlass::GetValueVector.
      mvaRes.reserve(rawMvaRes->size());
      for (auto item : *rawMvaRes) {
         mvaRes.push_back(item[iClass]);
      }

      auto eventCollection = dataset->GetEventCollection(type);
      mvaResTypes.reserve(eventCollection.size());
      mvaResWeights.reserve(eventCollection.size());
      for (auto ev : eventCollection) {
         mvaResTypes.push_back(ev->GetClass() == iClass);
         mvaResWeights.push_back(ev->GetWeight());
      }

      rocCurve = new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
   }

   return rocCurve;
}
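
// Ownership sketch: the caller owns the returned object and must delete it,
// e.g. (hypothetical method title "BDT"):
//
//    TMVA::ROCCurve *roc = factory.GetROC("dataset", "BDT");
//    if (roc) {
//       Double_t auc = roc->GetROCIntegral();
//       delete roc;
//    }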

////////////////////////////////////////////////////////////////////////////////
/// Calculate the integral of the ROC curve, also known as the area under curve
/// (AUC), for a given method.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///

Double_t TMVA::Factory::GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass)
{
   return GetROCIntegral((TString)loader->GetName(), theMethodName, iClass);
}

////////////////////////////////////////////////////////////////////////////////
/// Calculate the integral of the ROC curve, also known as the area under curve
/// (AUC), for a given method.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///

Double_t TMVA::Factory::GetROCIntegral(TString datasetname, TString theMethodName, UInt_t iClass)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return 0;
   }

   if ( ! this->HasMethod(datasetname, theMethodName) ) {
      Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
      return 0;
   }

   std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
   if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
      Log() << kERROR << Form("Can only generate ROC integral for analysis types kClassification and kMulticlass.")
            << Endl;
      return 0;
   }

   TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
   if (!rocCurve) {
      Log() << kFATAL << Form("ROCCurve object was not created for Method = %s with Dataset = %s ",
                              theMethodName.Data(), datasetname.Data())
            << Endl;
      return 0;
   }

   Int_t npoints = TMVA::gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   Double_t rocIntegral = rocCurve->GetROCIntegral(npoints);
   delete rocCurve;

   return rocIntegral;
}
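
// For example (sketch; "dataset" and "BDT" are hypothetical names):
//
//    Double_t auc  = factory.GetROCIntegral("dataset", "BDT");     // binary case
//    Double_t auc2 = factory.GetROCIntegral("dataset", "BDT", 2);  // class 2 vs. rest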

////////////////////////////////////////////////////////////////////////////////
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// Returns a ROC graph for a given method, or nullptr on error.
///
/// Note: Evaluation of the given method must have been run prior to ROC
/// generation through Factory::EvaluateAllMethods.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others considered background. This is fine in binary classification,
/// but in multiclass classification the ROC surface is an N-dimensional
/// shape, where N is the number of classes - 1.

TGraph* TMVA::Factory::GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles, UInt_t iClass)
{
   return GetROCCurve( (TString)loader->GetName(), theMethodName, setTitles, iClass );
}

////////////////////////////////////////////////////////////////////////////////
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// Returns a ROC graph for a given method, or nullptr on error.
///
/// Note: Evaluation of the given method must have been run prior to ROC
/// generation through Factory::EvaluateAllMethods.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others considered background. This is fine in binary classification,
/// but in multiclass classification the ROC surface is an N-dimensional
/// shape, where N is the number of classes - 1.

TGraph* TMVA::Factory::GetROCCurve(TString datasetname, TString theMethodName, Bool_t setTitles, UInt_t iClass)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return nullptr;
   }

   if ( ! this->HasMethod(datasetname, theMethodName) ) {
      Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
      return nullptr;
   }

   std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
   if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
      Log() << kERROR << Form("Can only generate ROC curves for analysis types kClassification and kMulticlass.") << Endl;
      return nullptr;
   }

   TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
   TGraph *graph = nullptr;

   if ( ! rocCurve ) {
      Log() << kFATAL << Form("ROCCurve object was not created for Method = %s with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
      return nullptr;
   }

   graph = (TGraph *)rocCurve->GetROCCurve()->Clone();
   delete rocCurve;

   if (setTitles) {
      graph->GetYaxis()->SetTitle("Background rejection (Specificity)");
      graph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
      graph->SetTitle(Form("Signal efficiency vs. Background rejection (%s)", theMethodName.Data()));
   }

   return graph;
}
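
// For example (sketch; "dataset" and "BDT" are hypothetical names):
//
//    TGraph *g = factory.GetROCCurve("dataset", "BDT", kTRUE);
//    if (g) g->Draw("AL");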

////////////////////////////////////////////////////////////////////////////////
/// Generate a collection of graphs, for all methods for a given class. Suitable
/// for comparing method performance.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others considered background. This is fine in binary classification,
/// but in multiclass classification the ROC surface is an N-dimensional
/// shape, where N is the number of classes - 1.

TMultiGraph* TMVA::Factory::GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
{
   return GetROCCurveAsMultiGraph((TString)loader->GetName(), iClass);
}

////////////////////////////////////////////////////////////////////////////////
/// Generate a collection of graphs, for all methods for a given class. Suitable
/// for comparing method performance.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others considered background. This is fine in binary classification,
/// but in multiclass classification the ROC surface is an N-dimensional
/// shape, where N is the number of classes - 1.

TMultiGraph* TMVA::Factory::GetROCCurveAsMultiGraph(TString datasetname, UInt_t iClass)
{
   UInt_t line_color = 1;

   TMultiGraph *multigraph = new TMultiGraph();

   MVector *methods = fMethodsMap[datasetname.Data()];
   for (auto * method_raw : *methods) {
      TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(method_raw);
      if (method == nullptr) { continue; }

      TString methodName = method->GetMethodName();
      UInt_t nClasses = method->DataInfo().GetNClasses();

      if ( this->fAnalysisType == Types::kMulticlass && iClass >= nClasses ) {
         Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass, nClasses) << Endl;
         continue;
      }

      TString className = method->DataInfo().GetClassInfo(iClass)->GetName();

      TGraph *graph = this->GetROCCurve(datasetname, methodName, false, iClass);
      graph->SetTitle(methodName);

      graph->SetLineWidth(2);
      graph->SetLineColor(line_color++);
      graph->SetFillColor(10);

      multigraph->Add(graph);
   }

   if ( multigraph->GetListOfGraphs() == nullptr ) {
      Log() << kERROR << Form("No methods have class %i defined.", iClass) << Endl;
      return nullptr;
   }

   return multigraph;
}
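
// For example (sketch; "dataset" is a hypothetical name):
//
//    TMultiGraph *mg = factory.GetROCCurveAsMultiGraph("dataset", 0);
//    if (mg) mg->Draw("AL");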

////////////////////////////////////////////////////////////////////////////////
/// Draws ROC curves for all methods booked with the factory for a given class
/// onto a canvas.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others considered background. This is fine in binary classification,
/// but in multiclass classification the ROC surface is an N-dimensional
/// shape, where N is the number of classes - 1.

TCanvas* TMVA::Factory::GetROCCurve(TMVA::DataLoader *loader, UInt_t iClass)
{
   return GetROCCurve((TString)loader->GetName(), iClass);
}

////////////////////////////////////////////////////////////////////////////////
/// Draws ROC curves for all methods booked with the factory for a given class.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others considered background. This is fine in binary classification,
/// but in multiclass classification the ROC surface is an N-dimensional
/// shape, where N is the number of classes - 1.

TCanvas* TMVA::Factory::GetROCCurve(TString datasetname, UInt_t iClass)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return 0;
   }

   TString name = Form("ROCCurve %s class %i", datasetname.Data(), iClass);
   TCanvas *canvas = new TCanvas(name, "ROC Curve", 200, 10, 700, 500);
   canvas->SetGrid();

   TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass);

   if ( multigraph ) {
      multigraph->Draw("AL");

      multigraph->GetYaxis()->SetTitle("Background rejection (Specificity)");
      multigraph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");

      TString titleString = Form("Signal efficiency vs. Background rejection");
      if (this->fAnalysisType == Types::kMulticlass) {
         titleString = Form("%s (Class=%i)", titleString.Data(), iClass);
      }

      // Workaround for TMultiGraph not drawing title correctly.
      multigraph->GetHistogram()->SetTitle( titleString );
      multigraph->SetTitle( titleString );

      canvas->BuildLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
   }

   return canvas;
}
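
// For example (sketch; the output file name is hypothetical):
//
//    TCanvas *c = factory.GetROCCurve("dataset");
//    if (c) c->SaveAs("roc_curves.png");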

////////////////////////////////////////////////////////////////////////////////
/// Iterates through all booked methods and calls training

void TMVA::Factory::TrainAllMethods()
{
   Log() << kHEADER << gTools().Color("bold") << "Train all methods" << gTools().Color("reset") << Endl;
   // iterates over all MVAs that have been booked, and calls their training methods


   // don't do anything if no method booked
   if (fMethodsMap.empty()) {
      Log() << kINFO << "...nothing found to train" << Endl;
      return;
   }

   // here the training starts
   //Log() << kINFO << " " << Endl;
   Log() << kDEBUG << "Train all methods for "
         << (fAnalysisType == Types::kRegression ? "Regression" :
             (fAnalysisType == Types::kMulticlass ? "Multiclass" : "Classification") ) << " ..." << Endl;

   std::map<TString,MVector*>::iterator itrMap;

   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap)
   {
      MVector *methods = itrMap->second;
      MVector::iterator itrMethod;

      // iterate over methods and train
      for ( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
         MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);

         if (mva == 0) continue;

         if (mva->DataInfo().GetDataSetManager()->DataInput().GetEntries() <= 1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
            Log() << kFATAL << "No input data for the training provided!" << Endl;
         }

         if (fAnalysisType == Types::kRegression && mva->DataInfo().GetNTargets() < 1 )
            Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
         else if ( (fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification)
                   && mva->DataInfo().GetNClasses() < 2 )
            Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;

         // first print some information about the default dataset
         if (!IsSilentFile()) WriteDataInformation(mva->fDataSetInfo);


         if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
            Log() << kWARNING << "Method " << mva->GetMethodName()
                  << " not trained (training tree has fewer entries ["
                  << mva->Data()->GetNTrainingEvents()
                  << "] than required [" << MinNoTrainingEvents << "])" << Endl;
            continue;
         }

         Log() << kHEADER << "Train method: " << mva->GetMethodName() << " for "
               << (fAnalysisType == Types::kRegression ? "Regression" :
                   (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl << Endl;
         mva->TrainMethod();
         Log() << kHEADER << "Training finished" << Endl << Endl;
      }

      if (fAnalysisType != Types::kRegression) {

         // variable ranking
         //Log() << Endl;
         Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
         for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
            MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
            if (mva && mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) {

               // create and print ranking
               const Ranking* ranking = (*itrMethod)->CreateRanking();
               if (ranking != 0) ranking->Print();
               else Log() << kINFO << "No variable ranking supplied by classifier: "
                          << dynamic_cast<MethodBase*>(*itrMethod)->GetMethodName() << Endl;
            }
         }
      }

      // save training history in case we are not in the silent mode
      if (!IsSilentFile()) {
         for (UInt_t i=0; i<methods->size(); i++) {
            MethodBase* m = dynamic_cast<MethodBase*>((*methods)[i]);
            if (m == 0) continue;
            m->BaseDir()->cd();
            m->fTrainHistory.SaveHistory(m->GetMethodName());
         }
      }

      // delete all methods and recreate them from weight file - this ensures that the application
      // of the methods (in TMVAClassificationApplication) is consistent with the results obtained
      // in the testing
      //Log() << Endl;
      if (fModelPersistence) {

         Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;

         if (!IsSilentFile()) RootBaseDir()->cd();

         // iterate through all booked methods
         for (UInt_t i=0; i<methods->size(); i++) {

            MethodBase *m = dynamic_cast<MethodBase *>((*methods)[i]);
            if (m == nullptr)
               continue;

            TMVA::Types::EMVA methodType = m->GetMethodType();
            TString weightfile = m->GetWeightFileName();

            // decide if .txt or .xml file should be read:
            if (READXML)
               weightfile.ReplaceAll(".txt", ".xml");

            DataSetInfo &dataSetInfo = m->DataInfo();
            TString testvarName = m->GetTestvarName();
            delete m; // itrMethod[i];

            // recreate
            m = dynamic_cast<MethodBase *>(ClassifierFactory::Instance().Create(
               Types::Instance().GetMethodName(methodType).Data(), dataSetInfo, weightfile));
            if (m->GetMethodType() == Types::kCategory) {
               MethodCategory *methCat = (dynamic_cast<MethodCategory *>(m));
               if (!methCat)
                  Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;
               else
                  methCat->fDataSetManager = m->DataInfo().GetDataSetManager();
            }
            // ToDo: do we need to fill the DataSetManager of MethodBoost here too?

            TString wfileDir = m->DataInfo().GetName();
            wfileDir += "/" + gConfig().GetIONames().fWeightFileDir;
            m->SetWeightFileDir(wfileDir);
            m->SetModelPersistence(fModelPersistence);
            m->SetSilentFile(IsSilentFile());
            m->SetAnalysisType(fAnalysisType);
            m->SetupMethod();
            m->ReadStateFromFile();
            m->SetTestvarName(testvarName);

            // replace trained method by newly created one (from weight file) in methods vector
            (*methods)[i] = m;
         }
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Evaluates all booked methods on the testing data and adds the output to the
/// Results in the corresponding DataSet.
///

void TMVA::Factory::TestAllMethods()
{
   Log() << kHEADER << gTools().Color("bold") << "Test all methods" << gTools().Color("reset") << Endl;

   // don't do anything if no method booked
   if (fMethodsMap.empty()) {
      Log() << kINFO << "...nothing found to test" << Endl;
      return;
   }
   std::map<TString,MVector*>::iterator itrMap;

   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap)
   {
      MVector *methods = itrMap->second;
      MVector::iterator itrMethod;

      // iterate over methods and test
      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
         if (mva == 0)
            continue;
         Types::EAnalysisType analysisType = mva->GetAnalysisType();
         Log() << kHEADER << "Test method: " << mva->GetMethodName() << " for "
               << (analysisType == Types::kRegression
                   ? "Regression"
                   : (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
               << " performance" << Endl << Endl;
         mva->AddOutput(Types::kTesting, analysisType);
      }
   }
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::Factory::MakeClass(const TString& datasetname, const TString& methodTitle ) const
{
   if (methodTitle != "") {
      IMethod* method = GetMethod(datasetname, methodTitle);
      if (method) method->MakeClass();
      else {
         Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
               << "\" in list" << Endl;
      }
   }
   else {

      // no classifier specified, make response classes for all booked methods
      MVector *methods = fMethodsMap.find(datasetname)->second;
      MVector::const_iterator itrMethod;
      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
         if (method == 0) continue;
         Log() << kINFO << "Make response class for classifier: " << method->GetMethodName() << Endl;
         method->MakeClass();
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Print predefined help message of classifier.
/// If no classifier is specified, print the help messages of all booked classifiers.

void TMVA::Factory::PrintHelpMessage(const TString& datasetname, const TString& methodTitle ) const
{
   if (methodTitle != "") {
      IMethod* method = GetMethod(datasetname, methodTitle );
      if (method) method->PrintHelpMessage();
      else {
         Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
               << "\" in list" << Endl;
      }
   }
   else {

      // no classifier specified, print all help messages
      MVector *methods = fMethodsMap.find(datasetname)->second;
      MVector::const_iterator itrMethod;
      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
         if (method == 0) continue;
         Log() << kINFO << "Print help message for classifier: " << method->GetMethodName() << Endl;
         method->PrintHelpMessage();
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Iterates over all MVA input variables and evaluates them.

void TMVA::Factory::EvaluateAllVariables(DataLoader *loader, TString options )
{
   Log() << kINFO << "Evaluating all variables..." << Endl;

   for (UInt_t i=0; i<loader->GetDataSetInfo().GetNVariables(); i++) {
      TString s = loader->GetDataSetInfo().GetVariableInfo(i).GetLabel();
      if (options.Contains("V")) s += ":V";
      this->BookMethod(loader, "Variable", s );
   }
}
1346 ////////////////////////////////////////////////////////////////////////////////
1347 /// Iterates over all MVAs that have been booked, and calls their evaluation methods.
1348 
1350 {
1351  Log() << kHEADER << gTools().Color("bold") << "Evaluate all methods" << gTools().Color("reset") << Endl;
1352 
1353  // don't do anything if no method booked
1354  if (fMethodsMap.empty()) {
1355  Log() << kINFO << "...nothing found to evaluate" << Endl;
1356  return;
1357  }
1358  std::map<TString,MVector*>::iterator itrMap;
1359 
1360  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1361  {
1362  MVector *methods=itrMap->second;

      // -----------------------------------------------------------------------
      // First part of evaluation process
      // --> compute efficiencies, and other separation estimators
      // -----------------------------------------------------------------------

      // although equal, we now want to separate the output for the variables
      // and the real methods
      Int_t isel;                  // will be 0 for a Method; 1 for a Variable
      Int_t nmeth_used[2] = {0,0}; // 0 Method; 1 Variable

      std::vector<std::vector<TString> >  mname(2);
      std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
      std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
      std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
      std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);

      std::vector<std::vector<Float_t> > multiclass_testEff;
      std::vector<std::vector<Float_t> > multiclass_trainEff;
      std::vector<std::vector<Float_t> > multiclass_testPur;
      std::vector<std::vector<Float_t> > multiclass_trainPur;

      std::vector<std::vector<Float_t> > train_history;

      // Multiclass confusion matrices.
      std::vector<TMatrixD> multiclass_trainConfusionEffB01;
      std::vector<TMatrixD> multiclass_trainConfusionEffB10;
      std::vector<TMatrixD> multiclass_trainConfusionEffB30;
      std::vector<TMatrixD> multiclass_testConfusionEffB01;
      std::vector<TMatrixD> multiclass_testConfusionEffB10;
      std::vector<TMatrixD> multiclass_testConfusionEffB30;

      std::vector<std::vector<Double_t> > biastrain(1); // "bias" of the regression on the training data
      std::vector<std::vector<Double_t> > biastest(1);  // "bias" of the regression on test data
      std::vector<std::vector<Double_t> > devtrain(1);  // "dev" of the regression on the training data
      std::vector<std::vector<Double_t> > devtest(1);   // "dev" of the regression on test data
      std::vector<std::vector<Double_t> > rmstrain(1);  // "rms" of the regression on the training data
      std::vector<std::vector<Double_t> > rmstest(1);   // "rms" of the regression on test data
      std::vector<std::vector<Double_t> > minftrain(1); // "minf" of the regression on the training data
      std::vector<std::vector<Double_t> > minftest(1);  // "minf" of the regression on test data
      std::vector<std::vector<Double_t> > rhotrain(1);  // correlation of the regression on the training data
      std::vector<std::vector<Double_t> > rhotest(1);   // correlation of the regression on test data

      // same as above but for 'truncated' quantities (computed for events within 2sigma of RMS)
      std::vector<std::vector<Double_t> > biastrainT(1);
      std::vector<std::vector<Double_t> > biastestT(1);
      std::vector<std::vector<Double_t> > devtrainT(1);
      std::vector<std::vector<Double_t> > devtestT(1);
      std::vector<std::vector<Double_t> > rmstrainT(1);
      std::vector<std::vector<Double_t> > rmstestT(1);
      std::vector<std::vector<Double_t> > minftrainT(1);
      std::vector<std::vector<Double_t> > minftestT(1);

      // following vector contains all methods - with the exception of Cuts, which are special
      MVector methodsNoCuts;

      Bool_t doRegression = kFALSE;
      Bool_t doMulticlass = kFALSE;

1422  // iterate over methods and evaluate
1423  for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1424  Event::SetIsTraining(kFALSE);
1425  MethodBase* theMethod = dynamic_cast<MethodBase*>(*itrMethod);
1426  if(theMethod==0) continue;
1427  theMethod->SetFile(fgTargetFile);
1428  theMethod->SetSilentFile(IsSilentFile());
1429  if (theMethod->GetMethodType() != Types::kCuts) methodsNoCuts.push_back( *itrMethod );
1430 
1431  if (theMethod->DoRegression()) {
1432  doRegression = kTRUE;
1433 
1434  Log() << kINFO << "Evaluate regression method: " << theMethod->GetMethodName() << Endl;
1435  Double_t bias, dev, rms, mInf;
1436  Double_t biasT, devT, rmsT, mInfT;
1437  Double_t rho;
1438 
1439  Log() << kINFO << "TestRegression (testing)" << Endl;
1440  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTesting );
1441  biastest[0] .push_back( bias );
1442  devtest[0] .push_back( dev );
1443  rmstest[0] .push_back( rms );
1444  minftest[0] .push_back( mInf );
1445  rhotest[0] .push_back( rho );
1446  biastestT[0] .push_back( biasT );
1447  devtestT[0] .push_back( devT );
1448  rmstestT[0] .push_back( rmsT );
1449  minftestT[0] .push_back( mInfT );
1450 
1451  Log() << kINFO << "TestRegression (training)" << Endl;
1452  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTraining );
1453  biastrain[0] .push_back( bias );
1454  devtrain[0] .push_back( dev );
1455  rmstrain[0] .push_back( rms );
1456  minftrain[0] .push_back( mInf );
1457  rhotrain[0] .push_back( rho );
1458  biastrainT[0].push_back( biasT );
1459  devtrainT[0] .push_back( devT );
1460  rmstrainT[0] .push_back( rmsT );
1461  minftrainT[0].push_back( mInfT );
1462 
1463  mname[0].push_back( theMethod->GetMethodName() );
1464  nmeth_used[0]++;
1465  if (!IsSilentFile()) {
1466  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1467  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1468  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1469  }
1470  } else if (theMethod->DoMulticlass()) {
1471  // ====================================================================
1472  // === Multiclass evaluation
1473  // ====================================================================
1474  doMulticlass = kTRUE;
1475  Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl;
1476 
1477  // This part would use a genetic algorithm to find the working point that maximises
1478  // sig eff * sig purity; that search is expensive, which is why it is disabled for now.
1479  // Find approximate optimal working point w.r.t. signalEfficiency * signalPurity.
1480  // theMethod->TestMulticlass(); // This is where the actual GA calc is done
1481  // multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur));
1482 
1483  theMethod->TestMulticlass();
1484 
1485  // Confusion matrices at three background-efficiency working points (1%, 10%, 30%)
1486  multiclass_trainConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTraining));
1487  multiclass_trainConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTraining));
1488  multiclass_trainConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTraining));
1489 
1490  multiclass_testConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTesting));
1491  multiclass_testConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTesting));
1492  multiclass_testConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTesting));
1493 
1494  if (!IsSilentFile()) {
1495  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1496  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1497  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1498  }
1499 
1500  nmeth_used[0]++;
1501  mname[0].push_back(theMethod->GetMethodName());
1502  } else {
1503 
1504  Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl;
1505  isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0;
1506 
1507  // perform the evaluation
1508  theMethod->TestClassification();
1509 
1510  // evaluate the classifier
1511  mname[isel].push_back(theMethod->GetMethodName());
1512  sig[isel].push_back(theMethod->GetSignificance());
1513  sep[isel].push_back(theMethod->GetSeparation());
1514  roc[isel].push_back(theMethod->GetROCIntegral());
1515 
1516  Double_t err;
1517  eff01[isel].push_back(theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err));
1518  eff01err[isel].push_back(err);
1519  eff10[isel].push_back(theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err));
1520  eff10err[isel].push_back(err);
1521  eff30[isel].push_back(theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err));
1522  eff30err[isel].push_back(err);
1523  effArea[isel].push_back(theMethod->GetEfficiency("", Types::kTesting, err)); // computes the area (average)
1524 
1525  trainEff01[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer
1526  trainEff10[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.10"));
1527  trainEff30[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.30"));
1528 
1529  nmeth_used[isel]++;
1530 
1531  if (!IsSilentFile()) {
1532  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1533  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1534  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1535  }
1536  }
1537  }
1538  if (doRegression) {
1539 
1540  std::vector<TString> vtemps = mname[0];
1541  std::vector< std::vector<Double_t> > vtmp;
1542  vtmp.push_back( devtest[0] ); // this is the vector that is ranked
1543  vtmp.push_back( devtrain[0] );
1544  vtmp.push_back( biastest[0] );
1545  vtmp.push_back( biastrain[0] );
1546  vtmp.push_back( rmstest[0] );
1547  vtmp.push_back( rmstrain[0] );
1548  vtmp.push_back( minftest[0] );
1549  vtmp.push_back( minftrain[0] );
1550  vtmp.push_back( rhotest[0] );
1551  vtmp.push_back( rhotrain[0] );
1552  vtmp.push_back( devtestT[0] );
1553  vtmp.push_back( devtrainT[0] );
1554  vtmp.push_back( biastestT[0] );
1555  vtmp.push_back( biastrainT[0]);
1556  vtmp.push_back( rmstestT[0] );
1557  vtmp.push_back( rmstrainT[0] );
1558  vtmp.push_back( minftestT[0] );
1559  vtmp.push_back( minftrainT[0]);
1560  gTools().UsefulSortAscending( vtmp, &vtemps );
1561  mname[0] = vtemps;
1562  devtest[0] = vtmp[0];
1563  devtrain[0] = vtmp[1];
1564  biastest[0] = vtmp[2];
1565  biastrain[0] = vtmp[3];
1566  rmstest[0] = vtmp[4];
1567  rmstrain[0] = vtmp[5];
1568  minftest[0] = vtmp[6];
1569  minftrain[0] = vtmp[7];
1570  rhotest[0] = vtmp[8];
1571  rhotrain[0] = vtmp[9];
1572  devtestT[0] = vtmp[10];
1573  devtrainT[0] = vtmp[11];
1574  biastestT[0] = vtmp[12];
1575  biastrainT[0] = vtmp[13];
1576  rmstestT[0] = vtmp[14];
1577  rmstrainT[0] = vtmp[15];
1578  minftestT[0] = vtmp[16];
1579  minftrainT[0] = vtmp[17];
1580  } else if (doMulticlass) {
1581  // TODO: fill in something meaningful
1582  // If there is some ranking of methods to be done it should be done here.
1583  // However, this is not so easy to define for multiclass so it is left out for now.
1584 
1585  }
1586  else {
1587  // now sort the methods/variables according to the best average efficiency area (effArea, the first vector pushed, drives the ranking)
1588  for (Int_t k=0; k<2; k++) {
1589  std::vector< std::vector<Double_t> > vtemp;
1590  vtemp.push_back( effArea[k] ); // this is the vector that is ranked
1591  vtemp.push_back( eff10[k] );
1592  vtemp.push_back( eff01[k] );
1593  vtemp.push_back( eff30[k] );
1594  vtemp.push_back( eff10err[k] );
1595  vtemp.push_back( eff01err[k] );
1596  vtemp.push_back( eff30err[k] );
1597  vtemp.push_back( trainEff10[k] );
1598  vtemp.push_back( trainEff01[k] );
1599  vtemp.push_back( trainEff30[k] );
1600  vtemp.push_back( sig[k] );
1601  vtemp.push_back( sep[k] );
1602  vtemp.push_back( roc[k] );
1603  std::vector<TString> vtemps = mname[k];
1604  gTools().UsefulSortDescending( vtemp, &vtemps );
1605  effArea[k] = vtemp[0];
1606  eff10[k] = vtemp[1];
1607  eff01[k] = vtemp[2];
1608  eff30[k] = vtemp[3];
1609  eff10err[k] = vtemp[4];
1610  eff01err[k] = vtemp[5];
1611  eff30err[k] = vtemp[6];
1612  trainEff10[k] = vtemp[7];
1613  trainEff01[k] = vtemp[8];
1614  trainEff30[k] = vtemp[9];
1615  sig[k] = vtemp[10];
1616  sep[k] = vtemp[11];
1617  roc[k] = vtemp[12];
1618  mname[k] = vtemps;
1619  }
1620  }
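 // NOTE: the result tables printed in the third part below follow the ranking established here.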
1621 
1622  // -----------------------------------------------------------------------
1623  // Second part of evaluation process
1624  // --> compute correlations among MVAs
1625  // --> compute correlations between input variables and MVA (determines importance)
1626  // --> count overlaps
1627  // -----------------------------------------------------------------------
1628  if(fCorrelations)
1629  {
1630  const Int_t nmeth = methodsNoCuts.size();
1631  MethodBase* method = dynamic_cast<MethodBase*>((*methods)[0]);
1632  const Int_t nvar = method->fDataSetInfo.GetNVariables();
1633  if (!doRegression && !doMulticlass ) {
1634 
1635  if (nmeth > 0) {
1636 
1637  // needed for correlations
1638  Double_t *dvec = new Double_t[nmeth+nvar];
1639  std::vector<Double_t> rvec;
1640 
1641  // for correlations
1642  TPrincipal* tpSig = new TPrincipal( nmeth+nvar, "" );
1643  TPrincipal* tpBkg = new TPrincipal( nmeth+nvar, "" );
1644 
1645  // set required tree branch references
1646  Int_t ivar = 0;
1647  std::vector<TString>* theVars = new std::vector<TString>;
1648  std::vector<ResultsClassification*> mvaRes;
1649  for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); ++itrMethod, ++ivar) {
1650  MethodBase* m = dynamic_cast<MethodBase*>(*itrMethod);
1651  if(m==0) continue;
1652  theVars->push_back( m->GetTestvarName() );
1653  rvec.push_back( m->GetSignalReferenceCut() );
1654  theVars->back().ReplaceAll( "MVA_", "" );
1655  mvaRes.push_back( dynamic_cast<ResultsClassification*>( m->Data()->GetResults( m->GetMethodName(),
1656  Types::kTesting,
1657  Types::kMaxAnalysisType) ) );
1658  }
1659 
1660  // for overlap study
1661  TMatrixD* overlapS = new TMatrixD( nmeth, nmeth );
1662  TMatrixD* overlapB = new TMatrixD( nmeth, nmeth );
1663  (*overlapS) *= 0; // init...
1664  (*overlapB) *= 0; // init...
1665 
1666  // loop over test tree
1667  DataSet* defDs = method->fDataSetInfo.GetDataSet();
1668  defDs->SetCurrentType(Types::kTesting);
1669  for (Int_t ievt=0; ievt<defDs->GetNEvents(); ievt++) {
1670  const Event* ev = defDs->GetEvent(ievt);
1671 
1672  // for correlations
1673  TMatrixD* theMat = 0;
1674  for (Int_t im=0; im<nmeth; im++) {
1675  // check for NaN value
1676  Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
1677  if (TMath::IsNaN(retval)) {
1678  Log() << kWARNING << "Found NaN return value in event: " << ievt
1679  << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;
1680  dvec[im] = 0;
1681  }
1682  else dvec[im] = retval;
1683  }
1684  for (Int_t iv=0; iv<nvar; iv++) dvec[iv+nmeth] = (Double_t)ev->GetValue(iv);
1685  if (method->fDataSetInfo.IsSignal(ev)) { tpSig->AddRow( dvec ); theMat = overlapS; }
1686  else { tpBkg->AddRow( dvec ); theMat = overlapB; }
1687 
1688  // count overlaps
1689  for (Int_t im=0; im<nmeth; im++) {
1690  for (Int_t jm=im; jm<nmeth; jm++) {
1691  if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1692  (*theMat)(im,jm)++;
1693  if (im != jm) (*theMat)(jm,im)++;
1694  }
1695  }
1696  }
1697  }
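 // Two MVAs 'i' and 'j' agree on an event when both outputs fall on the same
 // side of their respective signal reference cuts, i.e. when
 // (dvec[i] - rvec[i]) * (dvec[j] - rvec[j]) > 0. For example, with cuts
 // r = {0.0, 0.5}, outputs {0.3, 0.7} count towards the overlap of the pair,
 // while outputs {0.3, 0.2} do not.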
1698 
1699  // renormalise overlap matrix
1700  (*overlapS) *= (1.0/defDs->GetNEvtSigTest());
1701  (*overlapB) *= (1.0/defDs->GetNEvtBkgdTest());
1702 
1703  tpSig->MakePrincipals();
1704  tpBkg->MakePrincipals();
1705 
1706  const TMatrixD* covMatS = tpSig->GetCovarianceMatrix();
1707  const TMatrixD* covMatB = tpBkg->GetCovarianceMatrix();
1708 
1709  const TMatrixD* corrMatS = gTools().GetCorrelationMatrix( covMatS );
1710  const TMatrixD* corrMatB = gTools().GetCorrelationMatrix( covMatB );
1711 
1712  // print correlation matrices
1713  if (corrMatS != 0 && corrMatB != 0) {
1714 
1715  // extract MVA matrix
1716  TMatrixD mvaMatS(nmeth,nmeth);
1717  TMatrixD mvaMatB(nmeth,nmeth);
1718  for (Int_t im=0; im<nmeth; im++) {
1719  for (Int_t jm=0; jm<nmeth; jm++) {
1720  mvaMatS(im,jm) = (*corrMatS)(im,jm);
1721  mvaMatB(im,jm) = (*corrMatB)(im,jm);
1722  }
1723  }
1724 
1725  // extract variables - to MVA matrix
1726  std::vector<TString> theInputVars;
1727  TMatrixD varmvaMatS(nvar,nmeth);
1728  TMatrixD varmvaMatB(nvar,nmeth);
1729  for (Int_t iv=0; iv<nvar; iv++) {
1730  theInputVars.push_back( method->fDataSetInfo.GetVariableInfo( iv ).GetLabel() );
1731  for (Int_t jm=0; jm<nmeth; jm++) {
1732  varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1733  varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1734  }
1735  }
1736 
1737  if (nmeth > 1) {
1738  Log() << kINFO << Endl;
1739  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (signal):" << Endl;
1740  gTools().FormattedOutput( mvaMatS, *theVars, Log() );
1741  Log() << kINFO << Endl;
1742 
1743  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (background):" << Endl;
1744  gTools().FormattedOutput( mvaMatB, *theVars, Log() );
1745  Log() << kINFO << Endl;
1746  }
1747 
1748  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (signal):" << Endl;
1749  gTools().FormattedOutput( varmvaMatS, theInputVars, *theVars, Log() );
1750  Log() << kINFO << Endl;
1751 
1752  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (background):" << Endl;
1753  gTools().FormattedOutput( varmvaMatB, theInputVars, *theVars, Log() );
1754  Log() << kINFO << Endl;
1755  }
1756  else Log() << kWARNING <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "<EvaluateAllMethods> cannot compute correlation matrices" << Endl;
1757 
1758  // print overlap matrices
1759  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
1760  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "the MVAs 'i' and 'j' have returned consistent answers about \"signal-likeness\"" << Endl;
1761  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "An event is classified as signal-like if its MVA output exceeds the following value:" << Endl;
1762  gTools().FormattedOutput( rvec, *theVars, "Method" , "Cut value", Log() );
1763  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
1764 
1765  // give notice that cut method has been excluded from this test
1766  if (nmeth != (Int_t)methods->size())
1767  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Note: no correlations and overlaps are provided for the Cuts method at present" << Endl;
1768 
1769  if (nmeth > 1) {
1770  Log() << kINFO << Endl;
1771  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (signal):" << Endl;
1772  gTools().FormattedOutput( *overlapS, *theVars, Log() );
1773  Log() << kINFO << Endl;
1774 
1775  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (background):" << Endl;
1776  gTools().FormattedOutput( *overlapB, *theVars, Log() );
1777  }
1778 
1779  // cleanup
1780  delete tpSig;
1781  delete tpBkg;
1782  delete corrMatS;
1783  delete corrMatB;
1784  delete theVars;
1785  delete overlapS;
1786  delete overlapB;
1787  delete [] dvec;
1788  }
1789  }
1790  }
1791  // -----------------------------------------------------------------------
1792  // Third part of evaluation process
1793  // --> output
1794  // -----------------------------------------------------------------------
1795 
1796  if (doRegression) {
1797 
1798  Log() << kINFO << Endl;
1799  TString hLine = "--------------------------------------------------------------------------------------------------";
1800  Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
1801  Log() << kINFO << "(\"Bias\" denotes the mean deviation of the regression from the true target." << Endl;
1802  Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
1803  Log() << kINFO << " Quantities marked \"_T\" are the corresponding \"truncated\" versions," << Endl;
1804  Log() << kINFO << " obtained after removing events deviating more than 2 sigma from the average.)" << Endl;
1805  Log() << kINFO << hLine << Endl;
1806  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1807  Log() << kINFO << hLine << Endl;
1808 
1809  for (Int_t i=0; i<nmeth_used[0]; i++) {
1810  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1811  if(theMethod==0) continue;
1812 
1813  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1814  theMethod->fDataSetInfo.GetName(),
1815  (const char*)mname[0][i],
1816  biastest[0][i], biastestT[0][i],
1817  rmstest[0][i], rmstestT[0][i],
1818  minftest[0][i], minftestT[0][i] )
1819  << Endl;
1820  }
1821  Log() << kINFO << hLine << Endl;
1822  Log() << kINFO << Endl;
1823  Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
1824  Log() << kINFO << "(overtraining check)" << Endl;
1825  Log() << kINFO << hLine << Endl;
1826  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1827  Log() << kINFO << hLine << Endl;
1828 
1829  for (Int_t i=0; i<nmeth_used[0]; i++) {
1830  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1831  if(theMethod==0) continue;
1832  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1833  theMethod->fDataSetInfo.GetName(),
1834  (const char*)mname[0][i],
1835  biastrain[0][i], biastrainT[0][i],
1836  rmstrain[0][i], rmstrainT[0][i],
1837  minftrain[0][i], minftrainT[0][i] )
1838  << Endl;
1839  }
1840  Log() << kINFO << hLine << Endl;
1841  Log() << kINFO << Endl;
1842  } else if (doMulticlass) {
1843  // ====================================================================
1844  // === Multiclass Output
1845  // ====================================================================
1846 
1847  TString hLine =
1848  "-------------------------------------------------------------------------------------------------------";
1849 
1850  // This part uses a genetic alg. to evaluate the optimal sig eff * sig pur.
1851  // This is why it is disabled for now.
1852  //
1853  // // --- Achievable signal efficiency * signal purity
1854  // // --------------------------------------------------------------------
1855  // Log() << kINFO << Endl;
1856  // Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
1857  // Log() << kINFO << hLine << Endl;
1858 
1859  // // iterate over methods and evaluate
1860  // for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1861  // MethodBase *theMethod = dynamic_cast<MethodBase *>(*itrMethod);
1862  // if (theMethod == 0) {
1863  // continue;
1864  // }
1865 
1866  // TString header = "DataSet Name MVA Method ";
1867  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1868  // header += Form("%-12s ", theMethod->fDataSetInfo.GetClassInfo(icls)->GetName());
1869  // }
1870 
1871  // Log() << kINFO << header << Endl;
1872  // Log() << kINFO << hLine << Endl;
1873  // for (Int_t i = 0; i < nmeth_used[0]; i++) {
1874  // TString res = Form("[%-14s] %-15s", theMethod->fDataSetInfo.GetName(), (const char *)mname[0][i]);
1875  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1876  // res += Form("%#1.3f ", (multiclass_testEff[i][icls]) * (multiclass_testPur[i][icls]));
1877  // }
1878  // Log() << kINFO << res << Endl;
1879  // }
1880 
1881  // Log() << kINFO << hLine << Endl;
1882  // Log() << kINFO << Endl;
1883  // }
1884 
1885  // --- 1 vs Rest ROC AUC, signal efficiency @ given background efficiency
1886  // --------------------------------------------------------------------
1887  TString header1 = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "Dataset", "MVA Method", "ROC AUC", "Sig eff@B=0.01",
1888  "Sig eff@B=0.10", "Sig eff@B=0.30");
1889  TString header2 = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "Name:", "/ Class:", "test (train)", "test (train)",
1890  "test (train)", "test (train)");
1891  Log() << kINFO << Endl;
1892  Log() << kINFO << "1-vs-rest performance metrics per class" << Endl;
1893  Log() << kINFO << hLine << Endl;
1894  Log() << kINFO << Endl;
1895  Log() << kINFO << "Considers the listed class as signal and the other classes" << Endl;
1896  Log() << kINFO << "as background, reporting the resulting binary performance." << Endl;
1897  Log() << kINFO << "A score of 0.820 (0.850) means 0.820 was achieved on the" << Endl;
1898  Log() << kINFO << "test set and 0.850 on the training set." << Endl;
1899 
1900  Log() << kINFO << Endl;
1901  Log() << kINFO << header1 << Endl;
1902  Log() << kINFO << header2 << Endl;
1903  for (Int_t k = 0; k < 2; k++) {
1904  for (Int_t i = 0; i < nmeth_used[k]; i++) {
1905  if (k == 1) {
1906  mname[k][i].ReplaceAll("Variable_", "");
1907  }
1908 
1909  const TString datasetName = itrMap->first;
1910  const TString mvaName = mname[k][i];
1911 
1912  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, mvaName));
1913  if (theMethod == 0) {
1914  continue;
1915  }
1916 
1917  Log() << kINFO << Endl;
1918  TString row = Form("%-15s%-15s", datasetName.Data(), mvaName.Data());
1919  Log() << kINFO << row << Endl;
1920  Log() << kINFO << "------------------------------" << Endl;
1921 
1922  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
1923  for (UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1924 
1925  ROCCurve *rocCurveTrain = GetROC(datasetName, mvaName, iClass, Types::kTraining);
1926  ROCCurve *rocCurveTest = GetROC(datasetName, mvaName, iClass, Types::kTesting);
1927 
1928  const TString className = theMethod->DataInfo().GetClassInfo(iClass)->GetName();
1929  const Double_t rocaucTrain = rocCurveTrain->GetROCIntegral();
1930  const Double_t effB01Train = rocCurveTrain->GetEffSForEffB(0.01);
1931  const Double_t effB10Train = rocCurveTrain->GetEffSForEffB(0.10);
1932  const Double_t effB30Train = rocCurveTrain->GetEffSForEffB(0.30);
1933  const Double_t rocaucTest = rocCurveTest->GetROCIntegral();
1934  const Double_t effB01Test = rocCurveTest->GetEffSForEffB(0.01);
1935  const Double_t effB10Test = rocCurveTest->GetEffSForEffB(0.10);
1936  const Double_t effB30Test = rocCurveTest->GetEffSForEffB(0.30);
1937  const TString rocaucCmp = Form("%5.3f (%5.3f)", rocaucTest, rocaucTrain);
1938  const TString effB01Cmp = Form("%5.3f (%5.3f)", effB01Test, effB01Train);
1939  const TString effB10Cmp = Form("%5.3f (%5.3f)", effB10Test, effB10Train);
1940  const TString effB30Cmp = Form("%5.3f (%5.3f)", effB30Test, effB30Train);
1941  row = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "", className.Data(), rocaucCmp.Data(), effB01Cmp.Data(),
1942  effB10Cmp.Data(), effB30Cmp.Data());
1943  Log() << kINFO << row << Endl;
1944 
1945  delete rocCurveTrain;
1946  delete rocCurveTest;
1947  }
1948  }
1949  }
1950  Log() << kINFO << Endl;
1951  Log() << kINFO << hLine << Endl;
1952  Log() << kINFO << Endl;
1953 
1954  // --- Confusion matrices
1955  // --------------------------------------------------------------------
1956  auto printMatrix = [](TMatrixD const &matTraining, TMatrixD const &matTesting, std::vector<TString> classnames,
1957  UInt_t numClasses, MsgLogger &stream) {
1958  // assert (classLabeledWidth >= valueLabelWidth + 2)
1959  // if (...) { Log() << kWARNING << "..." << Endl; }
1960 
1961  // TODO: Ensure matrices are same size.
1962 
1963  TString header = Form(" %-14s", " ");
1964  TString headerInfo = Form(" %-14s", " ");
1966  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1967  header += Form(" %-14s", classnames[iCol].Data());
1968  headerInfo += Form(" %-14s", " test (train)");
1969  }
1970  stream << kINFO << header << Endl;
1971  stream << kINFO << headerInfo << Endl;
1972 
1973  for (UInt_t iRow = 0; iRow < numClasses; ++iRow) {
1974  stream << kINFO << Form(" %-14s", classnames[iRow].Data());
1975 
1976  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1977  if (iCol == iRow) {
1978  stream << kINFO << Form(" %-14s", "-");
1979  } else {
1980  Double_t trainValue = matTraining[iRow][iCol];
1981  Double_t testValue = matTesting[iRow][iCol];
1982  TString entry = Form("%-5.3f (%-5.3f)", testValue, trainValue);
1983  stream << kINFO << Form(" %-14s", entry.Data());
1984  }
1985  }
1986  stream << kINFO << Endl;
1987  }
1988  };
1989 
1990  Log() << kINFO << Endl;
1991  Log() << kINFO << "Confusion matrices for all methods" << Endl;
1992  Log() << kINFO << hLine << Endl;
1993  Log() << kINFO << Endl;
1994  Log() << kINFO << "Does a binary comparison between the two classes given by a " << Endl;
1995  Log() << kINFO << "particular row-column combination. In each case, the class " << Endl;
1996  Log() << kINFO << "given by the row is considered signal while the class given " << Endl;
1997  Log() << kINFO << "by the column index is considered background." << Endl;
1998  Log() << kINFO << Endl;
1999  for (UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
2000  MethodBase *theMethod = dynamic_cast<MethodBase *>(methods->at(iMethod));
2001  if (theMethod == nullptr) {
2002  continue;
2003  }
2004  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
2005 
2006  std::vector<TString> classnames;
2007  for (UInt_t iCls = 0; iCls < numClasses; ++iCls) {
2008  classnames.push_back(theMethod->fDataSetInfo.GetClassInfo(iCls)->GetName());
2009  }
2010  Log() << kINFO
2011  << "=== Showing confusion matrix for method : " << Form("%-15s", (const char *)mname[0][iMethod])
2012  << Endl;
2013  Log() << kINFO << "(Signal Efficiency for Background Efficiency 1%)" << Endl;
2014  Log() << kINFO << "---------------------------------------------------" << Endl;
2015  printMatrix(multiclass_trainConfusionEffB01[iMethod], multiclass_testConfusionEffB01[iMethod], classnames,
2016  numClasses, Log());
2017  Log() << kINFO << Endl;
2018 
2019  Log() << kINFO << "(Signal Efficiency for Background Efficiency 10%)" << Endl;
2020  Log() << kINFO << "---------------------------------------------------" << Endl;
2021  printMatrix(multiclass_trainConfusionEffB10[iMethod], multiclass_testConfusionEffB10[iMethod], classnames,
2022  numClasses, Log());
2023  Log() << kINFO << Endl;
2024 
2025  Log() << kINFO << "(Signal Efficiency for Background Efficiency 30%)" << Endl;
2026  Log() << kINFO << "---------------------------------------------------" << Endl;
2027  printMatrix(multiclass_trainConfusionEffB30[iMethod], multiclass_testConfusionEffB30[iMethod], classnames,
2028  numClasses, Log());
2029  Log() << kINFO << Endl;
2030  }
2031  Log() << kINFO << hLine << Endl;
2032  Log() << kINFO << Endl;
2033 
2034  } else {
2035  // Binary classification
2036  if (fROC) {
2037  Log().EnableOutput();
2038  gConfig().SetSilent(kFALSE);
2039  Log() << Endl;
2040  TString hLine = "------------------------------------------------------------------------------------------"
2041  "-------------------------";
2042  Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
2043  Log() << kINFO << hLine << Endl;
2044  Log() << kINFO << "DataSet MVA " << Endl;
2045  Log() << kINFO << "Name: Method: ROC-integ" << Endl;
2046 
2047  // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error):
2048  // | Sepa- Signifi- " << Endl; Log() << kDEBUG << "Name: Method: @B=0.01
2049  // @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl;
2050  Log() << kDEBUG << hLine << Endl;
2051  for (Int_t k = 0; k < 2; k++) {
2052  if (k == 1 && nmeth_used[k] > 0) {
2053  Log() << kINFO << hLine << Endl;
2054  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2055  }
2056  for (Int_t i = 0; i < nmeth_used[k]; i++) {
2057  TString datasetName = itrMap->first;
2058  TString methodName = mname[k][i];
2059 
2060  if (k == 1) {
2061  methodName.ReplaceAll("Variable_", "");
2062  }
2063 
2064  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, methodName));
2065  if (theMethod == 0) {
2066  continue;
2067  }
2068 
2069  TMVA::DataSet *dataset = theMethod->Data();
2070  TMVA::Results *results = dataset->GetResults(methodName, Types::kTesting, this->fAnalysisType);
2071  std::vector<Bool_t> *mvaResType =
2072  dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
2073 
2074  Double_t rocIntegral = 0.0;
2075  if (mvaResType->size() != 0) {
2076  rocIntegral = GetROCIntegral(datasetName, methodName);
2077  }
2078 
2079  if (sep[k][i] < 0 || sig[k][i] < 0) {
2080  // cannot compute separation/significance -> no MVA (usually for Cuts)
2081  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), effArea[k][i])
2082  << Endl;
2083 
2084  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
2085  // %#1.3f %#1.3f | -- --",
2086  // datasetName.Data(),
2087  // methodName.Data(),
2088  // eff01[k][i], Int_t(1000*eff01err[k][i]),
2089  // eff10[k][i], Int_t(1000*eff10err[k][i]),
2090  // eff30[k][i], Int_t(1000*eff30err[k][i]),
2091  // effArea[k][i],rocIntegral) << Endl;
2092  } else {
2093  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), rocIntegral)
2094  << Endl;
2095  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
2096  // %#1.3f %#1.3f | %#1.3f %#1.3f",
2097  // datasetName.Data(),
2098  // methodName.Data(),
2099  // eff01[k][i], Int_t(1000*eff01err[k][i]),
2100  // eff10[k][i], Int_t(1000*eff10err[k][i]),
2101  // eff30[k][i], Int_t(1000*eff30err[k][i]),
2102  // effArea[k][i],rocIntegral,
2103  // sep[k][i], sig[k][i]) << Endl;
2104  }
2105  }
2106  }
2107  Log() << kINFO << hLine << Endl;
2108  Log() << kINFO << Endl;
2109  Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
2110  Log() << kINFO << hLine << Endl;
2111  Log() << kINFO
2112  << "DataSet MVA Signal efficiency: from test sample (from training sample) "
2113  << Endl;
2114  Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2115  << Endl;
2116  Log() << kINFO << hLine << Endl;
2117  for (Int_t k = 0; k < 2; k++) {
2118  if (k == 1 && nmeth_used[k] > 0) {
2119  Log() << kINFO << hLine << Endl;
2120  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2121  }
2122  for (Int_t i = 0; i < nmeth_used[k]; i++) {
2123  if (k == 1) mname[k][i].ReplaceAll("Variable_", "");
2124  MethodBase *theMethod = dynamic_cast<MethodBase *>((*methods)[i]);
2125  if (theMethod == 0) continue;
2126 
2127  Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2128  theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i],
2129  trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2130  << Endl;
2131  }
2132  }
2133  Log() << kINFO << hLine << Endl;
2134  Log() << kINFO << Endl;
2135 
2136  if (gTools().CheckForSilentOption(GetOptions())) Log().InhibitOutput();
2137  } // end fROC
2138  }
2139  if(!IsSilentFile())
2140  {
2141  std::list<TString> datasets;
2142  for (Int_t k=0; k<2; k++) {
2143  for (Int_t i=0; i<nmeth_used[k]; i++) {
2144  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
2145  if(theMethod==0) continue;
2146  // write test/training trees
2147  RootBaseDir()->cd(theMethod->fDataSetInfo.GetName());
2148  if(std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end())
2149  {
2150  theMethod->Data()->GetTree(Types::kTesting)->Write( "", TObject::kOverwrite );
2151  theMethod->Data()->GetTree(Types::kTraining)->Write( "", TObject::kOverwrite );
2152  datasets.push_back(theMethod->fDataSetInfo.GetName());
2153  }
2154  }
2155  }
2156  }
2157  }//end for MethodsMap
2158  // references for citation
2159  gTools().TMVACitation(Log(), Tools::kHtmlLink);
2160 }
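// The variable-importance helpers below drive the full train/test/evaluate
// cycle once per variable subset. A minimal sketch of that cycle (the booking
// options and names are placeholder assumptions, not prescriptions):
//
//   factory->BookMethod(loader, Types::kBDT, "BDT", "NTrees=100");
//   factory->TrainAllMethods();
//   factory->TestAllMethods();
//   factory->EvaluateAllMethods();
//   Double_t auc = factory->GetROCIntegral("dataset", "BDT");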
2161 
2162 ////////////////////////////////////////////////////////////////////////////////
2163 /// Evaluate Variable Importance
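///
/// A minimal usage sketch; the file name, dataset name, variable names and
/// booking options below are placeholder assumptions, not prescriptions:
/// \code
/// TFile *output = TFile::Open("VI.root", "RECREATE");
/// TMVA::Factory factory("VI", output, "!V:Silent:AnalysisType=Classification");
/// TMVA::DataLoader *loader = new TMVA::DataLoader("dataset");
/// loader->AddVariable("var1", 'F');
/// loader->AddVariable("var2", 'F');
/// // ... add signal/background trees and call PrepareTrainingAndTestTree() ...
/// TH1F *vi = factory.EvaluateImportance(loader, TMVA::VIType::kShort,
///                                       TMVA::Types::kBDT, "BDT", "NTrees=50");
/// \endcode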
2164 
2165 TH1F* TMVA::Factory::EvaluateImportance(DataLoader *loader,VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2166 {
2167  fModelPersistence=kFALSE;
2168  fSilentFile=kTRUE; // we need a silent file here because we want fast classification results
2169 
2170  //getting the number of variables from the loader
2171  const int nbits = loader->GetDataSetInfo().GetNVariables();
2172  if(vitype==VIType::kShort)
2173  return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
2174  else if(vitype==VIType::kAll)
2175  return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
2176  else if(vitype==VIType::kRandom&&nbits>10)
2177  {
2178  return EvaluateImportanceRandom(loader,pow(2,nbits),theMethod,methodTitle,theOption);
2179  } else
2180  {
2181  std::cerr<<"Error in Variable Importance: Random mode requires more than 10 variables in the dataset."<<std::endl;
2182  return nullptr;
2183  }
2184 }
2185 
2186 ////////////////////////////////////////////////////////////////////////////////
2187 
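/// Exhaustive variable importance: the given method is trained and evaluated
/// on every non-empty subset ("seed") of the input variables, i.e. 2^n - 1
/// trainings for n variables. The importance of variable i is accumulated
/// over all seeds x containing i as
///   I_i = Sum_x [ ROC(x) - ROC(x without i) ],
/// where the ROC of the empty set is taken to be 0.5 (random guessing).
/// GetImportance() finally normalises the I_i to percentages.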
2188 TH1F* TMVA::Factory::EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2189 {
2190 
2191  uint64_t x = 0;
2192  uint64_t y = 0;
2193 
2194  //getting number of variables and variable names from loader
2195  const int nbits = loader->GetDataSetInfo().GetNVariables();
2196  std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2197 
2198  uint64_t range = pow(2, nbits);
2199 
2200  //vector to save importances
2201  std::vector<Double_t> importances(nbits);
2202  //vector to save ROC
2203  std::vector<Double_t> ROC(range);
2204  ROC[0]=0.5;
2205  for (int i = 0; i < nbits; i++)importances[i] = 0;
2206 
2207  Double_t SROC, SSROC; //computed ROC value
2208  for ( x = 1; x <range ; x++) {
2209 
2210  std::bitset<VIBITS> xbitset(x);
2211  if (x == 0) continue; // the data loader needs at least one variable
2212 
2213  //creating loader for seed
2214  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2215 
2216  //adding variables from seed
2217  for (int index = 0; index < nbits; index++) {
2218  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2219  }
2220 
2221  DataLoaderCopy(seedloader,loader);
2222  seedloader->PrepareTrainingAndTestTree(loader->GetDataSetInfo().GetCut("Signal"), loader->GetDataSetInfo().GetCut("Background"), loader->GetDataSetInfo().GetSplitOptions());
2223 
2224  //Booking Seed
2225  BookMethod(seedloader, theMethod, methodTitle, theOption);
2226 
2227  //Train/Test/Evaluation
2228  TrainAllMethods();
2229  TestAllMethods();
2230  EvaluateAllMethods();
2231 
2232  //getting ROC
2233  ROC[x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2234 
2235  //cleaning information to process sub-seeds
2236  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2237  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2238  delete sresults;
2239  delete seedloader;
2240  this->DeleteAllMethods();
2241 
2242  fMethodsMap.clear();
2243  // remove the global result, since keeping it for all seeds requires a lot of RAM
2244  }
2245 
2246 
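 // Accumulate the importances: for every seed x and every variable i it
 // contains, add ROC(x) - ROC(x without i). For example, with two variables
 // the importance of var0 becomes
 //   I_0 = (ROC({var0}) - 0.5) + (ROC({var0,var1}) - ROC({var1})),
 // the single-variable seed being compared against 0.5 (random guessing).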
2247  for ( x = 0; x <range ; x++)
2248  {
2249  SROC=ROC[x];
2250  for (uint32_t i = 0; i < VIBITS; ++i) {
2251  if (x & (uint64_t(1) << i)) {
2252  y = x & ~(uint64_t(1) << i);
2253  std::bitset<VIBITS> ybitset(y);
2254  // need at least one variable
2255  // NOTE: a sub-seed of zero is the special case in which
2256  // the seed xbitset contains exactly one variable
2257  Double_t ny = log(x - y) / 0.693147; // log2(x - y): x and y differ in exactly one bit, so ny is the index of the removed variable
2258  if (y == 0) {
2259  importances[ny] = SROC - 0.5;
2260  continue;
2261  }
2262 
2263  //getting ROC
2264  SSROC = ROC[y];
2265  importances[ny] += SROC - SSROC;
2266  //cleaning information
2267  }
2268 
2269  }
2270  }
2271  std::cout<<"--- Variable Importance Results (All)"<<std::endl;
2272  return GetImportance(nbits,importances,varNames);
2273 }
2274 
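// Helper for EvaluateImportanceShort: sum(n) = 2^0 + 2^1 + ... + 2^(n-1)
// = 2^n - 1, i.e. the bit mask with the lowest n bits set (all variables selected).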
2275 static long int sum(long int i)
2276 {
2277  long int _sum=0;
2278  for(long int n=0;n<i;n++) _sum+=pow(2,n);
2279  return _sum;
2280 }
2281 
2282 ////////////////////////////////////////////////////////////////////////////////
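/// Fast variable importance: instead of all 2^n subsets, only the full seed
/// x = 2^n - 1 (all variables) and its n one-variable-removed sub-seeds are
/// trained, i.e. n + 1 trainings in total. The importance of variable i is
///   I_i = ROC(all) - ROC(all without i).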
2283 
2284 TH1F* TMVA::Factory::EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2285 {
2286  uint64_t x = 0;
2287  uint64_t y = 0;
2288 
2289  //getting number of variables and variable names from loader
2290  const int nbits = loader->GetDataSetInfo().GetNVariables();
2291  std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2292 
2293  long int range = sum(nbits);
2294 // std::cout<<range<<std::endl;
2295  //vector to save importances
2296  std::vector<Double_t> importances(nbits);
2297  for (int i = 0; i < nbits; i++)importances[i] = 0;
2298 
2299  Double_t SROC, SSROC; //computed ROC value
2300 
2301  x = range;
2302 
2303  std::bitset<VIBITS> xbitset(x);
2304  if (x == 0) Log() << kFATAL << "Error: need at least one variable." << Endl; // the data loader needs at least one variable
2305 
2306 
2307  //creating loader for seed
2308  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2309 
2310  //adding variables from seed
2311  for (int index = 0; index < nbits; index++) {
2312  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2313  }
2314 
2315  //Loading Dataset
2316  DataLoaderCopy(seedloader,loader);
2317 
2318  //Booking Seed
2319  BookMethod(seedloader, theMethod, methodTitle, theOption);
2320 
2321  //Train/Test/Evaluation
2322  TrainAllMethods();
2323  TestAllMethods();
2324  EvaluateAllMethods();
2325 
2326  //getting ROC
2327  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2328 
2329  //cleaning information to process sub-seeds
2330  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2331  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2332  delete sresults;
2333  delete seedloader;
2334  this->DeleteAllMethods();
2335  fMethodsMap.clear();
2336 
2337  // remove the global result, since keeping it for all seeds requires a lot of RAM
2338 
2339  for (uint32_t i = 0; i < VIBITS; ++i) {
2340  if (x & (uint64_t(1) << i)) {
2341  y = x & ~(uint64_t(1) << i);
2342  std::bitset<VIBITS> ybitset(y);
2343  // need at least one variable
2344  // NOTE: a sub-seed of zero is the special case in which
2345  // the seed xbitset contains exactly one variable
2346  Double_t ny = log(x - y) / 0.693147; // log2(x - y): the index of the removed variable
2347  if (y == 0) {
2348  importances[ny] = SROC - 0.5;
2349  continue;
2350  }
2351 
2352  //creating loader for sub-seed
2353  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2354  //adding variables from sub-seed
2355  for (int index = 0; index < nbits; index++) {
2356  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2357  }
2358 
2359  //Loading Dataset
2360  DataLoaderCopy(subseedloader,loader);
2361 
2362  //Booking SubSeed
2363  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2364 
2365  //Train/Test/Evaluation
2366  TrainAllMethods();
2367  TestAllMethods();
2368  EvaluateAllMethods();
2369 
2370  //getting ROC
2371  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2372  importances[ny] += SROC - SSROC;
2373 
2374  //cleaning information
2375  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2376  TMVA::ResultsClassification *ssresults = (TMVA::ResultsClassification*)ssmethod->Data()->GetResults(ssmethod->GetMethodName(), Types::kTesting, Types::kClassification);
2377  delete ssresults;
2378  delete subseedloader;
2379  this->DeleteAllMethods();
2380  fMethodsMap.clear();
2381  }
2382  }
2383  std::cout<<"--- Variable Importance Results (Short)"<<std::endl;
2384  return GetImportance(nbits,importances,varNames);
2385 }
2386 
2387 ////////////////////////////////////////////////////////////////////////////////
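/// Stochastic variable importance: draws nseeds random variable subsets
/// ("seeds") from [1, 2^n) (the empty set is skipped), trains the method on
/// each seed and on each of its one-variable-removed sub-seeds, and
/// accumulates the same ROC differences as the exhaustive variant. Useful
/// when 2^n trainings are impractical; EvaluateImportance() dispatches here
/// only for more than 10 variables.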
2388 
2389 TH1F* TMVA::Factory::EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2390 {
2391  TRandom3 *rangen = new TRandom3(0); //Random Gen.
2392 
2393  uint64_t x = 0;
2394  uint64_t y = 0;
2395 
2396  //getting number of variables and variable names from loader
2397  const int nbits = loader->GetDataSetInfo().GetNVariables();
2398  std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2399 
2400  long int range = pow(2, nbits);
2401 
2402  //vector to save importances
2403  std::vector<Double_t> importances(nbits);
2404  Double_t importances_norm = 0;
2405  for (int i = 0; i < nbits; i++)importances[i] = 0;
2406 
2407  Double_t SROC, SSROC; //computed ROC value
2408  for (UInt_t n = 0; n < nseeds; n++) {
2409  x = rangen->Integer(range);
2410 
2411  std::bitset<32> xbitset(x);
2412  if (x == 0) continue; // the data loader needs at least one variable
2413 
2414 
2415  //creating loader for seed
2416  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2417 
2418  //adding variables from seed
2419  for (int index = 0; index < nbits; index++) {
2420  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2421  }
2422 
2423  //Loading Dataset
2424  DataLoaderCopy(seedloader,loader);
2425 
2426  //Booking Seed
2427  BookMethod(seedloader, theMethod, methodTitle, theOption);
2428 
2429  //Train/Test/Evaluation
2430  TrainAllMethods();
2431  TestAllMethods();
2432  EvaluateAllMethods();
2433 
2434  //getting ROC
2435  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2436 // std::cout << "Seed: n " << n << " x " << x << " xbitset:" << xbitset << " ROC " << SROC << std::endl;
2437 
2438  //cleaning information to process sub-seeds
2439  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2440  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2441  delete sresults;
2442  delete seedloader;
2443  this->DeleteAllMethods();
2444  fMethodsMap.clear();
2445 
2446  // remove the global result, since keeping it for all seeds requires a lot of RAM
2447 
2448  for (uint32_t i = 0; i < 32; ++i) {
2449  if (x & (uint64_t(1) << i)) {
2450  y = x & ~(uint64_t(1) << i);
2451  std::bitset<32> ybitset(y);
2452  // need at least one variable
2453  // NOTE: a sub-seed of zero is the special case in which
2454  // the seed xbitset contains exactly one variable
2455  Double_t ny = log(x - y) / 0.693147; // log2(x - y): the index of the removed variable
2456  if (y == 0) {
2457  importances[ny] = SROC - 0.5;
2458  importances_norm += importances[ny];
2459  // std::cout << "SubSeed: " << y << " y:" << ybitset << "ROC " << 0.5 << std::endl;
2460  continue;
2461  }
2462 
2463  //creating loader for sub-seed
2464  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2465  //adding variables from sub-seed
2466  for (int index = 0; index < nbits; index++) {
2467  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2468  }
2469 
2470  //Loading Dataset
2471  DataLoaderCopy(subseedloader,loader);
2472 
2473  //Booking SubSeed
2474  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2475 
2476  //Train/Test/Evaluation
2477  TrainAllMethods();
2478  TestAllMethods();
2479  EvaluateAllMethods();
2480 
2481  //getting ROC
2482  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2483  importances[ny] += SROC - SSROC;
2484  //std::cout << "SubSeed: " << y << " y:" << ybitset << " x-y " << x - y << " " << std::bitset<32>(x - y) << " ny " << ny << " SROC " << SROC << " SSROC " << SSROC << " Importance = " << importances[ny] << std::endl;
2485  //cleaning information
2486  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2487  TMVA::ResultsClassification *ssresults = (TMVA::ResultsClassification*)ssmethod->Data()->GetResults(ssmethod->GetMethodName(), Types::kTesting, Types::kClassification);
2488  delete ssresults;
2489  delete subseedloader;
2490  this->DeleteAllMethods();
2491  fMethodsMap.clear();
2492  }
2493  }
2494  }
2495  std::cout<<"--- Variable Importance Results (Random)"<<std::endl;
2496  return GetImportance(nbits,importances,varNames);
2497 }
2498 
2499 ////////////////////////////////////////////////////////////////////////////////
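/// Normalise the accumulated importances to percentages,
///   importance_i [%] = 100 * I_i / Sum_j I_j,
/// print one line per variable, and return a labelled histogram of the shares.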
2500 
2501 TH1F* TMVA::Factory::GetImportance(const int nbits,std::vector<Double_t> importances,std::vector<TString> varNames)
2502 {
2503  TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);
2504 
2505  gStyle->SetOptStat(000000);
2506 
2507  Float_t normalization = 0.0;
2508  for (int i = 0; i < nbits; i++) {
2509  normalization = normalization + importances[i];
2510  }
2511 
2512  Float_t roc = 0.0;
2513 
2514  //gStyle->SetTitleXOffset(0.4); // redundant: immediately overridden below
2515  gStyle->SetTitleXOffset(1.2);
2516 
2517 
2518  std::vector<Double_t> x_ie(nbits), y_ie(nbits);
2519  for (Int_t i = 1; i < nbits + 1; i++) {
2520  x_ie[i - 1] = (i - 1) * 1.;
2521  roc = 100.0 * importances[i - 1] / normalization;
2522  y_ie[i - 1] = roc;
2523  std::cout<<"--- "<<varNames[i-1]<<" = "<<roc<<" %"<<std::endl;
2524  vih1->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
2525  vih1->SetBinContent(i, roc);
2526  }
2527  TGraph *g_ie = new TGraph(nbits, &x_ie[0], &y_ie[0]); // x_ie and y_ie hold exactly nbits points
2528  g_ie->SetTitle("");
2529 
2530  vih1->LabelsOption("v >", "X");
2531  vih1->SetBarWidth(0.97);
2532  Int_t ca = TColor::GetColor("#006600");
2533  vih1->SetFillColor(ca);
2534  //Int_t ci = TColor::GetColor("#990000");
2535 
2536  vih1->GetYaxis()->SetTitle("Importance (%)");
2537  vih1->GetYaxis()->SetTitleSize(0.045);
2538  vih1->GetYaxis()->CenterTitle();
2539  vih1->GetYaxis()->SetTitleOffset(1.24);
2540 
2541  vih1->GetYaxis()->SetRangeUser(-7, 50);
2542  vih1->SetDirectory(0);
2543 
2544 // vih1->Draw("B");
2545  return vih1;
2546 }
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title.
Definition: TAttAxis.cxx:294
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:381
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Definition: Factory.cxx:2501
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
UInt_t GetNVariables() const
Definition: DataSetInfo.h:125
static long int sum(long int i)
Definition: Factory.cxx:2275
Principal Components Analysis (PCA)
Definition: TPrincipal.h:20
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:575
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition: Factory.cxx:345
Double_t GetEffSForEffB(Double_t effB, const UInt_t num_points=41)
Calculate the signal efficiency (sensitivity) for a given background efficiency (sensitivity).
Definition: ROCCurve.cxx:220
Random number generator class based on M.
Definition: TRandom3.h:27
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
auto * m
Definition: textangle.C:8
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
Definition: TH1.cxx:5222
Double_t Log(Double_t x)
Definition: TMath.h:750
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
Definition: Factory.cxx:1334
Bool_t fROC
enable to calculate corelations
Definition: Factory.h:213
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
Definition: ROCCurve.cxx:251
float Float_t
Definition: RtypesCore.h:55
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8393
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition: Tools.cxx:1336
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
R__EXTERN TStyle * gStyle
Definition: TStyle.h:410
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:70
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2284
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:625
Config & gConfig()
MsgLogger & Log() const
Definition: Configurable.h:122
std::vector< TString > GetListOfVariables() const
returns list of variables
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:139
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:53
Bool_t Verbose(void) const
Definition: Factory.h:135
EAnalysisType
Definition: Types.h:127
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:36
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:823
virtual void MakeClass(const TString &classFileName=TString("")) const =0
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
#define gROOT
Definition: TROOT.h:406
TString fTransformations
option string given by construction (presently only "V")
Definition: Factory.h:209
Basic string class.
Definition: TString.h:131
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
Ranking for variables in method (implementation)
Definition: Ranking.h:48
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
int Int_t
Definition: RtypesCore.h:43
void TrainAllMethods()
Iterates through all booked methods and calls training.
Definition: Factory.cxx:1093
virtual void TestMulticlass()
test multiclass classification
virtual void SetTitle(const char *title="")
Change (i.e.
Definition: TGraph.cxx:2324
UInt_t GetNClasses() const
Definition: DataSetInfo.h:153
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Bool_t IsNaN(Double_t x)
Definition: TMath.h:882
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
Definition: Factory.cxx:972
void WriteDataInformation(DataSetInfo &fDataSetInfo)
Definition: Factory.cxx:596
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:227
const TString & GetLabel() const
Definition: VariableInfo.h:59
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:377
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
Bool_t CheckForSilentOption(const TString &) const
check for "silence" option in configuration option string
Definition: Tools.cxx:702
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
Definition: TAxis.h:184
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
MsgLogger * fLogger
Definition: Configurable.h:128
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:486
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2389
RPY_EXPORTED TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1226
virtual void SetBarWidth(Float_t width=0.5)
Definition: TH1.h:356
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
Definition: Factory.cxx:903
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
Definition: TAxis.cxx:939
TString fWeightFileDirPrefix
Definition: Config.h:123
static void InhibitOutput()
Definition: MsgLogger.cxx:74
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:391
#define READXML
Definition: Factory.cxx:103
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:605
TAxis * GetXaxis()
Get x axis of the graph.
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Definition: Factory.cxx:2165
Bool_t fModelPersistence
the training type
Definition: Factory.h:219
DataSet * Data() const
Definition: MethodBase.h:408
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:608
TString fWeightFileDir
Definition: Config.h:124
void ReadStateFromFile()
Function to write options and weights to file.
double pow(double, double)
TString fVerboseLevel
verbose mode
Definition: Factory.h:211
void PrintHelpMessage() const
prints out method-specific help method
IONames & GetIONames()
Definition: Config.h:100
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:408
DataSetInfo & DataInfo() const
Definition: MethodBase.h:409
Bool_t DoRegression() const
Definition: MethodBase.h:438
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:72
void SetDrawProgressBar(Bool_t d)
Definition: Config.h:71
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2188
void GetMethodName(TString &name, TKey *mkey)
Definition: tmvaglob.cxx:335
Class that contains all the data information.
Definition: DataSetInfo.h:60
static constexpr double s
TH1F * GetHistogram()
Returns a pointer to the histogram used to draw the axis.
virtual void Draw(Option_t *chopt="")
Draw this multigraph with its current attributes.
overwrite existing object with same name
Definition: TObject.h:88
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
Bool_t DoMulticlass() const
Definition: MethodBase.h:439
const Int_t MinNoTrainingEvents
Definition: Factory.cxx:98
Class that contains all the data information.
Definition: DataSet.h:69
virtual ~Factory()
Destructor.
Definition: Factory.cxx:299
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so.
Definition: Factory.cxx:694
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9587
UInt_t GetNTargets() const
Definition: DataSetInfo.h:126
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:264
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC), for a given method.
Definition: Factory.cxx:842
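Illustrative sketch, assuming "factory" and "loader" (a TMVA::DataLoader*) have already gone through training and testing:
// area under the ROC curve of the method titled "BDT";
// iClass = 0 selects the signal class by default
Double_t auc = factory.GetROCIntegral(loader, "BDT");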
DataSetManager * GetDataSetManager()
Definition: DataSetInfo.h:192
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
R__EXTERN TSystem * gSystem
Definition: TSystem.h:556
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Definition: TPrincipal.cxx:410
void TMVAWelcomeMessage()
direct output, e.g., when starting a ROOT session -> no use of Logger here
Definition: Tools.cxx:1313
const char * GetName() const
Definition: MethodBase.h:333
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:426
ClassInfo * GetClassInfo(Int_t clNum) const
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
Definition: Factory.cxx:579
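A small sketch of a guarded lookup (dataset and method names are illustrative), pairing HasMethod with the GetMethod accessor listed further down this page:
// GetMethod returns a null pointer for unknown titles, so check first
if (factory.HasMethod("dataset", "BDT")) {
   TMVA::IMethod *im = factory.GetMethod("dataset", "BDT");
   im->PrintHelpMessage();
}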
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition: TAttFill.h:37
const TMatrixD * CorrelationMatrix(const TString &className) const
Bool_t fCorrelations
flag to enable the calculation of correlations
Definition: Factory.h:212
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods. ...
Definition: Factory.cxx:1349
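For context, a minimal sketch of the canonical steering sequence once all methods are booked (TrainAllMethods is part of the same class, though not listed on this page):
factory.TrainAllMethods();    // fit each booked method on the training sample
factory.TestAllMethods();     // apply each method to the test sample
factory.EvaluateAllMethods(); // compute efficiencies, ROC curves and rankings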
class TMVA::Config::VariablePlotting fVariablePlotting
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
Definition: TH1.cxx:8678
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponding DataSet.
Definition: Factory.cxx:1244
unsigned int UInt_t
Definition: RtypesCore.h:44
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
const TMatrixD * GetCovarianceMatrix() const
Definition: TPrincipal.h:58
Ssiz_t Length() const
Definition: TString.h:405
TAxis * GetYaxis()
Get y axis of the graph.
const TString & GetMethodName() const
Definition: MethodBase.h:330
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb"...
Definition: TColor.cxx:1769
TAxis * GetYaxis()
Definition: TH1.h:317
Class that contains all the data information.
void Greetings()
Print welcome message.
Definition: Factory.cxx:289
This is the main MVA steering class.
Definition: Factory.h:81
Tools & gTools()
virtual void MakePrincipals()
Perform the principal components analysis.
Definition: TPrincipal.cxx:869
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:86
void SetVerbose(Bool_t v=kTRUE)
Definition: Factory.cxx:337
TFile * fgTargetFile
Definition: Factory.h:202
virtual Double_t GetSignificance() const
compute significance of mean difference
TString fName
Definition: TNamed.h:32
Definition: graph.py:1
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:434
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title.
Definition: TAttAxis.cxx:304
#define h(i)
Definition: RSha256.hxx:106
const Bool_t kFALSE
Definition: RtypesCore.h:90
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:236
Class for categorizing the phase space.
virtual void Print() const
print the ranking of the input variables
Definition: Ranking.cxx:111
The Canvas class.
Definition: TCanvas.h:27
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:633
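An illustrative sketch, assuming "loader" is a TMVA::DataLoader* whose signal and background trees are already registered (the cut and event counts are made up for the example):
// split the loaded trees into training and test samples with one common cut
TCut presel = "var1 > 0";
loader->PrepareTrainingAndTestTree(presel,
      "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:!V");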
virtual void CheckSetup()
check may be overridden by derived class (sometimes, e.g., fitters are used which can only be implement...
Definition: MethodBase.cxx:435
#define ClassImp(name)
Definition: Rtypes.h:361
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Private method to generate a ROCCurve instance for a given method.
Definition: Factory.cxx:743
double Double_t
Definition: RtypesCore.h:57
virtual void PrintHelpMessage() const =0
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:335
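A two-line sketch of the conversion; "covMat" is assumed to be a valid symmetric covariance matrix, e.g. obtained from TPrincipal::GetCovarianceMatrix() listed above:
// normalize a covariance matrix into a correlation matrix
const TMatrixD *corr = TMVA::gTools().GetCorrelationMatrix(covMat);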
int type
Definition: TGX11.cxx:120
Class which takes the results of a multiclass classification.
void SetFile(TFile *file)
Definition: MethodBase.h:374
Double_t y[n]
Definition: legend1.C:17
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
UInt_t GetEntries(const TString &name) const
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:549
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:103
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition: TAxis.cxx:820
void AddPreDefVal(const T &)
Definition: Configurable.h:168
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:418
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition: Tools.cxx:1327
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:69
void ProcessSetup()
process all options; the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived classes
Definition: MethodBase.cxx:425
void PrintVariableRanking() const
prints ranking of input variables
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Bool_t IsNull() const
Definition: TString.h:402
const TString & GetOptions() const
Definition: Configurable.h:84
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TNamed.cxx:74
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:898
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
void SetUseColor(Bool_t uc)
Definition: Config.h:62
void SetConfigName(const char *n)
Definition: Configurable.h:63
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
#define VIBITS
Definition: Factory.cxx:106
void SetTitleXOffset(Float_t offset=1)
Definition: TStyle.h:390
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Definition: Factory.cxx:1307
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
Definition: ROCCurve.cxx:277
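A short sketch, assuming "roc" is a valid TMVA::ROCCurve* (for example produced through the factory's ROC accessors); the argument sets how many points sample the curve:
TGraph *g = roc->GetROCCurve(200); // sample the ROC curve at 200 points
g->Draw("AL");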
TList * GetListOfGraphs() const
Definition: TMultiGraph.h:70
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
Definition: MethodBase.cxx:979
DataSetManager * fDataSetManager
Definition: MethodBoost.h:193
const TString & GetSplitOptions() const
Definition: DataSetInfo.h:184
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
Definition: Factory.cxx:118
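A minimal construction sketch; the job name, file name and option tokens are illustrative but follow the common TMVA conventions:
// open the output file that collects histograms and evaluation results,
// then construct the steering object
TFile *outputFile = TFile::Open("TMVA.root", "RECREATE");
TMVA::Factory factory("job", outputFile,
      "!V:!Silent:Color:AnalysisType=Classification");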
DataInputHandler & DataInput()
TString GetMethodTypeName() const
Definition: MethodBase.h:331
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
Definition: MethodBase.cxx:598
Class that is the base-class for a vector of result.
Definition: Results.h:57
void SetSilent(Bool_t s)
Definition: Config.h:65
const TCut & GetCut(Int_t i) const
Definition: DataSetInfo.h:166
void SetWeightFileDir(TString fileDir)
set directory of weight file
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
void DeleteAllMethods(void)
Delete methods.
Definition: Factory.cxx:317
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mode.
Definition: TStyle.cxx:1590
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings ...
Definition: Tools.cxx:1210
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
virtual Double_t GetTrainingEfficiency(const TString &)
Bool_t IsSignal(const Event *ev) const
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:437
Types::EAnalysisType fAnalysisType
the training type
Definition: Factory.h:218
virtual void SetTitle(const char *title)
Change or set the title of the histogram.
Definition: TH1.cxx:6345
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
prints the TMVA citation in the format selected by citType
Definition: Tools.cxx:1452
std::vector< IMethod * > MVector
Definition: Factory.h:85
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Definition: Factory.cxx:1279
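A one-line sketch (dataset and method title are illustrative):
// emit a standalone C++ response class for the trained method titled "BDT"
factory.MakeClass("dataset", "BDT");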
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
Definition: Factory.cxx:561
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
const Bool_t kTRUE
Definition: RtypesCore.h:89
DataSet * GetDataSet() const
returns data set
Types::EMVA GetMethodType() const
Definition: MethodBase.h:332
void CheckForUnusedOptions() const
checks for unused options in option string
const Int_t n
Definition: legend1.C:16
virtual void TestClassification()
evaluate the classifier on the test sample and fill the test results
const Event * GetEvent() const
Definition: DataSet.cxx:201
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:436
char name[80]
Definition: TGX11.cxx:109
double log(double)
Class that is the base-class for a vector of result.
TAxis * GetXaxis()
Get the x axis of the histogram.
Definition: TH1.h:316
void SetConfigDescription(const char *d)
Definition: Configurable.h:64
Bool_t fVerbose
verbose mode
Definition: Factory.h:210
const char * Data() const
Definition: TString.h:364
MethodBase * BookMethodWeightfile(DataLoader *dataloader, TMVA::Types::EMVA methodType, const TString &weightfile)
Adds an already constructed method to be managed by this factory.
Definition: Factory.cxx:497
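A hedged sketch of re-attaching a previously trained method so the factory can test and evaluate it; "loader" is a TMVA::DataLoader* and the weight-file path shown is illustrative:
TMVA::MethodBase *m = factory.BookMethodWeightfile(loader, TMVA::Types::kBDT,
      "dataset/weights/job_BDT.weights.xml");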