Factory.cxx
// @(#)Root/tmva $Id$
// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
// Updated by: Omar Zapata, Kim Albertsson
/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : Factory                                                              *
 * Web    : http://tmva.sourceforge.net                                          *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation (see header for description)                              *
 *                                                                                *
 * Authors :                                                                      *
 *      Andreas Hoecker   <Andreas.Hocker@cern.ch>    - CERN, Switzerland         *
 *      Joerg Stelzer     <stelzer@cern.ch>           - DESY, Germany             *
 *      Peter Speckmayer  <peter.speckmayer@cern.ch>  - CERN, Switzerland         *
 *      Jan Therhaag      <Jan.Therhaag@cern.ch>      - U of Bonn, Germany        *
 *      Eckhard v. Toerne <evt@uni-bonn.de>           - U of Bonn, Germany        *
 *      Helge Voss        <Helge.Voss@cern.ch>        - MPI-K Heidelberg, Germany *
 *      Kai Voss          <Kai.Voss@cern.ch>          - U. of Victoria, Canada    *
 *      Omar Zapata       <Omar.Zapata@cern.ch>       - UdeA/ITM Colombia         *
 *      Lorenzo Moneta    <Lorenzo.Moneta@cern.ch>    - CERN, Switzerland         *
 *      Sergei Gleyzer    <Sergei.Gleyzer@cern.ch>    - U of Florida & CERN       *
 *      Kim Albertsson    <kim.albertsson@cern.ch>    - LTU & CERN                *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *      UdeA/ITM, Colombia                                                        *
 *      U. of Florida, USA                                                        *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

/*! \class TMVA::Factory
\ingroup TMVA

This is the main MVA steering class.
It creates all MVA methods, and guides them through the training, testing and
evaluation phases.
*/
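
// Minimal usage sketch (illustrative, not part of the original source; tree
// pointers, file names and option strings below are assumptions):
//
//    TFile *outputFile = TFile::Open("TMVA.root", "RECREATE");
//    TMVA::Factory factory("TMVAClassification", outputFile,
//                          "!V:!Silent:Color:AnalysisType=Classification");
//    TMVA::DataLoader loader("dataset");
//    loader.AddVariable("var1", 'F');
//    loader.AddSignalTree(signalTree);          // assumed TTree*
//    loader.AddBackgroundTree(backgroundTree);  // assumed TTree*
//    loader.PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents");
//    factory.BookMethod(&loader, TMVA::Types::kBDT, "BDT", "NTrees=200");
//    factory.TrainAllMethods();
//    factory.TestAllMethods();
//    factory.EvaluateAllMethods();
//    outputFile->Close();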

#include "TMVA/Factory.h"

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/Configurable.h"
#include "TMVA/Tools.h"
#include "TMVA/Ranking.h"
#include "TMVA/DataSet.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"
#include "TMVA/DataInputHandler.h"
#include "TMVA/DataSetManager.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/DataLoader.h"
#include "TMVA/MethodBoost.h"
#include "TMVA/MethodCategory.h"
#include "TMVA/ROCCalc.h"
#include "TMVA/ROCCurve.h"
#include "TMVA/MsgLogger.h"

#include "TMVA/VariableInfo.h"
#include "TMVA/VariableTransform.h"

70 #include "TMVA/Results.h"
72 #include "TMVA/ResultsRegression.h"
73 #include "TMVA/ResultsMulticlass.h"
#include <list>
#include <bitset>
#include <set>
#include <algorithm>

#include "TMVA/Types.h"

#include "TROOT.h"
#include "TFile.h"
#include "TTree.h"
#include "TLeaf.h"
#include "TEventList.h"
#include "TH2.h"
#include "TText.h"
#include "TLegend.h"
#include "TGraph.h"
#include "TStyle.h"
#include "TMatrixF.h"
#include "TMatrixDSym.h"
#include "TMultiGraph.h"
#include "TPrincipal.h"
#include "TMath.h"
#include "TSystem.h"
#include "TCanvas.h"

const Int_t MinNoTrainingEvents = 10;
//const Int_t MinNoTestEvents = 1;

ClassImp(TMVA::Factory);

#define READXML kTRUE

// number of bits for bitset
#define VIBITS 32


////////////////////////////////////////////////////////////////////////////////
/// Standard constructor.
///
///  - jobname       : this name will appear in all weight file names produced by the MVAs
///  - theTargetFile : output ROOT file; the test tree and all evaluation plots
///                    will be stored here
///  - theOption     : option string; currently: "V" for verbose

TMVA::Factory::Factory( TString jobName, TFile* theTargetFile, TString theOption )
   : Configurable       ( theOption ),
     fTransformations   ( "I" ),
     fVerbose           ( kFALSE ),
     fVerboseLevel      ( kINFO ),
     fCorrelations      ( kFALSE ),
     fROC               ( kTRUE ),
     fSilentFile        ( theTargetFile == nullptr ),
     fJobName           ( jobName ),
     fAnalysisType      ( Types::kClassification ),
     fModelPersistence  ( kTRUE )
{
   fName = "Factory";
   fgTargetFile = theTargetFile;

   // render silent
   if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure it is silent if wanted

   // init configurable
   SetConfigDescription( "Configuration options for Factory running" );
   SetConfigName( GetName() );

   // histograms are not automatically associated with the current
   // directory and hence don't go out of scope when closing the file
   // TH1::AddDirectory(kFALSE);
   Bool_t silent = kFALSE;
#ifdef WIN32
   // under Windows, switch progress bar and color off by default, as the typical Windows shell doesn't handle these (would need different sequences..)
   Bool_t color = kFALSE;
   Bool_t drawProgressBar = kFALSE;
#else
   Bool_t color = !gROOT->IsBatch();
   Bool_t drawProgressBar = kTRUE;
#endif
   DeclareOptionRef( fVerbose, "V", "Verbose flag" );
   DeclareOptionRef( fVerboseLevel=TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)" );
   AddPreDefVal(TString("Debug"));
   AddPreDefVal(TString("Verbose"));
   AddPreDefVal(TString("Info"));
   DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
   DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
   DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
   DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
   DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
   DeclareOptionRef( drawProgressBar,
                     "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
   DeclareOptionRef( fModelPersistence,
                     "ModelPersistence",
                     "Option to save the trained model in xml file or using serialization");

   TString analysisType("Auto");
   DeclareOptionRef( analysisType,
                     "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
   AddPreDefVal(TString("Classification"));
   AddPreDefVal(TString("Regression"));
   AddPreDefVal(TString("Multiclass"));
   AddPreDefVal(TString("Auto"));

   ParseOptions();
   CheckForUnusedOptions();

   if (Verbose()) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Debug") == 0)   fLogger->SetMinType( kDEBUG );
   if (fVerboseLevel.CompareTo("Verbose") == 0) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Info") == 0)    fLogger->SetMinType( kINFO );

   // global settings
   gConfig().SetUseColor( color );
   gConfig().SetSilent( silent );
   gConfig().SetDrawProgressBar( drawProgressBar );

   analysisType.ToLower();
   if      ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
   else if ( analysisType == "regression" )     fAnalysisType = Types::kRegression;
   else if ( analysisType == "multiclass" )     fAnalysisType = Types::kMulticlass;
   else if ( analysisType == "auto" )           fAnalysisType = Types::kNoAnalysisType;

   // Greetings();
}
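
// Option-string sketch for the constructors (values are illustrative; the
// recognized keys are those declared via DeclareOptionRef above):
//
//    TMVA::Factory f1("job", outputFile, "V:Color=False:DrawProgressBar=False");
//    TMVA::Factory f2("job", outputFile, "Silent:Transformations=I;D;P:AnalysisType=Auto");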

////////////////////////////////////////////////////////////////////////////////
/// Constructor.

TMVA::Factory::Factory( TString jobName, TString theOption )
   : Configurable       ( theOption ),
     fTransformations   ( "I" ),
     fVerbose           ( kFALSE ),
     fCorrelations      ( kFALSE ),
     fROC               ( kTRUE ),
     fSilentFile        ( kTRUE ),
     fJobName           ( jobName ),
     fAnalysisType      ( Types::kClassification ),
     fModelPersistence  ( kTRUE )
{
   fName = "Factory";
   fgTargetFile = nullptr;

   // render silent
   if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure it is silent if wanted

   // init configurable
   SetConfigDescription( "Configuration options for Factory running" );
   SetConfigName( GetName() );

   // histograms are not automatically associated with the current
   // directory and hence don't go out of scope when closing the file
   Bool_t silent = kFALSE;
#ifdef WIN32
   // under Windows, switch progress bar and color off by default, as the typical Windows shell doesn't handle these (would need different sequences..)
   Bool_t color = kFALSE;
   Bool_t drawProgressBar = kFALSE;
#else
   Bool_t color = !gROOT->IsBatch();
   Bool_t drawProgressBar = kTRUE;
#endif
   DeclareOptionRef( fVerbose, "V", "Verbose flag" );
   DeclareOptionRef( fVerboseLevel=TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)" );
   AddPreDefVal(TString("Debug"));
   AddPreDefVal(TString("Verbose"));
   AddPreDefVal(TString("Info"));
   DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
   DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
   DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
   DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
   DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
   DeclareOptionRef( drawProgressBar,
                     "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
   DeclareOptionRef( fModelPersistence,
                     "ModelPersistence",
                     "Option to save the trained model in xml file or using serialization");

   TString analysisType("Auto");
   DeclareOptionRef( analysisType,
                     "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
   AddPreDefVal(TString("Classification"));
   AddPreDefVal(TString("Regression"));
   AddPreDefVal(TString("Multiclass"));
   AddPreDefVal(TString("Auto"));

   ParseOptions();
   CheckForUnusedOptions();

   if (Verbose()) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Debug") == 0)   fLogger->SetMinType( kDEBUG );
   if (fVerboseLevel.CompareTo("Verbose") == 0) fLogger->SetMinType( kVERBOSE );
   if (fVerboseLevel.CompareTo("Info") == 0)    fLogger->SetMinType( kINFO );

   // global settings
   gConfig().SetUseColor( color );
   gConfig().SetSilent( silent );
   gConfig().SetDrawProgressBar( drawProgressBar );

   analysisType.ToLower();
   if      ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
   else if ( analysisType == "regression" )     fAnalysisType = Types::kRegression;
   else if ( analysisType == "multiclass" )     fAnalysisType = Types::kMulticlass;
   else if ( analysisType == "auto" )           fAnalysisType = Types::kNoAnalysisType;

   Greetings();
}

////////////////////////////////////////////////////////////////////////////////
/// Print welcome message.
/// Options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg

void TMVA::Factory::Greetings()
{
   gTools().ROOTVersionMessage( Log() );
   gTools().TMVAWelcomeMessage( Log(), gTools().kLogoWelcomeMsg );
   gTools().TMVAVersionMessage( Log() ); Log() << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// Destructor.

TMVA::Factory::~Factory( void )
{
   std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
   for (; trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);

   this->DeleteAllMethods();

   // problem with call of REGISTER_METHOD macro ...
   // ClassifierFactory::DestroyInstance();
   // Types::DestroyInstance();
   // Tools::DestroyInstance();
   // Config::DestroyInstance();
}

////////////////////////////////////////////////////////////////////////////////
/// Delete methods.

void TMVA::Factory::DeleteAllMethods( void )
{
   std::map<TString,MVector*>::iterator itrMap;

   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap)
   {
      MVector *methods = itrMap->second;
      // delete methods
      MVector::iterator itrMethod = methods->begin();
      for (; itrMethod != methods->end(); ++itrMethod) {
         Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
         delete (*itrMethod);
      }
      methods->clear();
      delete methods;
   }
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::Factory::SetVerbose( Bool_t v )
{
   fVerbose = v;
}

////////////////////////////////////////////////////////////////////////////////
/// Book a classifier or regression method.

TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption )
{
   if (fModelPersistence) gSystem->MakeDirectory(loader->GetName()); // creating directory for DataLoader output

   TString datasetname = loader->GetName();

   if (fAnalysisType == Types::kNoAnalysisType) {
      if (loader->GetDataSetInfo().GetNClasses() == 2
          && loader->GetDataSetInfo().GetClassInfo("Signal") != NULL
          && loader->GetDataSetInfo().GetClassInfo("Background") != NULL
          ) {
         fAnalysisType = Types::kClassification; // default is classification
      } else if (loader->GetDataSetInfo().GetNClasses() >= 2) {
         fAnalysisType = Types::kMulticlass;     // more than two classes, or two classes not named "Signal" and "Background"
      } else
         Log() << kFATAL << "No analysis type for " << loader->GetDataSetInfo().GetNClasses() << " classes and "
               << loader->GetDataSetInfo().GetNTargets() << " regression targets." << Endl;
   }

   // booking via name; the names are translated into enums and the
   // corresponding overloaded BookMethod is called

   if (fMethodsMap.find(datasetname) != fMethodsMap.end()) {
      if (GetMethod( datasetname, methodTitle ) != 0) {
         Log() << kFATAL << "Booking failed since method with title <"
               << methodTitle << "> already exists with DataSet Name <" << loader->GetName() << "> "
               << Endl;
      }
   }

   Log() << kHEADER << "Booking method: " << gTools().Color("bold") << methodTitle
         // << gTools().Color("reset") << " DataSet Name: " << gTools().Color("bold") << loader->GetName()
         << gTools().Color("reset") << Endl << Endl;

   // interpret option string with respect to a request for boosting (i.e., Boost_num > 0)
   Int_t boostNum = 0;
   TMVA::Configurable* conf = new TMVA::Configurable( theOption );
   conf->DeclareOptionRef( boostNum = 0, "Boost_num",
                           "Number of times the classifier will be boosted" );
   conf->ParseOptions();
   delete conf;
   // this is the name of the weight file directory
   TString fileDir;
   if (fModelPersistence)
   {
      // find prefix in fWeightFileDir;
      TString prefix = gConfig().GetIONames().fWeightFileDirPrefix;
      fileDir = prefix;
      if (!prefix.IsNull())
         if (fileDir[fileDir.Length()-1] != '/') fileDir += "/";
      fileDir += loader->GetName();
      fileDir += "/" + gConfig().GetIONames().fWeightFileDir;
   }
   // initialize methods
   IMethod* im;
   if (!boostNum) {
      im = ClassifierFactory::Instance().Create(theMethodName.Data(), fJobName, methodTitle,
                                                loader->GetDataSetInfo(), theOption);
   }
   else {
      // boosted classifier, requires a specific definition, making it transparent for the user
      Log() << kDEBUG << "Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
      im = ClassifierFactory::Instance().Create("Boost", fJobName, methodTitle, loader->GetDataSetInfo(), theOption);
      MethodBoost *methBoost = dynamic_cast<MethodBoost *>(im); // DSMTEST divided into two lines
      if (!methBoost) { // DSMTEST
         Log() << kFATAL << "Method with type kBoost cannot be cast to MethodBoost. /Factory" << Endl; // DSMTEST
         return nullptr;
      }
      if (fModelPersistence) methBoost->SetWeightFileDir(fileDir);
      methBoost->SetModelPersistence(fModelPersistence);
      methBoost->SetBoostedMethodName(theMethodName); // DSMTEST divided into two lines
      methBoost->fDataSetManager = loader->GetDataSetInfo().GetDataSetManager(); // DSMTEST
      methBoost->SetFile(fgTargetFile);
      methBoost->SetSilentFile(IsSilentFile());
   }

   MethodBase *method = dynamic_cast<MethodBase*>(im);
   if (method == 0) return 0; // could not create method

   // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
   if (method->GetMethodType() == Types::kCategory) { // DSMTEST
      MethodCategory *methCat = (dynamic_cast<MethodCategory*>(im)); // DSMTEST
      if (!methCat) { // DSMTEST
         Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl; // DSMTEST
         return nullptr;
      }
      if (fModelPersistence) methCat->SetWeightFileDir(fileDir);
      methCat->SetModelPersistence(fModelPersistence);
      methCat->fDataSetManager = loader->GetDataSetInfo().GetDataSetManager(); // DSMTEST
      methCat->SetFile(fgTargetFile);
      methCat->SetSilentFile(IsSilentFile());
   } // DSMTEST

   if (!method->HasAnalysisType( fAnalysisType,
                                 loader->GetDataSetInfo().GetNClasses(),
                                 loader->GetDataSetInfo().GetNTargets() )) {
      Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling ";
      if (fAnalysisType == Types::kRegression) {
         Log() << "regression with " << loader->GetDataSetInfo().GetNTargets() << " targets." << Endl;
      }
      else if (fAnalysisType == Types::kMulticlass) {
         Log() << "multiclass classification with " << loader->GetDataSetInfo().GetNClasses() << " classes." << Endl;
      }
      else {
         Log() << "classification with " << loader->GetDataSetInfo().GetNClasses() << " classes." << Endl;
      }
      return 0;
   }

   if (fModelPersistence) method->SetWeightFileDir(fileDir);
   method->SetModelPersistence(fModelPersistence);
   method->SetAnalysisType( fAnalysisType );
   method->SetupMethod();
   method->ParseOptions();
   method->ProcessSetup();
   method->SetFile(fgTargetFile);
   method->SetSilentFile(IsSilentFile());

   // check-for-unused-options is performed; may be overridden by derived classes
   method->CheckSetup();

   if (fMethodsMap.find(datasetname) == fMethodsMap.end())
   {
      MVector *mvector = new MVector;
      fMethodsMap[datasetname] = mvector;
   }
   fMethodsMap[datasetname]->push_back( method );
   return method;
}
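
// Booking sketch (illustrative): the method-specific options are parsed by the
// created method itself; adding "Boost_num" to the option string activates the
// boosted-classifier path above.
//
//    factory.BookMethod(loader, TMVA::Types::kMLP, "MLP", "HiddenLayers=N+5");
//    factory.BookMethod(loader, TMVA::Types::kFisher, "BoostedFisher", "Boost_num=10");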

////////////////////////////////////////////////////////////////////////////////
/// Books an MVA method. The option configuration string is custom for each MVA;
/// the TString field "theNameAppendix" serves to define (and distinguish)
/// several instances of a given MVA, e.g. when one wants to compare the
/// performance of various configurations.

TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, Types::EMVA theMethod, TString methodTitle, TString theOption )
{
   return BookMethod(loader, Types::Instance().GetMethodName( theMethod ), methodTitle, theOption );
}

////////////////////////////////////////////////////////////////////////////////
/// Adds an already constructed method to be managed by this factory.
///
/// \note Private.
/// \note Know what you are doing when using this method. The method that you
/// are loading could be trained already.
///

TMVA::MethodBase* TMVA::Factory::BookMethodWeightfile(DataLoader *loader, TMVA::Types::EMVA methodType, const TString &weightfile)
{
   TString datasetname = loader->GetName();
   std::string methodTypeName = std::string(Types::Instance().GetMethodName(methodType).Data());
   DataSetInfo &dsi = loader->GetDataSetInfo();

   IMethod *im = ClassifierFactory::Instance().Create(methodTypeName, dsi, weightfile );
   MethodBase *method = (dynamic_cast<MethodBase*>(im));

   if (method == nullptr) return nullptr;

   if (method->GetMethodType() == Types::kCategory) {
      Log() << kERROR << "Cannot handle category methods for now." << Endl;
   }

   TString fileDir;
   if (fModelPersistence) {
      // find prefix in fWeightFileDir;
      TString prefix = gConfig().GetIONames().fWeightFileDirPrefix;
      fileDir = prefix;
      if (!prefix.IsNull())
         if (fileDir[fileDir.Length() - 1] != '/')
            fileDir += "/";
      fileDir += loader->GetName();
      fileDir += "/" + gConfig().GetIONames().fWeightFileDir;
   }

   if (fModelPersistence) method->SetWeightFileDir(fileDir);
   method->SetModelPersistence(fModelPersistence);
   method->SetAnalysisType( fAnalysisType );
   method->SetupMethod();
   method->SetFile(fgTargetFile);
   method->SetSilentFile(IsSilentFile());

   method->DeclareCompatibilityOptions();

   // read weight file
   method->ReadStateFromFile();

   // method->CheckSetup();

   TString methodTitle = method->GetName();
   if (HasMethod(datasetname, methodTitle)) {
      Log() << kFATAL << "Booking failed since method with title <"
            << methodTitle << "> already exists with DataSet Name <" << loader->GetName() << "> "
            << Endl;
   }

   Log() << kINFO << "Booked classifier \"" << method->GetMethodName()
         << "\" of type: \"" << method->GetMethodTypeName() << "\"" << Endl;

   if (fMethodsMap.count(datasetname) == 0) {
      MVector *mvector = new MVector;
      fMethodsMap[datasetname] = mvector;
   }

   fMethodsMap[datasetname]->push_back( method );

   return method;
}

////////////////////////////////////////////////////////////////////////////////
/// Returns pointer to MVA that corresponds to given method title.

TMVA::IMethod* TMVA::Factory::GetMethod(const TString& datasetname, const TString &methodTitle ) const
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) return 0;

   MVector *methods = fMethodsMap.find(datasetname)->second;

   MVector::const_iterator itrMethod;
   //
   for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
      MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
      if ( (mva->GetMethodName()) == methodTitle ) return mva;
   }
   return 0;
}

////////////////////////////////////////////////////////////////////////////////
/// Checks whether a given method name is defined for a given dataset.

Bool_t TMVA::Factory::HasMethod(const TString& datasetname, const TString &methodTitle ) const
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) return kFALSE;

   std::string methodName = methodTitle.Data();
   auto isEqualToMethodName = [&methodName](TMVA::IMethod * m) {
      return ( 0 == methodName.compare( m->GetName() ) );
   };

   TMVA::Factory::MVector * methods = this->fMethodsMap.at(datasetname);
   Bool_t isMethodNameExisting = std::any_of( methods->begin(), methods->end(), isEqualToMethodName);

   return isMethodNameExisting;
}
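
// Query sketch: guard lookups with HasMethod before calling GetMethod, e.g.
//
//    if (factory.HasMethod("dataset", "BDT"))
//       TMVA::IMethod *m = factory.GetMethod("dataset", "BDT");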

////////////////////////////////////////////////////////////////////////////////

void TMVA::Factory::WriteDataInformation(DataSetInfo& fDataSetInfo)
{
   RootBaseDir()->cd();

   if (!RootBaseDir()->GetDirectory(fDataSetInfo.GetName())) RootBaseDir()->mkdir(fDataSetInfo.GetName());
   else return; // loader is now in the output file, we don't need to save again

   RootBaseDir()->cd(fDataSetInfo.GetName());
   fDataSetInfo.GetDataSet(); // builds dataset (including calculation of correlation matrix)

   // correlation matrix of the default DS
   const TMatrixD* m(0);
   const TH2* h(0);

   if (fAnalysisType == Types::kMulticlass) {
      for (UInt_t cls = 0; cls < fDataSetInfo.GetNClasses(); cls++) {
         m = fDataSetInfo.CorrelationMatrix(fDataSetInfo.GetClassInfo(cls)->GetName());
         h = fDataSetInfo.CreateCorrelationMatrixHist(m, TString("CorrelationMatrix")+fDataSetInfo.GetClassInfo(cls)->GetName(),
                                                      TString("Correlation Matrix (")+ fDataSetInfo.GetClassInfo(cls)->GetName() +TString(")"));
         if (h != 0) {
            h->Write();
            delete h;
         }
      }
   }
   else {
      m = fDataSetInfo.CorrelationMatrix( "Signal" );
      h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixS", "Correlation Matrix (signal)");
      if (h != 0) {
         h->Write();
         delete h;
      }

      m = fDataSetInfo.CorrelationMatrix( "Background" );
      h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixB", "Correlation Matrix (background)");
      if (h != 0) {
         h->Write();
         delete h;
      }

      m = fDataSetInfo.CorrelationMatrix( "Regression" );
      h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrix", "Correlation Matrix");
      if (h != 0) {
         h->Write();
         delete h;
      }
   }

   // some default transformations to evaluate
   // NOTE: all transformations are destroyed after this test
   TString processTrfs = "I"; //"I;N;D;P;U;G,D;"

   // plus some user defined transformations
   processTrfs = fTransformations;

   // remove any trace of identity transform - if given (avoid to apply it twice)
   std::vector<TMVA::TransformationHandler*> trfs;
   TransformationHandler* identityTrHandler = 0;

   std::vector<TString> trfsDef = gTools().SplitString(processTrfs,';');
   std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
   for (; trfsDefIt != trfsDef.end(); ++trfsDefIt) {
      trfs.push_back(new TMVA::TransformationHandler(fDataSetInfo, "Factory"));
      TString trfS = (*trfsDefIt);

      // Log() << kINFO << Endl;
      Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
      TMVA::CreateVariableTransforms( trfS,
                                      fDataSetInfo,
                                      *(trfs.back()),
                                      Log() );

      if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();
   }

   const std::vector<Event*>& inputEvents = fDataSetInfo.GetDataSet()->GetEventCollection();

   // apply all transformations
   std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();

   for (; trfIt != trfs.end(); ++trfIt) {
      // setting a Root dir causes the variables distributions to be saved to the root file
      (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.GetName())); // every dataloader has its own directory
      (*trfIt)->CalcTransformations(inputEvents);
   }
   if (identityTrHandler) identityTrHandler->PrintVariableRanking();

   // clean up
   for (trfIt = trfs.begin(); trfIt != trfs.end(); ++trfIt) delete *trfIt;
}

////////////////////////////////////////////////////////////////////////////////
/// Iterates through all booked methods and, for those that support parameter
/// tuning, calls Method::Train() for different parameter settings, keeping the
/// optimal configuration; that configuration is then used in the main
/// training loop.

std::map<TString,Double_t> TMVA::Factory::OptimizeAllMethods(TString fomType, TString fitType)
{

   std::map<TString,MVector*>::iterator itrMap;
   std::map<TString,Double_t> TunedParameters;
   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap)
   {
      MVector *methods = itrMap->second;

      MVector::iterator itrMethod;

      // iterate over methods and optimize
      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
         if (!mva) {
            Log() << kFATAL << "Dynamic cast to MethodBase failed" << Endl;
            return TunedParameters;
         }

         if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
            Log() << kWARNING << "Method " << mva->GetMethodName()
                  << " not trained (training tree has fewer entries ["
                  << mva->Data()->GetNTrainingEvents()
                  << "] than required [" << MinNoTrainingEvents << "])" << Endl;
            continue;
         }

         Log() << kINFO << "Optimize method: " << mva->GetMethodName() << " for "
               << (fAnalysisType == Types::kRegression ? "Regression" :
                   (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl;

         TunedParameters = mva->OptimizeTuningParameters(fomType,fitType);
         Log() << kINFO << "Optimization of tuning parameters finished for method: " << mva->GetName() << Endl;
      }
   }

   return TunedParameters;

}
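
// Tuning sketch (illustrative): the figure-of-merit and fitter types are
// forwarded to MethodBase::OptimizeTuningParameters, e.g.
//
//    std::map<TString, Double_t> tuned = factory.OptimizeAllMethods("ROCIntegral", "FitGA");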

////////////////////////////////////////////////////////////////////////////////
/// Private method to generate a ROCCurve instance for a given method.
/// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
/// understands.
///
/// \note You own the returned pointer.
///

TMVA::ROCCurve *TMVA::Factory::GetROC(TMVA::DataLoader *loader, TString theMethodName, UInt_t iClass,
                                      Types::ETreeType type)
{
   return GetROC((TString)loader->GetName(), theMethodName, iClass, type);
}

////////////////////////////////////////////////////////////////////////////////
/// Private method to generate a ROCCurve instance for a given method.
/// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
/// understands.
///
/// \note You own the returned pointer.
///

TMVA::ROCCurve *TMVA::Factory::GetROC(TString datasetname, TString theMethodName, UInt_t iClass, Types::ETreeType type)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return nullptr;
   }

   if (!this->HasMethod(datasetname, theMethodName)) {
      Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
            << Endl;
      return nullptr;
   }

   std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
   if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
      Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.")
            << Endl;
      return nullptr;
   }

   TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(this->GetMethod(datasetname, theMethodName));
   TMVA::DataSet *dataset = method->Data();
   dataset->SetCurrentType(type);
   TMVA::Results *results = dataset->GetResults(theMethodName, type, this->fAnalysisType);

   UInt_t nClasses = method->DataInfo().GetNClasses();
   if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
      Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
                              iClass, nClasses)
            << Endl;
      return nullptr;
   }

   TMVA::ROCCurve *rocCurve = nullptr;
   if (this->fAnalysisType == Types::kClassification) {

      std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
      std::vector<Bool_t> *mvaResTypes = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
      std::vector<Float_t> mvaResWeights;

      auto eventCollection = dataset->GetEventCollection(type);
      mvaResWeights.reserve(eventCollection.size());
      for (auto ev : eventCollection) {
         mvaResWeights.push_back(ev->GetWeight());
      }

      rocCurve = new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);

   } else if (this->fAnalysisType == Types::kMulticlass) {
      std::vector<Float_t> mvaRes;
      std::vector<Bool_t> mvaResTypes;
      std::vector<Float_t> mvaResWeights;

      std::vector<std::vector<Float_t>> *rawMvaRes = dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();

      // Vector transpose due to values being stored as
      // [ [0, 1, 2], [0, 1, 2], ... ]
      // in ResultsMulticlass::GetValueVector.
      mvaRes.reserve(rawMvaRes->size());
      for (auto item : *rawMvaRes) {
         mvaRes.push_back(item[iClass]);
      }

      auto eventCollection = dataset->GetEventCollection(type);
      mvaResTypes.reserve(eventCollection.size());
      mvaResWeights.reserve(eventCollection.size());
      for (auto ev : eventCollection) {
         mvaResTypes.push_back(ev->GetClass() == iClass);
         mvaResWeights.push_back(ev->GetWeight());
      }

      rocCurve = new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
   }

   return rocCurve;
}

////////////////////////////////////////////////////////////////////////////////
/// Calculate the integral of the ROC curve, also known as the area under curve
/// (AUC), for a given method.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///

Double_t TMVA::Factory::GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass)
{
   return GetROCIntegral((TString)loader->GetName(), theMethodName, iClass);
}

////////////////////////////////////////////////////////////////////////////////
/// Calculate the integral of the ROC curve, also known as the area under curve
/// (AUC), for a given method.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///

Double_t TMVA::Factory::GetROCIntegral(TString datasetname, TString theMethodName, UInt_t iClass)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return 0;
   }

   if (!this->HasMethod(datasetname, theMethodName)) {
      Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
      return 0;
   }

   std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
   if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
      Log() << kERROR << Form("Can only generate ROC integral for analysis type kClassification and kMulticlass.")
            << Endl;
      return 0;
   }

   TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
   if (!rocCurve) {
      Log() << kFATAL << Form("ROCCurve object was not created for Method = %s with Dataset = %s ",
                              theMethodName.Data(), datasetname.Data())
            << Endl;
      return 0;
   }

   Int_t npoints = TMVA::gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   Double_t rocIntegral = rocCurve->GetROCIntegral(npoints);
   delete rocCurve;

   return rocIntegral;
}
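
// Retrieval sketch (valid after TestAllMethods/EvaluateAllMethods have run):
//
//    Double_t auc  = factory.GetROCIntegral(loader, "BDT");     // binary classification
//    Double_t auc0 = factory.GetROCIntegral(loader, "MLP", 0);  // multiclass: class 0 vs. rest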

////////////////////////////////////////////////////////////////////////////////
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// Returns a ROC graph for a given method, or nullptr on error.
///
/// Note: Evaluation of the given method must have been run prior to ROC
/// generation through Factory::EvaluateAllMethods.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others background. This is fine in binary classification, but in
/// multiclass classification the ROC surface is an N-dimensional shape,
/// where N is the number of classes minus 1.

TGraph* TMVA::Factory::GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles, UInt_t iClass)
{
   return GetROCCurve( (TString)loader->GetName(), theMethodName, setTitles, iClass );
}

////////////////////////////////////////////////////////////////////////////////
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// Returns a ROC graph for a given method, or nullptr on error.
///
/// Note: Evaluation of the given method must have been run prior to ROC
/// generation through Factory::EvaluateAllMethods.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others background. This is fine in binary classification, but in
/// multiclass classification the ROC surface is an N-dimensional shape,
/// where N is the number of classes minus 1.

TGraph* TMVA::Factory::GetROCCurve(TString datasetname, TString theMethodName, Bool_t setTitles, UInt_t iClass)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return nullptr;
   }

   if (!this->HasMethod(datasetname, theMethodName)) {
      Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
      return nullptr;
   }

   std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
   if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
      Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.") << Endl;
      return nullptr;
   }

   TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
   TGraph *graph = nullptr;

   if (!rocCurve) {
      Log() << kFATAL << Form("ROCCurve object was not created for Method = %s with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
      return nullptr;
   }

   graph = (TGraph *)rocCurve->GetROCCurve()->Clone();
   delete rocCurve;

   if (setTitles) {
      graph->GetYaxis()->SetTitle("Background rejection (Specificity)");
      graph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
      graph->SetTitle(Form("Signal efficiency vs. Background rejection (%s)", theMethodName.Data()));
   }

   return graph;
}

////////////////////////////////////////////////////////////////////////////////
/// Generate a collection of graphs, for all methods for a given class. Suitable
/// for comparing method performance.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others background. This is fine in binary classification, but in
/// multiclass classification the ROC surface is an N-dimensional shape,
/// where N is the number of classes minus 1.

TMultiGraph* TMVA::Factory::GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
{
   return GetROCCurveAsMultiGraph((TString)loader->GetName(), iClass);
}

////////////////////////////////////////////////////////////////////////////////
/// Generate a collection of graphs, for all methods for a given class. Suitable
/// for comparing method performance.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others background. This is fine in binary classification, but in
/// multiclass classification the ROC surface is an N-dimensional shape,
/// where N is the number of classes minus 1.

TMultiGraph* TMVA::Factory::GetROCCurveAsMultiGraph(TString datasetname, UInt_t iClass)
{
   UInt_t line_color = 1;

   TMultiGraph *multigraph = new TMultiGraph();

   MVector *methods = fMethodsMap[datasetname.Data()];
   for (auto * method_raw : *methods) {
      TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(method_raw);
      if (method == nullptr) { continue; }

      TString methodName = method->GetMethodName();
      UInt_t nClasses = method->DataInfo().GetNClasses();

      if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
         Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass, nClasses) << Endl;
         continue;
      }

      TString className = method->DataInfo().GetClassInfo(iClass)->GetName();

      TGraph *graph = this->GetROCCurve(datasetname, methodName, false, iClass);
      graph->SetTitle(methodName);

      graph->SetLineWidth(2);
      graph->SetLineColor(line_color++);
      graph->SetFillColor(10);

      multigraph->Add(graph);
   }

   if (multigraph->GetListOfGraphs() == nullptr) {
      Log() << kERROR << Form("No methods have class %i defined.", iClass) << Endl;
      return nullptr;
   }

   return multigraph;
}

////////////////////////////////////////////////////////////////////////////////
/// Draws ROC curves for all methods booked with the factory for a given class
/// onto a canvas.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others background. This is fine in binary classification, but in
/// multiclass classification the ROC surface is an N-dimensional shape,
/// where N is the number of classes minus 1.

TCanvas* TMVA::Factory::GetROCCurve(TMVA::DataLoader *loader, UInt_t iClass)
{
   return GetROCCurve((TString)loader->GetName(), iClass);
}

////////////////////////////////////////////////////////////////////////////////
/// Draws ROC curves for all methods booked with the factory for a given class.
///
/// Argument iClass specifies the class to generate the ROC curve in a
/// multiclass setting. It is ignored for binary classification.
///
/// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
/// and the others background. This is fine in binary classification, but in
/// multiclass classification the ROC surface is an N-dimensional shape,
/// where N is the number of classes minus 1.

TCanvas* TMVA::Factory::GetROCCurve(TString datasetname, UInt_t iClass)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return nullptr;
   }

   TString name = Form("ROCCurve %s class %i", datasetname.Data(), iClass);
   TCanvas *canvas = new TCanvas(name, "ROC Curve", 200, 10, 700, 500);
   canvas->SetGrid();

   TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass);

   if (multigraph) {
      multigraph->Draw("AL");

      multigraph->GetYaxis()->SetTitle("Background rejection (Specificity)");
      multigraph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");

      TString titleString = Form("Signal efficiency vs. Background rejection");
      if (this->fAnalysisType == Types::kMulticlass) {
         titleString = Form("%s (Class=%i)", titleString.Data(), iClass);
      }

      // Workaround for TMultiGraph not drawing title correctly.
      multigraph->GetHistogram()->SetTitle( titleString );
      multigraph->SetTitle( titleString );

      canvas->BuildLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
   }

   return canvas;
}
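
// Drawing sketch (illustrative; the caller owns the returned canvas):
//
//    TCanvas *c = factory.GetROCCurve(loader);  // all booked methods, class 0 in multiclass
//    c->SaveAs("ROC.png");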

////////////////////////////////////////////////////////////////////////////////
/// Iterates through all booked methods and calls training.

void TMVA::Factory::TrainAllMethods()
{
   Log() << kHEADER << gTools().Color("bold") << "Train all methods" << gTools().Color("reset") << Endl;
   // iterates over all MVAs that have been booked, and calls their training methods

   // don't do anything if no method booked
   if (fMethodsMap.empty()) {
      Log() << kINFO << "...nothing found to train" << Endl;
      return;
   }

   // here the training starts
   // Log() << kINFO << " " << Endl;
   Log() << kDEBUG << "Train all methods for "
         << (fAnalysisType == Types::kRegression ? "Regression" :
             (fAnalysisType == Types::kMulticlass ? "Multiclass" : "Classification") ) << " ..." << Endl;

   std::map<TString,MVector*>::iterator itrMap;

   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap)
   {
      MVector *methods = itrMap->second;
      MVector::iterator itrMethod;

      // iterate over methods and train
      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);

         if (mva == 0) continue;

         if (mva->DataInfo().GetDataSetManager()->DataInput().GetEntries() <= 1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
            Log() << kFATAL << "No input data for the training provided!" << Endl;
         }

         if (fAnalysisType == Types::kRegression && mva->DataInfo().GetNTargets() < 1)
            Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
         else if ((fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification)
                  && mva->DataInfo().GetNClasses() < 2)
            Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;

         // first print some information about the default dataset
         if (!IsSilentFile()) WriteDataInformation(mva->fDataSetInfo);

         if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
            Log() << kWARNING << "Method " << mva->GetMethodName()
                  << " not trained (training tree has fewer entries ["
                  << mva->Data()->GetNTrainingEvents()
                  << "] than required [" << MinNoTrainingEvents << "])" << Endl;
            continue;
         }

         Log() << kHEADER << "Train method: " << mva->GetMethodName() << " for "
               << (fAnalysisType == Types::kRegression ? "Regression" :
                   (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl << Endl;
         mva->TrainMethod();
         Log() << kHEADER << "Training finished" << Endl << Endl;
      }

      if (fAnalysisType != Types::kRegression) {

         // variable ranking
         // Log() << Endl;
         Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
         for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
            MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
            if (mva && mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) {

               // create and print ranking
               const Ranking* ranking = (*itrMethod)->CreateRanking();
               if (ranking != 0) ranking->Print();
               else Log() << kINFO << "No variable ranking supplied by classifier: "
                          << dynamic_cast<MethodBase*>(*itrMethod)->GetMethodName() << Endl;
            }
         }
      }

      // save training history in case we are not in the silent mode
      if (!IsSilentFile()) {
         for (UInt_t i = 0; i < methods->size(); i++) {
            MethodBase* m = dynamic_cast<MethodBase*>((*methods)[i]);
            if (m == 0) continue;
            m->BaseDir()->cd();
            m->fTrainHistory.SaveHistory(m->GetMethodName());
         }
      }

      // delete all methods and recreate them from weight file - this ensures that the application
      // of the methods (in TMVAClassificationApplication) is consistent with the results obtained
      // in the testing
      // Log() << Endl;
      if (fModelPersistence) {

         Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;

         if (!IsSilentFile()) RootBaseDir()->cd();

         // iterate through all booked methods
         for (UInt_t i = 0; i < methods->size(); i++) {

            MethodBase *m = dynamic_cast<MethodBase *>((*methods)[i]);
            if (m == nullptr)
               continue;

            TMVA::Types::EMVA methodType = m->GetMethodType();
            TString weightfile = m->GetWeightFileName();

            // decide if .txt or .xml file should be read:
            if (READXML)
               weightfile.ReplaceAll(".txt", ".xml");

            DataSetInfo &dataSetInfo = m->DataInfo();
            TString testvarName = m->GetTestvarName();
            delete m; // itrMethod[i];

            // recreate
            m = dynamic_cast<MethodBase *>(ClassifierFactory::Instance().Create(
               Types::Instance().GetMethodName(methodType).Data(), dataSetInfo, weightfile));
            if (m->GetMethodType() == Types::kCategory) {
               MethodCategory *methCat = (dynamic_cast<MethodCategory *>(m));
               if (!methCat)
                  Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;
               else
                  methCat->fDataSetManager = m->DataInfo().GetDataSetManager();
            }
            // ToDo, Do we need to fill the DataSetManager of MethodBoost here too?

            TString wfileDir = m->DataInfo().GetName();
            wfileDir += "/" + gConfig().GetIONames().fWeightFileDir;
            m->SetWeightFileDir(wfileDir);
            m->SetModelPersistence(fModelPersistence);
            m->SetSilentFile(IsSilentFile());
            m->SetAnalysisType(fAnalysisType);
            m->SetupMethod();
            m->ReadStateFromFile();
            m->SetTestvarName(testvarName);

            // replace trained method by newly created one (from weight file) in methods vector
            (*methods)[i] = m;
         }
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Evaluates all booked methods on the testing data and adds the output to the
/// Results in the corresponding DataSet.
///

void TMVA::Factory::TestAllMethods()
{
   Log() << kHEADER << gTools().Color("bold") << "Test all methods" << gTools().Color("reset") << Endl;

   // don't do anything if no method booked
   if (fMethodsMap.empty()) {
      Log() << kINFO << "...nothing found to test" << Endl;
      return;
   }
   std::map<TString,MVector*>::iterator itrMap;

   for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap)
   {
      MVector *methods = itrMap->second;
      MVector::iterator itrMethod;

      // iterate over methods and test
      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
         if (mva == 0)
            continue;
         Types::EAnalysisType analysisType = mva->GetAnalysisType();
         Log() << kHEADER << "Test method: " << mva->GetMethodName() << " for "
               << (analysisType == Types::kRegression
                   ? "Regression"
                   : (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
               << " performance" << Endl << Endl;
         mva->AddOutput(Types::kTesting, analysisType);
      }
   }
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::Factory::MakeClass(const TString& datasetname, const TString& methodTitle ) const
{
   if (methodTitle != "") {
      IMethod* method = GetMethod(datasetname, methodTitle);
      if (method) method->MakeClass();
      else {
         Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
               << "\" in list" << Endl;
      }
   }
   else {

      // no classifier specified, create response classes for all booked methods
      MVector *methods = fMethodsMap.find(datasetname)->second;
      MVector::const_iterator itrMethod;
      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
         if (method == 0) continue;
         Log() << kINFO << "Make response class for classifier: " << method->GetMethodName() << Endl;
         method->MakeClass();
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Print predefined help message of classifier.
/// If no classifier title is given, print the help messages of all booked classifiers.

void TMVA::Factory::PrintHelpMessage(const TString& datasetname, const TString& methodTitle ) const
{
   if (methodTitle != "") {
      IMethod* method = GetMethod(datasetname, methodTitle );
      if (method) method->PrintHelpMessage();
      else {
         Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
               << "\" in list" << Endl;
      }
   }
   else {

      // no classifier specified, print all help messages
      MVector *methods = fMethodsMap.find(datasetname)->second;
      MVector::const_iterator itrMethod;
      for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
         MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
         if (method == 0) continue;
         Log() << kINFO << "Print help message for classifier: " << method->GetMethodName() << Endl;
         method->PrintHelpMessage();
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Iterates over all MVA input variables and evaluates them.

void TMVA::Factory::EvaluateAllVariables(DataLoader *loader, TString options )
{
   Log() << kINFO << "Evaluating all variables..." << Endl;

   for (UInt_t i = 0; i < loader->GetDataSetInfo().GetNVariables(); i++) {
      TString s = loader->GetDataSetInfo().GetVariableInfo(i).GetLabel();
      if (options.Contains("V")) s += ":V";
      this->BookMethod(loader, "Variable", s );
   }
}
1344 
1345 ////////////////////////////////////////////////////////////////////////////////
1346 /// Iterates over all MVAs that have been booked, and calls their evaluation methods.
1347 
1349 {
1350  Log() << kHEADER << gTools().Color("bold") << "Evaluate all methods" << gTools().Color("reset") << Endl;
1351 
1352  // don't do anything if no method booked
1353  if (fMethodsMap.empty()) {
1354  Log() << kINFO << "...nothing found to evaluate" << Endl;
1355  return;
1356  }
1357  std::map<TString,MVector*>::iterator itrMap;
1358 
1359  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1360  {
1361  MVector *methods=itrMap->second;
1362 
1363  // -----------------------------------------------------------------------
1364  // First part of evaluation process
1365  // --> compute efficiencies, and other separation estimators
1366  // -----------------------------------------------------------------------
1367 
1368  // although equal, we now want to separate the output for the variables
1369  // and the real methods
1370  Int_t isel; // will be 0 for a Method; 1 for a Variable
1371  Int_t nmeth_used[2] = {0,0}; // 0 Method; 1 Variable
1372 
1373  std::vector<std::vector<TString> > mname(2);
1374  std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
1375  std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1376  std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1377  std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1378 
1379  std::vector<std::vector<Float_t> > multiclass_testEff;
1380  std::vector<std::vector<Float_t> > multiclass_trainEff;
1381  std::vector<std::vector<Float_t> > multiclass_testPur;
1382  std::vector<std::vector<Float_t> > multiclass_trainPur;
1383 
1384  std::vector<std::vector<Float_t> > train_history;
1385 
1386  // Multiclass confusion matrices.
1387  std::vector<TMatrixD> multiclass_trainConfusionEffB01;
1388  std::vector<TMatrixD> multiclass_trainConfusionEffB10;
1389  std::vector<TMatrixD> multiclass_trainConfusionEffB30;
1390  std::vector<TMatrixD> multiclass_testConfusionEffB01;
1391  std::vector<TMatrixD> multiclass_testConfusionEffB10;
1392  std::vector<TMatrixD> multiclass_testConfusionEffB30;
1393 
1394  std::vector<std::vector<Double_t> > biastrain(1); // "bias" of the regression on the training data
1395  std::vector<std::vector<Double_t> > biastest(1); // "bias" of the regression on test data
1396  std::vector<std::vector<Double_t> > devtrain(1); // "dev" of the regression on the training data
1397  std::vector<std::vector<Double_t> > devtest(1); // "dev" of the regression on test data
1398  std::vector<std::vector<Double_t> > rmstrain(1); // "rms" of the regression on the training data
1399  std::vector<std::vector<Double_t> > rmstest(1); // "rms" of the regression on test data
1400  std::vector<std::vector<Double_t> > minftrain(1); // "minf" of the regression on the training data
1401  std::vector<std::vector<Double_t> > minftest(1); // "minf" of the regression on test data
1402  std::vector<std::vector<Double_t> > rhotrain(1); // correlation of the regression on the training data
1403  std::vector<std::vector<Double_t> > rhotest(1); // correlation of the regression on test data
1404 
1405  // same as above but for 'truncated' quantities (computed for events within 2sigma of RMS)
1406  std::vector<std::vector<Double_t> > biastrainT(1);
1407  std::vector<std::vector<Double_t> > biastestT(1);
1408  std::vector<std::vector<Double_t> > devtrainT(1);
1409  std::vector<std::vector<Double_t> > devtestT(1);
1410  std::vector<std::vector<Double_t> > rmstrainT(1);
1411  std::vector<std::vector<Double_t> > rmstestT(1);
1412  std::vector<std::vector<Double_t> > minftrainT(1);
1413  std::vector<std::vector<Double_t> > minftestT(1);
1414 
1415  // following vector contains all methods - with the exception of Cuts, which are special
1416  MVector methodsNoCuts;
1417 
1418  Bool_t doRegression = kFALSE;
1419  Bool_t doMulticlass = kFALSE;
1420 
1421  // iterate over methods and evaluate
1422  for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1423  Event::SetIsTraining(kFALSE);
1424  MethodBase* theMethod = dynamic_cast<MethodBase*>(*itrMethod);
1425  if(theMethod==0) continue;
1426  theMethod->SetFile(fgTargetFile);
1427  theMethod->SetSilentFile(IsSilentFile());
1428  if (theMethod->GetMethodType() != Types::kCuts) methodsNoCuts.push_back( *itrMethod );
1429 
1430  if (theMethod->DoRegression()) {
1431  doRegression = kTRUE;
1432 
1433  Log() << kINFO << "Evaluate regression method: " << theMethod->GetMethodName() << Endl;
1434  Double_t bias, dev, rms, mInf;
1435  Double_t biasT, devT, rmsT, mInfT;
1436  Double_t rho;
1437 
1438  Log() << kINFO << "TestRegression (testing)" << Endl;
1439  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTesting );
1440  biastest[0] .push_back( bias );
1441  devtest[0] .push_back( dev );
1442  rmstest[0] .push_back( rms );
1443  minftest[0] .push_back( mInf );
1444  rhotest[0] .push_back( rho );
1445  biastestT[0] .push_back( biasT );
1446  devtestT[0] .push_back( devT );
1447  rmstestT[0] .push_back( rmsT );
1448  minftestT[0] .push_back( mInfT );
1449 
1450  Log() << kINFO << "TestRegression (training)" << Endl;
1451  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTraining );
1452  biastrain[0] .push_back( bias );
1453  devtrain[0] .push_back( dev );
1454  rmstrain[0] .push_back( rms );
1455  minftrain[0] .push_back( mInf );
1456  rhotrain[0] .push_back( rho );
1457  biastrainT[0].push_back( biasT );
1458  devtrainT[0] .push_back( devT );
1459  rmstrainT[0] .push_back( rmsT );
1460  minftrainT[0].push_back( mInfT );
1461 
1462  mname[0].push_back( theMethod->GetMethodName() );
1463  nmeth_used[0]++;
1464  if (!IsSilentFile()) {
1465  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1466  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1467  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1468  }
1469  } else if (theMethod->DoMulticlass()) {
1470  // ====================================================================
1471  // === Multiclass evaluation
1472  // ====================================================================
1473  doMulticlass = kTRUE;
1474  Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl;
1475 
1476  // The commented block below would use a genetic algorithm to find the
1477  // approximate optimal working point w.r.t. signalEfficiency * signalPurity;
1478  // since that computation is slow, it is disabled for now.
1479  // theMethod->TestMulticlass(); // This is where the actual GA calc is done
1480  // multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur));
1481 
1482  theMethod->TestMulticlass();
1483 
1484  // Confusion matrix at three background efficiency levels
1485  multiclass_trainConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTraining));
1486  multiclass_trainConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTraining));
1487  multiclass_trainConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTraining));
1488 
1489  multiclass_testConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTesting));
1490  multiclass_testConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTesting));
1491  multiclass_testConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTesting));
1492 
1493  if (!IsSilentFile()) {
1494  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1495  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1496  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1497  }
1498 
1499  nmeth_used[0]++;
1500  mname[0].push_back(theMethod->GetMethodName());
1501  } else {
1502 
1503  Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl;
1504  isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0;
1505 
1506  // perform the evaluation
1507  theMethod->TestClassification();
1508 
1509  // evaluate the classifier
1510  mname[isel].push_back(theMethod->GetMethodName());
1511  sig[isel].push_back(theMethod->GetSignificance());
1512  sep[isel].push_back(theMethod->GetSeparation());
1513  roc[isel].push_back(theMethod->GetROCIntegral());
1514 
1515  Double_t err;
1516  eff01[isel].push_back(theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err));
1517  eff01err[isel].push_back(err);
1518  eff10[isel].push_back(theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err));
1519  eff10err[isel].push_back(err);
1520  eff30[isel].push_back(theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err));
1521  eff30err[isel].push_back(err);
1522  effArea[isel].push_back(theMethod->GetEfficiency("", Types::kTesting, err)); // computes the area (average)
1523 
1524  trainEff01[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer
1525  trainEff10[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.10"));
1526  trainEff30[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.30"));
1527 
1528  nmeth_used[isel]++;
1529 
1530  if (!IsSilentFile()) {
1531  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1532  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1533  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1534  }
1535  }
1536  }
1537  if (doRegression) {
1538 
1539  std::vector<TString> vtemps = mname[0];
1540  std::vector< std::vector<Double_t> > vtmp;
1541  vtmp.push_back( devtest[0] ); // this is the vector that is ranked
1542  vtmp.push_back( devtrain[0] );
1543  vtmp.push_back( biastest[0] );
1544  vtmp.push_back( biastrain[0] );
1545  vtmp.push_back( rmstest[0] );
1546  vtmp.push_back( rmstrain[0] );
1547  vtmp.push_back( minftest[0] );
1548  vtmp.push_back( minftrain[0] );
1549  vtmp.push_back( rhotest[0] );
1550  vtmp.push_back( rhotrain[0] );
1551  vtmp.push_back( devtestT[0] );
1552  vtmp.push_back( devtrainT[0] );
1553  vtmp.push_back( biastestT[0] );
1554  vtmp.push_back( biastrainT[0]);
1555  vtmp.push_back( rmstestT[0] );
1556  vtmp.push_back( rmstrainT[0] );
1557  vtmp.push_back( minftestT[0] );
1558  vtmp.push_back( minftrainT[0]);
1559  gTools().UsefulSortAscending( vtmp, &vtemps );
1560  mname[0] = vtemps;
1561  devtest[0] = vtmp[0];
1562  devtrain[0] = vtmp[1];
1563  biastest[0] = vtmp[2];
1564  biastrain[0] = vtmp[3];
1565  rmstest[0] = vtmp[4];
1566  rmstrain[0] = vtmp[5];
1567  minftest[0] = vtmp[6];
1568  minftrain[0] = vtmp[7];
1569  rhotest[0] = vtmp[8];
1570  rhotrain[0] = vtmp[9];
1571  devtestT[0] = vtmp[10];
1572  devtrainT[0] = vtmp[11];
1573  biastestT[0] = vtmp[12];
1574  biastrainT[0] = vtmp[13];
1575  rmstestT[0] = vtmp[14];
1576  rmstrainT[0] = vtmp[15];
1577  minftestT[0] = vtmp[16];
1578  minftrainT[0] = vtmp[17];
1579  } else if (doMulticlass) {
1580  // TODO: fill in something meaningful
1581  // If there is some ranking of methods to be done it should be done here.
1582  // However, this is not so easy to define for multiclass so it is left out for now.
1583 
1584  }
1585  else {
1586  // now sort the methods/variables according to the best average efficiency area (effArea)
1587  for (Int_t k=0; k<2; k++) {
1588  std::vector< std::vector<Double_t> > vtemp;
1589  vtemp.push_back( effArea[k] ); // this is the vector that is ranked
1590  vtemp.push_back( eff10[k] );
1591  vtemp.push_back( eff01[k] );
1592  vtemp.push_back( eff30[k] );
1593  vtemp.push_back( eff10err[k] );
1594  vtemp.push_back( eff01err[k] );
1595  vtemp.push_back( eff30err[k] );
1596  vtemp.push_back( trainEff10[k] );
1597  vtemp.push_back( trainEff01[k] );
1598  vtemp.push_back( trainEff30[k] );
1599  vtemp.push_back( sig[k] );
1600  vtemp.push_back( sep[k] );
1601  vtemp.push_back( roc[k] );
1602  std::vector<TString> vtemps = mname[k];
1603  gTools().UsefulSortDescending( vtemp, &vtemps );
1604  effArea[k] = vtemp[0];
1605  eff10[k] = vtemp[1];
1606  eff01[k] = vtemp[2];
1607  eff30[k] = vtemp[3];
1608  eff10err[k] = vtemp[4];
1609  eff01err[k] = vtemp[5];
1610  eff30err[k] = vtemp[6];
1611  trainEff10[k] = vtemp[7];
1612  trainEff01[k] = vtemp[8];
1613  trainEff30[k] = vtemp[9];
1614  sig[k] = vtemp[10];
1615  sep[k] = vtemp[11];
1616  roc[k] = vtemp[12];
1617  mname[k] = vtemps;
1618  }
1619  }
1620 
1621  // -----------------------------------------------------------------------
1622  // Second part of evaluation process
1623  // --> compute correlations among MVAs
1624  // --> compute correlations between input variables and MVA (determines importance)
1625  // --> count overlaps
1626  // -----------------------------------------------------------------------
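// The TPrincipal objects below accumulate, per test event, a row holding the
// nmeth MVA outputs followed by the nvar input variables; their covariance
// matrices then yield the correlation matrices that are printed further down.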
1627  if(fCorrelations)
1628  {
1629  const Int_t nmeth = methodsNoCuts.size();
1630  MethodBase* method = dynamic_cast<MethodBase*>(methods[0][0]);
1631  const Int_t nvar = method->fDataSetInfo.GetNVariables();
1632  if (!doRegression && !doMulticlass ) {
1633 
1634  if (nmeth > 0) {
1635 
1636  // needed for correlations
1637  Double_t *dvec = new Double_t[nmeth+nvar];
1638  std::vector<Double_t> rvec;
1639 
1640  // for correlations
1641  TPrincipal* tpSig = new TPrincipal( nmeth+nvar, "" );
1642  TPrincipal* tpBkg = new TPrincipal( nmeth+nvar, "" );
1643 
1644  // set required tree branch references
1645  Int_t ivar = 0;
1646  std::vector<TString>* theVars = new std::vector<TString>;
1647  std::vector<ResultsClassification*> mvaRes;
1648  for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); ++itrMethod, ++ivar) {
1649  MethodBase* m = dynamic_cast<MethodBase*>(*itrMethod);
1650  if(m==0) continue;
1651  theVars->push_back( m->GetTestvarName() );
1652  rvec.push_back( m->GetSignalReferenceCut() );
1653  theVars->back().ReplaceAll( "MVA_", "" );
1654  mvaRes.push_back( dynamic_cast<ResultsClassification*>( m->Data()->GetResults( m->GetMethodName(),
1655  Types::kTesting,
1656  Types::kMaxAnalysisType) ) );
1657  }
1658 
1659  // for overlap study
1660  TMatrixD* overlapS = new TMatrixD( nmeth, nmeth );
1661  TMatrixD* overlapB = new TMatrixD( nmeth, nmeth );
1662  (*overlapS) *= 0; // init...
1663  (*overlapB) *= 0; // init...
1664 
1665  // loop over test tree
1666  DataSet* defDs = method->fDataSetInfo.GetDataSet();
1667  defDs->SetCurrentType(Types::kTesting);
1668  for (Int_t ievt=0; ievt<defDs->GetNEvents(); ievt++) {
1669  const Event* ev = defDs->GetEvent(ievt);
1670 
1671  // for correlations
1672  TMatrixD* theMat = 0;
1673  for (Int_t im=0; im<nmeth; im++) {
1674  // check for NaN value
1675  Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
1676  if (TMath::IsNaN(retval)) {
1677  Log() << kWARNING << "Found NaN return value in event: " << ievt
1678  << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;
1679  dvec[im] = 0;
1680  }
1681  else dvec[im] = retval;
1682  }
1683  for (Int_t iv=0; iv<nvar; iv++) dvec[iv+nmeth] = (Double_t)ev->GetValue(iv);
1684  if (method->fDataSetInfo.IsSignal(ev)) { tpSig->AddRow( dvec ); theMat = overlapS; }
1685  else { tpBkg->AddRow( dvec ); theMat = overlapB; }
1686 
1687  // count overlaps
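// Two methods "overlap" on an event when their outputs fall on the same side
// of their respective signal-reference cuts, i.e. both call the event
// signal-like or both call it background-like.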
1688  for (Int_t im=0; im<nmeth; im++) {
1689  for (Int_t jm=im; jm<nmeth; jm++) {
1690  if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1691  (*theMat)(im,jm)++;
1692  if (im != jm) (*theMat)(jm,im)++;
1693  }
1694  }
1695  }
1696  }
1697 
1698  // renormalise overlap matrix
1699  (*overlapS) *= (1.0/defDs->GetNEvtSigTest()); // normalise by the number of signal test events
1700  (*overlapB) *= (1.0/defDs->GetNEvtBkgdTest()); // normalise by the number of background test events
1701 
1702  tpSig->MakePrincipals();
1703  tpBkg->MakePrincipals();
1704 
1705  const TMatrixD* covMatS = tpSig->GetCovarianceMatrix();
1706  const TMatrixD* covMatB = tpBkg->GetCovarianceMatrix();
1707 
1708  const TMatrixD* corrMatS = gTools().GetCorrelationMatrix( covMatS );
1709  const TMatrixD* corrMatB = gTools().GetCorrelationMatrix( covMatB );
1710 
1711  // print correlation matrices
1712  if (corrMatS != 0 && corrMatB != 0) {
1713 
1714  // extract MVA matrix
1715  TMatrixD mvaMatS(nmeth,nmeth);
1716  TMatrixD mvaMatB(nmeth,nmeth);
1717  for (Int_t im=0; im<nmeth; im++) {
1718  for (Int_t jm=0; jm<nmeth; jm++) {
1719  mvaMatS(im,jm) = (*corrMatS)(im,jm);
1720  mvaMatB(im,jm) = (*corrMatB)(im,jm);
1721  }
1722  }
1723 
1724  // extract variables - to MVA matrix
1725  std::vector<TString> theInputVars;
1726  TMatrixD varmvaMatS(nvar,nmeth);
1727  TMatrixD varmvaMatB(nvar,nmeth);
1728  for (Int_t iv=0; iv<nvar; iv++) {
1729  theInputVars.push_back( method->fDataSetInfo.GetVariableInfo( iv ).GetLabel() );
1730  for (Int_t jm=0; jm<nmeth; jm++) {
1731  varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1732  varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1733  }
1734  }
1735 
1736  if (nmeth > 1) {
1737  Log() << kINFO << Endl;
1738  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (signal):" << Endl;
1739  gTools().FormattedOutput( mvaMatS, *theVars, Log() );
1740  Log() << kINFO << Endl;
1741 
1742  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (background):" << Endl;
1743  gTools().FormattedOutput( mvaMatB, *theVars, Log() );
1744  Log() << kINFO << Endl;
1745  }
1746 
1747  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (signal):" << Endl;
1748  gTools().FormattedOutput( varmvaMatS, theInputVars, *theVars, Log() );
1749  Log() << kINFO << Endl;
1750 
1751  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (background):" << Endl;
1752  gTools().FormattedOutput( varmvaMatB, theInputVars, *theVars, Log() );
1753  Log() << kINFO << Endl;
1754  }
1755  else Log() << kWARNING <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "<TestAllMethods> cannot compute correlation matrices" << Endl;
1756 
1757  // print overlap matrices
1758  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
1759  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "the MVAs 'i' and 'j' have returned consistent answers about \"signal-likeness\"" << Endl;
1760  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "An event is signal-like, if its MVA output exceeds the following value:" << Endl;
1761  gTools().FormattedOutput( rvec, *theVars, "Method" , "Cut value", Log() );
1762  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
1763 
1764  // give notice that cut method has been excluded from this test
1765  if (nmeth != (Int_t)methods->size())
1766  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Note: no correlations and overlap with cut method are provided at present" << Endl;
1767 
1768  if (nmeth > 1) {
1769  Log() << kINFO << Endl;
1770  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (signal):" << Endl;
1771  gTools().FormattedOutput( *overlapS, *theVars, Log() );
1772  Log() << kINFO << Endl;
1773 
1774  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (background):" << Endl;
1775  gTools().FormattedOutput( *overlapB, *theVars, Log() );
1776  }
1777 
1778  // cleanup
1779  delete tpSig;
1780  delete tpBkg;
1781  delete corrMatS;
1782  delete corrMatB;
1783  delete theVars;
1784  delete overlapS;
1785  delete overlapB;
1786  delete [] dvec;
1787  }
1788  }
1789  }
1790  // -----------------------------------------------------------------------
1791  // Third part of evaluation process
1792  // --> output
1793  // -----------------------------------------------------------------------
1794 
1795  if (doRegression) {
1796 
1797  Log() << kINFO << Endl;
1798  TString hLine = "--------------------------------------------------------------------------------------------------";
1799  Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
1800  Log() << kINFO << "(\"Bias\" quotes the mean deviation of the regression from true target." << Endl;
1801  Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
1802  Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
1803  Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
1804  Log() << kINFO << hLine << Endl;
1805  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1806  Log() << kINFO << hLine << Endl;
1807 
1808  for (Int_t i=0; i<nmeth_used[0]; i++) {
1809  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1810  if(theMethod==0) continue;
1811 
1812  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1813  theMethod->fDataSetInfo.GetName(),
1814  (const char*)mname[0][i],
1815  biastest[0][i], biastestT[0][i],
1816  rmstest[0][i], rmstestT[0][i],
1817  minftest[0][i], minftestT[0][i] )
1818  << Endl;
1819  }
1820  Log() << kINFO << hLine << Endl;
1821  Log() << kINFO << Endl;
1822  Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
1823  Log() << kINFO << "(overtraining check)" << Endl;
1824  Log() << kINFO << hLine << Endl;
1825  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1826  Log() << kINFO << hLine << Endl;
1827 
1828  for (Int_t i=0; i<nmeth_used[0]; i++) {
1829  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1830  if(theMethod==0) continue;
1831  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1832  theMethod->fDataSetInfo.GetName(),
1833  (const char*)mname[0][i],
1834  biastrain[0][i], biastrainT[0][i],
1835  rmstrain[0][i], rmstrainT[0][i],
1836  minftrain[0][i], minftrainT[0][i] )
1837  << Endl;
1838  }
1839  Log() << kINFO << hLine << Endl;
1840  Log() << kINFO << Endl;
1841  } else if (doMulticlass) {
1842  // ====================================================================
1843  // === Multiclass Output
1844  // ====================================================================
1845 
1846  TString hLine =
1847  "-------------------------------------------------------------------------------------------------------";
1848 
1849  // This part uses a genetic alg. to evaluate the optimal sig eff * sig pur.
1850  // This is why it is disabled for now.
1851  //
1852  // // --- Achievable signal efficiency * signal purity
1853  // // --------------------------------------------------------------------
1854  // Log() << kINFO << Endl;
1855  // Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
1856  // Log() << kINFO << hLine << Endl;
1857 
1858  // // iterate over methods and evaluate
1859  // for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1860  // MethodBase *theMethod = dynamic_cast<MethodBase *>(*itrMethod);
1861  // if (theMethod == 0) {
1862  // continue;
1863  // }
1864 
1865  // TString header = "DataSet Name MVA Method ";
1866  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1867  // header += Form("%-12s ", theMethod->fDataSetInfo.GetClassInfo(icls)->GetName());
1868  // }
1869 
1870  // Log() << kINFO << header << Endl;
1871  // Log() << kINFO << hLine << Endl;
1872  // for (Int_t i = 0; i < nmeth_used[0]; i++) {
1873  // TString res = Form("[%-14s] %-15s", theMethod->fDataSetInfo.GetName(), (const char *)mname[0][i]);
1874  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1875  // res += Form("%#1.3f ", (multiclass_testEff[i][icls]) * (multiclass_testPur[i][icls]));
1876  // }
1877  // Log() << kINFO << res << Endl;
1878  // }
1879 
1880  // Log() << kINFO << hLine << Endl;
1881  // Log() << kINFO << Endl;
1882  // }
1883 
1884  // --- 1 vs Rest ROC AUC, signal efficiency @ given background efficiency
1885  // --------------------------------------------------------------------
1886  TString header1 = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "Dataset", "MVA Method", "ROC AUC", "Sig eff@B=0.01",
1887  "Sig eff@B=0.10", "Sig eff@B=0.30");
1888  TString header2 = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "Name:", "/ Class:", "test (train)", "test (train)",
1889  "test (train)", "test (train)");
1890  Log() << kINFO << Endl;
1891  Log() << kINFO << "1-vs-rest performance metrics per class" << Endl;
1892  Log() << kINFO << hLine << Endl;
1893  Log() << kINFO << Endl;
1894  Log() << kINFO << "Considers the listed class as signal and the other classes" << Endl;
1895  Log() << kINFO << "as background, reporting the resulting binary performance." << Endl;
1896  Log() << kINFO << "A score of 0.820 (0.850) means 0.820 was achieved on the" << Endl;
1897  Log() << kINFO << "test set and 0.850 on the training set." << Endl;
1898 
1899  Log() << kINFO << Endl;
1900  Log() << kINFO << header1 << Endl;
1901  Log() << kINFO << header2 << Endl;
1902  for (Int_t k = 0; k < 2; k++) {
1903  for (Int_t i = 0; i < nmeth_used[k]; i++) {
1904  if (k == 1) {
1905  mname[k][i].ReplaceAll("Variable_", "");
1906  }
1907 
1908  const TString datasetName = itrMap->first;
1909  const TString mvaName = mname[k][i];
1910 
1911  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, mvaName));
1912  if (theMethod == 0) {
1913  continue;
1914  }
1915 
1916  Log() << kINFO << Endl;
1917  TString row = Form("%-15s%-15s", datasetName.Data(), mvaName.Data());
1918  Log() << kINFO << row << Endl;
1919  Log() << kINFO << "------------------------------" << Endl;
1920 
1921  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
1922  for (UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1923 
1924  ROCCurve *rocCurveTrain = GetROC(datasetName, mvaName, iClass, Types::kTraining);
1925  ROCCurve *rocCurveTest = GetROC(datasetName, mvaName, iClass, Types::kTesting);
1926 
1927  const TString className = theMethod->DataInfo().GetClassInfo(iClass)->GetName();
1928  const Double_t rocaucTrain = rocCurveTrain->GetROCIntegral();
1929  const Double_t effB01Train = rocCurveTrain->GetEffSForEffB(0.01);
1930  const Double_t effB10Train = rocCurveTrain->GetEffSForEffB(0.10);
1931  const Double_t effB30Train = rocCurveTrain->GetEffSForEffB(0.30);
1932  const Double_t rocaucTest = rocCurveTest->GetROCIntegral();
1933  const Double_t effB01Test = rocCurveTest->GetEffSForEffB(0.01);
1934  const Double_t effB10Test = rocCurveTest->GetEffSForEffB(0.10);
1935  const Double_t effB30Test = rocCurveTest->GetEffSForEffB(0.30);
1936  const TString rocaucCmp = Form("%5.3f (%5.3f)", rocaucTest, rocaucTrain);
1937  const TString effB01Cmp = Form("%5.3f (%5.3f)", effB01Test, effB01Train);
1938  const TString effB10Cmp = Form("%5.3f (%5.3f)", effB10Test, effB10Train);
1939  const TString effB30Cmp = Form("%5.3f (%5.3f)", effB30Test, effB30Train);
1940  row = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "", className.Data(), rocaucCmp.Data(), effB01Cmp.Data(),
1941  effB10Cmp.Data(), effB30Cmp.Data());
1942  Log() << kINFO << row << Endl;
1943 
1944  delete rocCurveTrain;
1945  delete rocCurveTest;
1946  }
1947  }
1948  }
1949  Log() << kINFO << Endl;
1950  Log() << kINFO << hLine << Endl;
1951  Log() << kINFO << Endl;
1952 
1953  // --- Confusion matrices
1954  // --------------------------------------------------------------------
1955  auto printMatrix = [](TMatrixD const &matTraining, TMatrixD const &matTesting, std::vector<TString> classnames,
1956  UInt_t numClasses, MsgLogger &stream) {
1957  // assert (classLabelWidth >= valueLabelWidth + 2)
1958  // if (...) {Log() << kWARNING << "..." << Endl; }
1959 
1960  // TODO: Ensure matrices are same size.
1961 
1962  TString header = Form(" %-14s", " ");
1963  TString headerInfo = Form(" %-14s", " ");
1964
1965  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1966  header += Form(" %-14s", classnames[iCol].Data());
1967  headerInfo += Form(" %-14s", " test (train)");
1968  }
1969  stream << kINFO << header << Endl;
1970  stream << kINFO << headerInfo << Endl;
1971 
1972  for (UInt_t iRow = 0; iRow < numClasses; ++iRow) {
1973  stream << kINFO << Form(" %-14s", classnames[iRow].Data());
1974 
1975  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1976  if (iCol == iRow) {
1977  stream << kINFO << Form(" %-14s", "-");
1978  } else {
1979  Double_t trainValue = matTraining[iRow][iCol];
1980  Double_t testValue = matTesting[iRow][iCol];
1981  TString entry = Form("%-5.3f (%-5.3f)", testValue, trainValue);
1982  stream << kINFO << Form(" %-14s", entry.Data());
1983  }
1984  }
1985  stream << kINFO << Endl;
1986  }
1987  };
1988 
1989  Log() << kINFO << Endl;
1990  Log() << kINFO << "Confusion matrices for all methods" << Endl;
1991  Log() << kINFO << hLine << Endl;
1992  Log() << kINFO << Endl;
1993  Log() << kINFO << "Does a binary comparison between the two classes given by a " << Endl;
1994  Log() << kINFO << "particular row-column combination. In each case, the class " << Endl;
1995  Log() << kINFO << "given by the row is considered signal while the class given " << Endl;
1996  Log() << kINFO << "by the column index is considered background." << Endl;
1997  Log() << kINFO << Endl;
1998  for (UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
1999  MethodBase *theMethod = dynamic_cast<MethodBase *>(methods->at(iMethod));
2000  if (theMethod == nullptr) {
2001  continue;
2002  }
2003  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
2004 
2005  std::vector<TString> classnames;
2006  for (UInt_t iCls = 0; iCls < numClasses; ++iCls) {
2007  classnames.push_back(theMethod->fDataSetInfo.GetClassInfo(iCls)->GetName());
2008  }
2009  Log() << kINFO
2010  << "=== Showing confusion matrix for method : " << Form("%-15s", (const char *)mname[0][iMethod])
2011  << Endl;
2012  Log() << kINFO << "(Signal Efficiency for Background Efficiency 1%)" << Endl;
2013  Log() << kINFO << "---------------------------------------------------" << Endl;
2014  printMatrix(multiclass_testConfusionEffB01[iMethod], multiclass_trainConfusionEffB01[iMethod], classnames,
2015  numClasses, Log());
2016  Log() << kINFO << Endl;
2017 
2018  Log() << kINFO << "(Signal Efficiency for Background Efficiency 10%)" << Endl;
2019  Log() << kINFO << "---------------------------------------------------" << Endl;
2020  printMatrix(multiclass_testConfusionEffB10[iMethod], multiclass_trainConfusionEffB10[iMethod], classnames,
2021  numClasses, Log());
2022  Log() << kINFO << Endl;
2023 
2024  Log() << kINFO << "(Signal Efficiency for Background Efficiency 30%)" << Endl;
2025  Log() << kINFO << "---------------------------------------------------" << Endl;
2026  printMatrix(multiclass_testConfusionEffB30[iMethod], multiclass_trainConfusionEffB30[iMethod], classnames,
2027  numClasses, Log());
2028  Log() << kINFO << Endl;
2029  }
2030  Log() << kINFO << hLine << Endl;
2031  Log() << kINFO << Endl;
2032 
2033  } else {
2034  // Binary classification
2035  if (fROC) {
2036  Log().EnableOutput();
2037  gConfig().SetSilent(kFALSE);
2038  Log() << Endl;
2039  TString hLine = "------------------------------------------------------------------------------------------"
2040  "-------------------------";
2041  Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
2042  Log() << kINFO << hLine << Endl;
2043  Log() << kINFO << "DataSet MVA " << Endl;
2044  Log() << kINFO << "Name: Method: ROC-integ" << Endl;
2045 
2046  // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error):
2047  // | Sepa- Signifi- " << Endl; Log() << kDEBUG << "Name: Method: @B=0.01
2048  // @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl;
2049  Log() << kDEBUG << hLine << Endl;
2050  for (Int_t k = 0; k < 2; k++) {
2051  if (k == 1 && nmeth_used[k] > 0) {
2052  Log() << kINFO << hLine << Endl;
2053  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2054  }
2055  for (Int_t i = 0; i < nmeth_used[k]; i++) {
2056  TString datasetName = itrMap->first;
2057  TString methodName = mname[k][i];
2058 
2059  if (k == 1) {
2060  methodName.ReplaceAll("Variable_", "");
2061  }
2062 
2063  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, methodName));
2064  if (theMethod == 0) {
2065  continue;
2066  }
2067 
2068  TMVA::DataSet *dataset = theMethod->Data();
2069  TMVA::Results *results = dataset->GetResults(methodName, Types::kTesting, this->fAnalysisType);
2070  std::vector<Bool_t> *mvaResType =
2071  dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
2072 
2073  Double_t rocIntegral = 0.0;
2074  if (mvaResType->size() != 0) {
2075  rocIntegral = GetROCIntegral(datasetName, methodName);
2076  }
2077 
2078  if (sep[k][i] < 0 || sig[k][i] < 0) {
2079  // cannot compute separation/significance -> no MVA (usually for Cuts)
2080  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), effArea[k][i])
2081  << Endl;
2082 
2083  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
2084  // %#1.3f %#1.3f | -- --",
2085  // datasetName.Data(),
2086  // methodName.Data(),
2087  // eff01[k][i], Int_t(1000*eff01err[k][i]),
2088  // eff10[k][i], Int_t(1000*eff10err[k][i]),
2089  // eff30[k][i], Int_t(1000*eff30err[k][i]),
2090  // effArea[k][i],rocIntegral) << Endl;
2091  } else {
2092  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), rocIntegral)
2093  << Endl;
2094  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
2095  // %#1.3f %#1.3f | %#1.3f %#1.3f",
2096  // datasetName.Data(),
2097  // methodName.Data(),
2098  // eff01[k][i], Int_t(1000*eff01err[k][i]),
2099  // eff10[k][i], Int_t(1000*eff10err[k][i]),
2100  // eff30[k][i], Int_t(1000*eff30err[k][i]),
2101  // effArea[k][i],rocIntegral,
2102  // sep[k][i], sig[k][i]) << Endl;
2103  }
2104  }
2105  }
2106  Log() << kINFO << hLine << Endl;
2107  Log() << kINFO << Endl;
2108  Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
2109  Log() << kINFO << hLine << Endl;
2110  Log() << kINFO
2111  << "DataSet MVA Signal efficiency: from test sample (from training sample) "
2112  << Endl;
2113  Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2114  << Endl;
2115  Log() << kINFO << hLine << Endl;
2116  for (Int_t k = 0; k < 2; k++) {
2117  if (k == 1 && nmeth_used[k] > 0) {
2118  Log() << kINFO << hLine << Endl;
2119  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2120  }
2121  for (Int_t i = 0; i < nmeth_used[k]; i++) {
2122  if (k == 1) mname[k][i].ReplaceAll("Variable_", "");
2123  MethodBase *theMethod = dynamic_cast<MethodBase *>((*methods)[i]);
2124  if (theMethod == 0) continue;
2125 
2126  Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2127  theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i],
2128  trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2129  << Endl;
2130  }
2131  }
2132  Log() << kINFO << hLine << Endl;
2133  Log() << kINFO << Endl;
2134 
2135  if (gTools().CheckForSilentOption(GetOptions())) Log().InhibitOutput();
2136  } // end fROC
2137  }
2138  if(!IsSilentFile())
2139  {
2140  std::list<TString> datasets;
2141  for (Int_t k=0; k<2; k++) {
2142  for (Int_t i=0; i<nmeth_used[k]; i++) {
2143  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
2144  if(theMethod==0) continue;
2145  // write test/training trees
2146  RootBaseDir()->cd(theMethod->fDataSetInfo.GetName());
2147  if(std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end())
2148  {
2149  theMethod->Data()->GetTree(Types::kTesting)->Write( "", TObject::kOverwrite );
2150  theMethod->Data()->GetTree(Types::kTraining)->Write( "", TObject::kOverwrite );
2151  datasets.push_back(theMethod->fDataSetInfo.GetName());
2152  }
2153  }
2154  }
2155  }
2156  }//end for MethodsMap
2157  // references for citation
2158  gTools().TMVACitation(Log(), Tools::kHtmlLink);
2159 }
2160 
2161 ////////////////////////////////////////////////////////////////////////////////
2162 /// Evaluate Variable Importance
2163 
2164 TH1F* TMVA::Factory::EvaluateImportance(DataLoader *loader,VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2165 {
2166  fModelPersistence=kFALSE;
2167  fSilentFile=kTRUE;//we need silent file here because we need fast classification results
2168 
2169  //getting number of variables and variable names from loader
2170  const int nbits = loader->GetDataSetInfo().GetNVariables();
2171  if(vitype==VIType::kShort)
2172  return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
2173  else if(vitype==VIType::kAll)
2174  return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
2175  else if(vitype==VIType::kRandom&&nbits>10)
2176  {
2177  return EvaluateImportanceRandom(loader,pow(2,nbits),theMethod,methodTitle,theOption);
2178  }else
2179  {
2180  std::cerr<<"Error in Variable Importance: Random mode requires more than 10 variables in the dataset."<<std::endl;
2181  return nullptr;
2182  }
2183 }
2184 
2185 ////////////////////////////////////////////////////////////////////////////////
2186 
2187 TH1F* TMVA::Factory::EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2188 {
2189 
2190  uint64_t x = 0;
2191  uint64_t y = 0;
2192 
2193  //getting number of variables and variable names from loader
2194  const int nbits = loader->GetDataSetInfo().GetNVariables();
2195  std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2196 
2197  uint64_t range = pow(2, nbits);
2198 
2199  //vector to save importances
2200  std::vector<Double_t> importances(nbits);
2201  //vector to save ROC
2202  std::vector<Double_t> ROC(range);
2203  ROC[0]=0.5;
2204  for (int i = 0; i < nbits; i++)importances[i] = 0;
2205 
2206  Double_t SROC, SSROC; //computed ROC value
2207  for ( x = 1; x <range ; x++) {
2208 
2209  std::bitset<VIBITS> xbitset(x);
2210  if (x == 0) continue; //the data loader needs at least one variable
2211 
2212  //creating loader for seed
2213  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2214 
2215  //adding variables from seed
2216  for (int index = 0; index < nbits; index++) {
2217  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2218  }
2219 
2220  DataLoaderCopy(seedloader,loader);
2221  seedloader->PrepareTrainingAndTestTree(loader->GetDataSetInfo().GetCut("Signal"), loader->GetDataSetInfo().GetCut("Background"), loader->GetDataSetInfo().GetSplitOptions());
2222 
2223  //Booking Seed
2224  BookMethod(seedloader, theMethod, methodTitle, theOption);
2225 
2226  //Train/Test/Evaluation
2227  TrainAllMethods();
2228  TestAllMethods();
2229  EvaluateAllMethods();
2230 
2231  //getting ROC
2232  ROC[x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2233 
2234  //cleaning information to process sub-seeds
2235  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2236  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2237  delete sresults;
2238  delete seedloader;
2239  this->DeleteAllMethods();
2240 
2241  fMethodsMap.clear();
2242  //removing global result because it is requiring a lot of RAM for all seeds
2243  }
2244 
2245 
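// Accumulate the importances: every variable i present in seed x is credited
// with the ROC gain over the sub-seed y that lacks it,
// importances[i] += ROC(x) - ROC(y).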
2246  for ( x = 0; x <range ; x++)
2247  {
2248  SROC=ROC[x];
2249  for (uint32_t i = 0; i < VIBITS; ++i) {
2250  if (x & (uint64_t(1) << i)) {
2251  y = x & ~(uint64_t(1) << i);
2252  std::bitset<VIBITS> ybitset(y);
2253  //need at least one variable
2254  //NOTE: if the sub-seed is zero, this is the special case where
2255  //xbitset has exactly one bit set
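// Worked example: for x = 0b101, clearing bit i = 2 gives y = 0b001, so
// x - y = 0b100 = 4 and log2(4) = 2, crediting variable index 2. The constant
// 0.693147 approximates ln 2, so log(x - y) / 0.693147 computes log2(x - y).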
2256  Double_t ny = log(x - y) / 0.693147;
2257  if (y == 0) {
2258  importances[ny] = SROC - 0.5;
2259  continue;
2260  }
2261 
2262  //getting ROC
2263  SSROC = ROC[y];
2264  importances[ny] += SROC - SSROC;
2265  //cleaning information
2266  }
2267 
2268  }
2269  }
2270  std::cout<<"--- Variable Importance Results (All)"<<std::endl;
2271  return GetImportance(nbits,importances,varNames);
2272 }
2273 
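// Helper: sum(i) = 2^0 + 2^1 + ... + 2^(i-1) = 2^i - 1, i.e. the bitmask with
// the lowest i bits set. EvaluateImportanceShort uses it as the seed that
// contains every variable.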
2274 static long int sum(long int i)
2275 {
2276  long int _sum=0;
2277  for(long int n=0;n<i;n++) _sum+=pow(2,n);
2278  return _sum;
2279 }
2280 
2281 ////////////////////////////////////////////////////////////////////////////////
2282 
2283 TH1F* TMVA::Factory::EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2284 {
2285  uint64_t x = 0;
2286  uint64_t y = 0;
2287 
2288  //getting number of variables and variable names from loader
2289  const int nbits = loader->GetDataSetInfo().GetNVariables();
2290  std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2291 
2292  long int range = sum(nbits);
2293 // std::cout<<range<<std::endl;
2294  //vector to save importances
2295  std::vector<Double_t> importances(nbits);
2296  for (int i = 0; i < nbits; i++)importances[i] = 0;
2297 
2298  Double_t SROC, SSROC; //computed ROC value
2299 
2300  x = range;
2301 
2302  std::bitset<VIBITS> xbitset(x);
2303  if (x == 0) Log()<<kFATAL<<"Error: need at least one variable."<<Endl; //the data loader needs at least one variable
2304 
2305 
2306  //creating loader for seed
2307  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2308 
2309  //adding variables from seed
2310  for (int index = 0; index < nbits; index++) {
2311  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2312  }
2313 
2314  //Loading Dataset
2315  DataLoaderCopy(seedloader,loader);
2316 
2317  //Booking Seed
2318  BookMethod(seedloader, theMethod, methodTitle, theOption);
2319 
2320  //Train/Test/Evaluation
2321  TrainAllMethods();
2322  TestAllMethods();
2323  EvaluateAllMethods();
2324 
2325  //getting ROC
2326  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2327 
2328  //cleaning information to process sub-seeds
2329  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2330  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2331  delete sresults;
2332  delete seedloader;
2333  this->DeleteAllMethods();
2334  fMethodsMap.clear();
2335 
2336  //removing global result because it is requiring a lot of RAM for all seeds
2337 
2338  for (uint32_t i = 0; i < VIBITS; ++i) {
2339  if (x & (uint64_t(1) << i)) {
2340  y = x & ~(uint64_t(1) << i);
2341  std::bitset<VIBITS> ybitset(y);
2342  //need at least one variable
2343  //NOTE: if the sub-seed is zero, this is the special case where
2344  //xbitset has exactly one bit set
2345  Double_t ny = log(x - y) / 0.693147;
2346  if (y == 0) {
2347  importances[ny] = SROC - 0.5;
2348  continue;
2349  }
2350 
2351  //creating loader for sub-seed
2352  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2353  //adding variables from sub-seed
2354  for (int index = 0; index < nbits; index++) {
2355  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2356  }
2357 
2358  //Loading Dataset
2359  DataLoaderCopy(subseedloader,loader);
2360 
2361  //Booking SubSeed
2362  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2363 
2364  //Train/Test/Evaluation
2365  TrainAllMethods();
2366  TestAllMethods();
2367  EvaluateAllMethods();
2368 
2369  //getting ROC
2370  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2371  importances[ny] += SROC - SSROC;
2372 
2373  //cleaning information
2374  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2376  delete ssresults;
2377  delete subseedloader;
2378  this->DeleteAllMethods();
2379  fMethodsMap.clear();
2380  }
2381  }
2382  std::cout<<"--- Variable Importance Results (Short)"<<std::endl;
2383  return GetImportance(nbits,importances,varNames);
2384 }
2385 
2386 ////////////////////////////////////////////////////////////////////////////////
2387 
2388 TH1F* TMVA::Factory::EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2389 {
2390  TRandom3 *rangen = new TRandom3(0); //Random Gen.
2391 
2392  uint64_t x = 0;
2393  uint64_t y = 0;
2394 
2395  //getting number of variables and variable names from loader
2396  const int nbits = loader->GetDataSetInfo().GetNVariables();
2397  std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2398 
2399  long int range = pow(2, nbits);
2400 
2401  //vector to save importances
2402  std::vector<Double_t> importances(nbits);
2403  Double_t importances_norm = 0;
2404  for (int i = 0; i < nbits; i++)importances[i] = 0;
2405 
2406  Double_t SROC, SSROC; //computed ROC value
2407  for (UInt_t n = 0; n < nseeds; n++) {
2408  x = rangen -> Integer(range);
2409 
2410  std::bitset<32> xbitset(x);
2411  if (x == 0) continue; //the data loader needs at least one variable
2412 
2413 
2414  //creating loader for seed
2415  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2416 
2417  //adding variables from seed
2418  for (int index = 0; index < nbits; index++) {
2419  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2420  }
2421 
2422  //Loading Dataset
2423  DataLoaderCopy(seedloader,loader);
2424 
2425  //Booking Seed
2426  BookMethod(seedloader, theMethod, methodTitle, theOption);
2427 
2428  //Train/Test/Evaluation
2429  TrainAllMethods();
2430  TestAllMethods();
2431  EvaluateAllMethods();
2432 
2433  //getting ROC
2434  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2435 // std::cout << "Seed: n " << n << " x " << x << " xbitset:" << xbitset << " ROC " << SROC << std::endl;
2436 
2437  //cleaning information to process sub-seeds
2438  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2439  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2440  delete sresults;
2441  delete seedloader;
2442  this->DeleteAllMethods();
2443  fMethodsMap.clear();
2444 
2445  //removing global result because it is requiring a lot of RAM for all seeds
2446 
2447  for (uint32_t i = 0; i < 32; ++i) {
2448  if (x & (uint64_t(1) << i)) {
2449  y = x & ~(uint64_t(1) << i);
2450  std::bitset<32> ybitset(y);
2451  //need at least one variable
2452  //NOTE: if the sub-seed is zero, this is the special case where
2453  //xbitset has exactly one bit set
2454  Double_t ny = log(x - y) / 0.693147;
2455  if (y == 0) {
2456  importances[ny] = SROC - 0.5;
2457  importances_norm += importances[ny];
2458  // std::cout << "SubSeed: " << y << " y:" << ybitset << "ROC " << 0.5 << std::endl;
2459  continue;
2460  }
2461 
2462  //creating loader for sub-seed
2463  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2464  //adding variables from sub-seed
2465  for (int index = 0; index < nbits; index++) {
2466  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2467  }
2468 
2469  //Loading Dataset
2470  DataLoaderCopy(subseedloader,loader);
2471 
2472  //Booking SubSeed
2473  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2474 
2475  //Train/Test/Evaluation
2476  TrainAllMethods();
2477  TestAllMethods();
2478  EvaluateAllMethods();
2479 
2480  //getting ROC
2481  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2482  importances[ny] += SROC - SSROC;
2483  //std::cout << "SubSeed: " << y << " y:" << ybitset << " x-y " << x - y << " " << std::bitset<32>(x - y) << " ny " << ny << " SROC " << SROC << " SSROC " << SSROC << " Importance = " << importances[ny] << std::endl;
2484  //cleaning information
2485  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2487  delete ssresults;
2488  delete subseedloader;
2489  this->DeleteAllMethods();
2490  fMethodsMap.clear();
2491  }
2492  }
2493  }
2494  std::cout<<"--- Variable Importance Results (Random)"<<std::endl;
2495  return GetImportance(nbits,importances,varNames);
2496 }
2497 
2498 ////////////////////////////////////////////////////////////////////////////////
2499 
2500 TH1F* TMVA::Factory::GetImportance(const int nbits,std::vector<Double_t> importances,std::vector<TString> varNames)
2501 {
2502  TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);
2503 
2504  gStyle->SetOptStat(000000);
2505 
2506  Float_t normalization = 0.0;
2507  for (int i = 0; i < nbits; i++) {
2508  normalization = normalization + importances[i];
2509  }
2510 
2511  Float_t roc = 0.0;
2512 
2514  gStyle->SetTitleXOffset(1.2);
2515 
2516 
2517  std::vector<Double_t> x_ie(nbits), y_ie(nbits);
2518  for (Int_t i = 1; i < nbits + 1; i++) {
2519  x_ie[i - 1] = (i - 1) * 1.;
2520  roc = 100.0 * importances[i - 1] / normalization;
2521  y_ie[i - 1] = roc;
2522  std::cout<<"--- "<<varNames[i-1]<<" = "<<roc<<" %"<<std::endl;
2523  vih1->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
2524  vih1->SetBinContent(i, roc);
2525  }
2526  TGraph *g_ie = new TGraph(nbits, &x_ie[0], &y_ie[0]); // exactly nbits points; nbits+2 would read past the end of the vectors
2527  g_ie->SetTitle("");
2528 
2529  vih1->LabelsOption("v >", "X");
2530  vih1->SetBarWidth(0.97);
2531  Int_t ca = TColor::GetColor("#006600");
2532  vih1->SetFillColor(ca);
2533  //Int_t ci = TColor::GetColor("#990000");
2534 
2535  vih1->GetYaxis()->SetTitle("Importance (%)");
2536  vih1->GetYaxis()->SetTitleSize(0.045);
2537  vih1->GetYaxis()->CenterTitle();
2538  vih1->GetYaxis()->SetTitleOffset(1.24);
2539 
2540  vih1->GetYaxis()->SetRangeUser(-7, 50);
2541  vih1->SetDirectory(0);
2542 
2543 // vih1->Draw("B");
2544  return vih1;
2545 }
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title.
Definition: TAttAxis.cxx:294
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:381
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Definition: Factory.cxx:2500
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
UInt_t GetNVariables() const
Definition: DataSetInfo.h:125
static long int sum(long int i)
Definition: Factory.cxx:2274
Principal Components Analysis (PCA)
Definition: TPrincipal.h:20
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:576
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition: Factory.cxx:344
Double_t GetEffSForEffB(Double_t effB, const UInt_t num_points=41)
Calculate the signal efficiency (sensitivity) for a given background efficiency (sensitivity).
Definition: ROCCurve.cxx:220
Random number generator class based on M.
Definition: TRandom3.h:27
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
auto * m
Definition: textangle.C:8
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
Definition: TH1.cxx:5204
Double_t Log(Double_t x)
Definition: TMath.h:750
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
Definition: Factory.cxx:1333
Bool_t fROC
enable to calculate corelations
Definition: Factory.h:213
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
Definition: ROCCurve.cxx:251
float Float_t
Definition: RtypesCore.h:53
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8371
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition: Tools.cxx:1337
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
R__EXTERN TStyle * gStyle
Definition: TStyle.h:410
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:70
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2283
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:628
Config & gConfig()
MsgLogger & Log() const
Definition: Configurable.h:122
std::vector< TString > GetListOfVariables() const
returns list of variables
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:139
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:48
Bool_t Verbose(void) const
Definition: Factory.h:135
EAnalysisType
Definition: Types.h:127
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:36
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:819
virtual void MakeClass(const TString &classFileName=TString("")) const =0
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
#define gROOT
Definition: TROOT.h:415
TString fTransformations
option string given by construction (presently only "V")
Definition: Factory.h:209
Basic string class.
Definition: TString.h:131
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
Ranking for variables in method (implementation)
Definition: Ranking.h:48
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
int Int_t
Definition: RtypesCore.h:41
void TrainAllMethods()
Iterates through all booked methods and calls training.
Definition: Factory.cxx:1092
virtual void TestMulticlass()
test multiclass classification
virtual void SetTitle(const char *title="")
Change (i.e.
Definition: TGraph.cxx:2315
UInt_t GetNClasses() const
Definition: DataSetInfo.h:153
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Bool_t IsNaN(Double_t x)
Definition: TMath.h:882
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
Definition: Factory.cxx:971
void WriteDataInformation(DataSetInfo &fDataSetInfo)
Definition: Factory.cxx:595
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:227
const TString & GetLabel() const
Definition: VariableInfo.h:59
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:377
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
Bool_t CheckForSilentOption(const TString &) const
check for "silence" option in configuration option string
Definition: Tools.cxx:703
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
Definition: TAxis.h:184
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
MsgLogger * fLogger
Definition: Configurable.h:128
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:486
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2388
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1224
virtual void SetBarWidth(Float_t width=0.5)
Definition: TH1.h:356
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
Definition: Factory.cxx:902
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
Definition: TAxis.cxx:939
TString fWeightFileDirPrefix
Definition: Config.h:123
static void InhibitOutput()
Definition: MsgLogger.cxx:74
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:392
#define READXML
Definition: Factory.cxx:102
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:605
TAxis * GetXaxis()
Get x axis of the graph.
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Definition: Factory.cxx:2164
Bool_t fModelPersistence
the training type
Definition: Factory.h:219
DataSet * Data() const
Definition: MethodBase.h:408
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:609
TString fWeightFileDir
Definition: Config.h:124
void ReadStateFromFile()
Function to write options and weights to file.
double pow(double, double)
TString fVerboseLevel
verbose mode
Definition: Factory.h:211
void PrintHelpMessage() const
prints out method-specific help method
IONames & GetIONames()
Definition: Config.h:100
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:411
DataSetInfo & DataInfo() const
Definition: MethodBase.h:409
Bool_t DoRegression() const
Definition: MethodBase.h:438
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:72
void SetDrawProgressBar(Bool_t d)
Definition: Config.h:71
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2187
Class that contains all the data information.
Definition: DataSetInfo.h:60
static constexpr double s
TH1F * GetHistogram()
Returns a pointer to the histogram used to draw the axis.
virtual void Draw(Option_t *chopt="")
Draw this multigraph with its current attributes.
overwrite existing object with same name
Definition: TObject.h:88
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
Definition: Cppyy.cxx:751
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
Bool_t DoMulticlass() const
Definition: MethodBase.h:439
const Int_t MinNoTrainingEvents
Definition: Factory.cxx:97
Class that contains all the data information.
Definition: DataSet.h:69
virtual ~Factory()
Destructor.
Definition: Factory.cxx:298
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so.
Definition: Factory.cxx:693
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9495
UInt_t GetNTargets() const
Definition: DataSetInfo.h:126
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:757
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC), for a given method.
Definition: Factory.cxx:841
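A hedged sketch, assuming training and testing have already run and that a method titled "BDT" (a placeholder name) was booked on this loader:

#include <cstdio>
#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"

// Hedged sketch: query the area under the ROC curve after TestAllMethods().
void PrintAUC(TMVA::Factory &factory, TMVA::DataLoader *loader)
{
   Double_t auc = factory.GetROCIntegral(loader, "BDT"); // iClass defaults to 0
   std::printf("ROC integral (AUC) = %.4f\n", auc);
}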
DataSetManager * GetDataSetManager()
Definition: DataSetInfo.h:192
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
R__EXTERN TSystem * gSystem
Definition: TSystem.h:557
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Definition: TPrincipal.cxx:410
void TMVAWelcomeMessage()
direct output, e.g., when starting a ROOT session -> no use of Logger here
Definition: Tools.cxx:1314
const char * GetName() const
Definition: MethodBase.h:333
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:427
ClassInfo * GetClassInfo(Int_t clNum) const
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
Definition: Factory.cxx:578
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition: TAttFill.h:37
const TMatrixD * CorrelationMatrix(const TString &className) const
Bool_t fCorrelations
flag to enable the calculation of input-variable correlations
Definition: Factory.h:212
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
Definition: Factory.cxx:1348
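For orientation, a hedged sketch of the full cycle this class steers; the file, tree, variable and option names are placeholders:

#include "TCut.h"
#include "TFile.h"
#include "TTree.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/Types.h"

// Hedged sketch: book, train, test and evaluate a single method.
void WorkflowSketch(TTree *sig, TTree *bkg)
{
   TFile *out = TFile::Open("TMVA.root", "RECREATE");
   TMVA::Factory factory("job", out, "!V:!Silent:AnalysisType=Classification");
   auto loader = new TMVA::DataLoader("dataset");
   loader->AddVariable("var1", 'F'); // placeholder variables
   loader->AddVariable("var2", 'F');
   loader->AddSignalTree(sig, 1.0);
   loader->AddBackgroundTree(bkg, 1.0);
   loader->PrepareTrainingAndTestTree(TCut(""), "SplitMode=Random:!V");
   factory.BookMethod(loader, TMVA::Types::kBDT, "BDT", "NTrees=200");
   factory.TrainAllMethods();
   factory.TestAllMethods();     // fills the test-tree outputs (see above)
   factory.EvaluateAllMethods(); // prints efficiencies, correlations, ROC values
   out->Close();
}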
class TMVA::Config::VariablePlotting fVariablePlotting
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see the convention for numbering bins in TH1::GetBin. If the bin number is greater than the number of bins and the axis can be extended, the number of bins is extended automatically.
Definition: TH1.cxx:8656
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponding DataSet.
Definition: Factory.cxx:1243
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
const TMatrixD * GetCovarianceMatrix() const
Definition: TPrincipal.h:58
Ssiz_t Length() const
Definition: TString.h:405
TAxis * GetYaxis()
Get y axis of the graph.
const TString & GetMethodName() const
Definition: MethodBase.h:330
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb".
Definition: TColor.cxx:1764
TAxis * GetYaxis()
Definition: TH1.h:317
void Greetings()
Print welcome message.
Definition: Factory.cxx:288
This is the main MVA steering class.
Definition: Factory.h:81
Tools & gTools()
virtual void MakePrincipals()
Perform the principal components analysis.
Definition: TPrincipal.cxx:869
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:86
void SetVerbose(Bool_t v=kTRUE)
Definition: Factory.cxx:336
TFile * fgTargetFile
Definition: Factory.h:202
virtual Double_t GetSignificance() const
compute significance of mean difference
TString fName
Definition: TNamed.h:32
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:435
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title.
Definition: TAttAxis.cxx:304
#define h(i)
Definition: RSha256.hxx:106
const Bool_t kFALSE
Definition: RtypesCore.h:88
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
Class for categorizing the phase space.
virtual void Print() const
prints the ranking of the input variables
Definition: Ranking.cxx:111
The Canvas class.
Definition: TCanvas.h:27
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:633
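A hedged sketch of this overload; the cut and the split options are illustrative, following the usual TMVA option syntax:

#include "TCut.h"
#include "TMVA/DataLoader.h"

// Hedged sketch: one common cut for signal and background, plus split options.
void SplitSketch(TMVA::DataLoader *loader)
{
   TCut common = "var1 > 0"; // placeholder cut applied to both classes
   loader->PrepareTrainingAndTestTree(common,
      "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V");
}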
virtual void CheckSetup()
check may be overridden by derived class (sometimes, e.g., fitters are used which can only be implemented in the derived class)
Definition: MethodBase.cxx:438
#define ClassImp(name)
Definition: Rtypes.h:365
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Private method to generate a ROCCurve instance for a given method.
Definition: Factory.cxx:742
double Double_t
Definition: RtypesCore.h:55
virtual void PrintHelpMessage() const =0
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:336
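The conversion is the standard normalisation rho_ij = C_ij / sqrt(C_ii * C_jj), so the returned matrix has unit diagonal. A hedged usage sketch, pairing it with the TPrincipal covariance accessor listed above:

#include "TMatrixD.h"
#include "TPrincipal.h"
#include "TMVA/Tools.h"

// Hedged sketch: correlation matrix from a filled TPrincipal.
const TMatrixD *CorrelationsOf(const TPrincipal &p)
{
   return TMVA::gTools().GetCorrelationMatrix(p.GetCovarianceMatrix());
}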
int type
Definition: TGX11.cxx:120
Class which takes the results of a multiclass classification.
void SetFile(TFile *file)
Definition: MethodBase.h:374
Double_t y[n]
Definition: legend1.C:17
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
UInt_t GetEntries(const TString &name) const
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and the other vectors are reshuffled in the same way
Definition: Tools.cxx:550
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:103
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition: TAxis.cxx:820
void AddPreDefVal(const T &)
Definition: Configurable.h:168
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:418
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition: Tools.cxx:1328
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:69
void ProcessSetup()
process all options; the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived classes
Definition: MethodBase.cxx:428
void PrintVariableRanking() const
prints ranking of input variables
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Bool_t IsNull() const
Definition: TString.h:402
const TString & GetOptions() const
Definition: Configurable.h:84
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TNamed.cxx:74
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:899
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
void SetUseColor(Bool_t uc)
Definition: Config.h:62
void SetConfigName(const char *n)
Definition: Configurable.h:63
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
#define VIBITS
Definition: Factory.cxx:105
void SetTitleXOffset(Float_t offset=1)
Definition: TStyle.h:390
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Definition: Factory.cxx:1306
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
Definition: ROCCurve.cxx:277
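A hedged drawing sketch; the point count is the default shown above, and the title string is a placeholder:

#include "TGraph.h"
#include "TMVA/ROCCurve.h"

// Hedged sketch: sample the curve into a TGraph and draw it.
void DrawROCSketch(TMVA::ROCCurve &roc)
{
   TGraph *g = roc.GetROCCurve(100); // 100 sampling points (default)
   g->SetTitle("ROC curve");         // placeholder title
   g->Draw("AL");
}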
TList * GetListOfGraphs() const
Definition: TMultiGraph.h:70
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
Definition: MethodBase.cxx:982
DataSetManager * fDataSetManager
Definition: MethodBoost.h:193
const TString & GetSplitOptions() const
Definition: DataSetInfo.h:184
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
Definition: Factory.cxx:117
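A hedged construction sketch; the job name and file name are placeholders, and the option tokens shown are the commonly used ones for this constructor:

#include "TFile.h"
#include "TMVA/Factory.h"

// Hedged sketch: create the steering object bound to an output file.
void MakeFactorySketch()
{
   TFile *out = TFile::Open("TMVA.root", "RECREATE");
   TMVA::Factory factory("myJob", out,
      "!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification");
   // ... book, train, test and evaluate methods here ...
   out->Close();
}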
DataInputHandler & DataInput()
TString GetMethodTypeName() const
Definition: MethodBase.h:331
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they hence have no effect during training
Definition: MethodBase.cxx:601
Class that is the base class for a vector of results.
Definition: Results.h:57
void SetSilent(Bool_t s)
Definition: Config.h:65
const TCut & GetCut(Int_t i) const
Definition: DataSetInfo.h:166
void SetWeightFileDir(TString fileDir)
set directory of weight file
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
void DeleteAllMethods(void)
Delete all booked methods.
Definition: Factory.cxx:316
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mode.
Definition: TStyle.cxx:1452
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings
Definition: Tools.cxx:1211
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
virtual Double_t GetTrainingEfficiency(const TString &)
Bool_t IsSignal(const Event *ev) const
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:437
Types::EAnalysisType fAnalysisType
the training type
Definition: Factory.h:218
virtual void SetTitle(const char *title)
Change/set the title of the histogram.
Definition: TH1.cxx:6323
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
prints the TMVA citation in the requested format
Definition: Tools.cxx:1453
std::vector< IMethod * > MVector
Definition: Factory.h:85
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Definition: Factory.cxx:1278
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
Definition: Factory.cxx:560
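A hedged lookup sketch; the dataset and method titles are placeholders, and the cast reflects that concrete methods derive from MethodBase:

#include "TMVA/Factory.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"

// Hedged sketch: guard the lookup with HasMethod (listed above), then inspect.
void InspectMethodSketch(TMVA::Factory &factory)
{
   if (!factory.HasMethod("dataset", "BDT")) return;
   TMVA::IMethod *im = factory.GetMethod("dataset", "BDT");
   if (auto mb = dynamic_cast<TMVA::MethodBase *>(im))
      mb->PrintHelpMessage(); // method-specific help (see above)
}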
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
const Bool_t kTRUE
Definition: RtypesCore.h:87
DataSet * GetDataSet() const
returns data set
Types::EMVA GetMethodType() const
Definition: MethodBase.h:332
void CheckForUnusedOptions() const
checks for unused options in option string
const Int_t n
Definition: legend1.C:16
virtual void TestClassification()
run the standard tests of the classifier output on the test sample
const Event * GetEvent() const
Definition: DataSet.cxx:202
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:436
char name[80]
Definition: TGX11.cxx:109
double log(double)
Class that is the base class for a vector of results.
TAxis * GetXaxis()
Get the x axis of the histogram.
Definition: TH1.h:316
void SetConfigDescription(const char *d)
Definition: Configurable.h:64
Bool_t fVerbose
verbose mode
Definition: Factory.h:210
const char * Data() const
Definition: TString.h:364
MethodBase * BookMethodWeightfile(DataLoader *dataloader, TMVA::Types::EMVA methodType, const TString &weightfile)
Adds an already constructed method to be managed by this factory.
Definition: Factory.cxx:496
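A hedged reloading sketch; the weight-file path follows the usual <dataset>/weights naming convention but is a placeholder here:

#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/MethodBase.h"
#include "TMVA/Types.h"

// Hedged sketch: attach an already trained method from its weight file so the
// factory can test and evaluate it alongside freshly booked ones.
void ReloadSketch(TMVA::Factory &factory, TMVA::DataLoader *loader)
{
   TMVA::MethodBase *m = factory.BookMethodWeightfile(
      loader, TMVA::Types::kBDT, "dataset/weights/job_BDT.weights.xml");
   if (m) factory.TestAllMethods(); // runs on the test sample (see above)
}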