Logo ROOT  
Reference Guide
Factory.cxx
Go to the documentation of this file.
1 // @(#)Root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 // Updated by: Omar Zapata, Kim Albertsson
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Factory *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors : *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
16  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * Omar Zapata <Omar.Zapata@cern.ch> - UdeA/ITM Colombia *
22  * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
23  * Sergei Gleyzer <Sergei.Gleyzer@cern.ch> - U of Florida & CERN *
24  * Kim Albertsson <kim.albertsson@cern.ch> - LTU & CERN *
25  * *
26  * Copyright (c) 2005-2015: *
27  * CERN, Switzerland *
28  * U. of Victoria, Canada *
29  * MPI-K Heidelberg, Germany *
30  * U. of Bonn, Germany *
31  * UdeA/ITM, Colombia *
32  * U. of Florida, USA *
33  * *
34  * Redistribution and use in source and binary forms, with or without *
35  * modification, are permitted according to the terms listed in LICENSE *
36  * (http://tmva.sourceforge.net/LICENSE) *
37  **********************************************************************************/
38 
39 /*! \class TMVA::Factory
40 \ingroup TMVA
41 
42 This is the main MVA steering class.
43 It creates all MVA methods, and guides them through the training, testing and
44 evaluation phases.
45 */
46 
47 #include "TMVA/Factory.h"
48 
49 #include "TMVA/ClassifierFactory.h"
50 #include "TMVA/Config.h"
51 #include "TMVA/Configurable.h"
52 #include "TMVA/Tools.h"
53 #include "TMVA/Ranking.h"
54 #include "TMVA/DataSet.h"
55 #include "TMVA/IMethod.h"
56 #include "TMVA/MethodBase.h"
57 #include "TMVA/DataInputHandler.h"
58 #include "TMVA/DataSetManager.h"
59 #include "TMVA/DataSetInfo.h"
60 #include "TMVA/DataLoader.h"
61 #include "TMVA/MethodBoost.h"
62 #include "TMVA/MethodCategory.h"
63 #include "TMVA/ROCCalc.h"
64 #include "TMVA/ROCCurve.h"
65 #include "TMVA/MsgLogger.h"
66 
67 #include "TMVA/VariableInfo.h"
68 #include "TMVA/VariableTransform.h"
69 
#include "TMVA/Results.h"
#include "TMVA/ResultsClassification.h"
#include "TMVA/ResultsRegression.h"
#include "TMVA/ResultsMulticlass.h"
74 #include <list>
75 #include <bitset>
76 #include <set>
77 
78 #include "TMVA/Types.h"
79 
80 #include "TROOT.h"
81 #include "TFile.h"
82 #include "TLeaf.h"
83 #include "TEventList.h"
84 #include "TH2.h"
85 #include "TGraph.h"
86 #include "TStyle.h"
87 #include "TMatrixF.h"
88 #include "TMatrixDSym.h"
89 #include "TMultiGraph.h"
90 #include "TPrincipal.h"
91 #include "TMath.h"
92 #include "TSystem.h"
93 #include "TCanvas.h"
94 
96 //const Int_t MinNoTestEvents = 1;
97 
99 
100 #define READXML kTRUE
101 
102 //number of bits for bitset
103 #define VIBITS 32
104 
105 
106 
107 ////////////////////////////////////////////////////////////////////////////////
108 /// Standard constructor.
109 ///
110 /// - jobname : this name will appear in all weight file names produced by the MVAs
111 /// - theTargetFile : output ROOT file; the test tree and all evaluation plots
112 /// will be stored here
113 /// - theOption : option string; currently: "V" for verbose
114 
115 TMVA::Factory::Factory( TString jobName, TFile* theTargetFile, TString theOption )
116 : Configurable ( theOption ),
117  fTransformations ( "I" ),
118  fVerbose ( kFALSE ),
119  fVerboseLevel ( kINFO ),
120  fCorrelations ( kFALSE ),
121  fROC ( kTRUE ),
122  fSilentFile ( theTargetFile == nullptr ),
123  fJobName ( jobName ),
124  fAnalysisType ( Types::kClassification ),
125  fModelPersistence (kTRUE)
126 {
127  fName = "Factory";
128  fgTargetFile = theTargetFile;
130 
131  // render silent
132  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure is silent if wanted to
133 
134 
135  // init configurable
136  SetConfigDescription( "Configuration options for Factory running" );
137  SetConfigName( GetName() );
138 
139  // histograms are not automatically associated with the current
140  // directory and hence don't go out of scope when closing the file
141  // TH1::AddDirectory(kFALSE);
142  Bool_t silent = kFALSE;
143 #ifdef WIN32
144  // under Windows, switch progress bar and color off by default, as the typical windows shell doesn't handle these (would need different sequences..)
145  Bool_t color = kFALSE;
146  Bool_t drawProgressBar = kFALSE;
147 #else
148  Bool_t color = !gROOT->IsBatch();
149  Bool_t drawProgressBar = kTRUE;
150 #endif
151  DeclareOptionRef( fVerbose, "V", "Verbose flag" );
152  DeclareOptionRef( fVerboseLevel=TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)" );
153  AddPreDefVal(TString("Debug"));
154  AddPreDefVal(TString("Verbose"));
155  AddPreDefVal(TString("Info"));
156  DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
157  DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
158  DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
159  DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
160  DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
161  DeclareOptionRef( drawProgressBar,
162  "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
164  "ModelPersistence",
165  "Option to save the trained model in xml file or using serialization");
166 
167  TString analysisType("Auto");
168  DeclareOptionRef( analysisType,
169  "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
170  AddPreDefVal(TString("Classification"));
171  AddPreDefVal(TString("Regression"));
172  AddPreDefVal(TString("Multiclass"));
173  AddPreDefVal(TString("Auto"));
174 
175  ParseOptions();
177 
178  if (Verbose()) fLogger->SetMinType( kVERBOSE );
179  if (fVerboseLevel.CompareTo("Debug") ==0) fLogger->SetMinType( kDEBUG );
180  if (fVerboseLevel.CompareTo("Verbose") ==0) fLogger->SetMinType( kVERBOSE );
181  if (fVerboseLevel.CompareTo("Info") ==0) fLogger->SetMinType( kINFO );
182 
183  // global settings
184  gConfig().SetUseColor( color );
185  gConfig().SetSilent( silent );
186  gConfig().SetDrawProgressBar( drawProgressBar );
187 
188  analysisType.ToLower();
189  if ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
190  else if( analysisType == "regression" ) fAnalysisType = Types::kRegression;
191  else if( analysisType == "multiclass" ) fAnalysisType = Types::kMulticlass;
192  else if( analysisType == "auto" ) fAnalysisType = Types::kNoAnalysisType;
193 
194 // Greetings();
195 }
196 
197 ////////////////////////////////////////////////////////////////////////////////
198 /// Constructor.
199 
201 : Configurable ( theOption ),
202  fTransformations ( "I" ),
203  fVerbose ( kFALSE ),
204  fCorrelations ( kFALSE ),
205  fROC ( kTRUE ),
206  fSilentFile ( kTRUE ),
207  fJobName ( jobName ),
208  fAnalysisType ( Types::kClassification ),
209  fModelPersistence (kTRUE)
210 {
211  fName = "Factory";
212  fgTargetFile = nullptr;
214 
215 
216  // render silent
217  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure is silent if wanted to
218 
219 
220  // init configurable
221  SetConfigDescription( "Configuration options for Factory running" );
222  SetConfigName( GetName() );
223 
224  // histograms are not automatically associated with the current
225  // directory and hence don't go out of scope when closing the file
227  Bool_t silent = kFALSE;
228 #ifdef WIN32
229  // under Windows, switch progress bar and color off by default, as the typical windows shell doesn't handle these (would need different sequences..)
230  Bool_t color = kFALSE;
231  Bool_t drawProgressBar = kFALSE;
232 #else
233  Bool_t color = !gROOT->IsBatch();
234  Bool_t drawProgressBar = kTRUE;
235 #endif
236  DeclareOptionRef( fVerbose, "V", "Verbose flag" );
237  DeclareOptionRef( fVerboseLevel=TString("Info"), "VerboseLevel", "VerboseLevel (Debug/Verbose/Info)" );
238  AddPreDefVal(TString("Debug"));
239  AddPreDefVal(TString("Verbose"));
240  AddPreDefVal(TString("Info"));
241  DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
242  DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
243  DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
244  DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
245  DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
246  DeclareOptionRef( drawProgressBar,
247  "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
249  "ModelPersistence",
250  "Option to save the trained model in xml file or using serialization");
251 
252  TString analysisType("Auto");
253  DeclareOptionRef( analysisType,
254  "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
255  AddPreDefVal(TString("Classification"));
256  AddPreDefVal(TString("Regression"));
257  AddPreDefVal(TString("Multiclass"));
258  AddPreDefVal(TString("Auto"));
259 
260  ParseOptions();
262 
263  if (Verbose()) fLogger->SetMinType( kVERBOSE );
264  if (fVerboseLevel.CompareTo("Debug") ==0) fLogger->SetMinType( kDEBUG );
265  if (fVerboseLevel.CompareTo("Verbose") ==0) fLogger->SetMinType( kVERBOSE );
266  if (fVerboseLevel.CompareTo("Info") ==0) fLogger->SetMinType( kINFO );
267 
268  // global settings
269  gConfig().SetUseColor( color );
270  gConfig().SetSilent( silent );
271  gConfig().SetDrawProgressBar( drawProgressBar );
272 
273  analysisType.ToLower();
274  if ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
275  else if( analysisType == "regression" ) fAnalysisType = Types::kRegression;
276  else if( analysisType == "multiclass" ) fAnalysisType = Types::kMulticlass;
277  else if( analysisType == "auto" ) fAnalysisType = Types::kNoAnalysisType;
278 
279  Greetings();
280 }
281 
282 ////////////////////////////////////////////////////////////////////////////////
283 /// Print welcome message.
284 /// Options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg
285 
287 {
289  gTools().TMVAWelcomeMessage( Log(), gTools().kLogoWelcomeMsg );
290  gTools().TMVAVersionMessage( Log() ); Log() << Endl;
291 }
292 
293 ////////////////////////////////////////////////////////////////////////////////
294 /// Destructor.
295 
297 {
298  std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
299  for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);
300 
301  this->DeleteAllMethods();
302 
303 
304  // problem with call of REGISTER_METHOD macro ...
305  // ClassifierFactory::DestroyInstance();
306  // Types::DestroyInstance();
307  //Tools::DestroyInstance();
308  //Config::DestroyInstance();
309 }
310 
311 ////////////////////////////////////////////////////////////////////////////////
312 /// Delete methods.
313 
315 {
316  std::map<TString,MVector*>::iterator itrMap;
317 
318  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
319  {
320  MVector *methods=itrMap->second;
321  // delete methods
322  MVector::iterator itrMethod = methods->begin();
323  for (; itrMethod != methods->end(); ++itrMethod) {
324  Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
325  delete (*itrMethod);
326  }
327  methods->clear();
328  delete methods;
329  }
330 }
331 
332 ////////////////////////////////////////////////////////////////////////////////
333 
335 {
336  fVerbose = v;
337 }
338 
339 ////////////////////////////////////////////////////////////////////////////////
340 /// Book a classifier or regression method.
341 
342 TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption )
343 {
344  if(fModelPersistence) gSystem->MakeDirectory(loader->GetName());//creating directory for DataLoader output
345 
346  TString datasetname=loader->GetName();
347 
348  if( fAnalysisType == Types::kNoAnalysisType ){
349  if( loader->GetDataSetInfo().GetNClasses()==2
350  && loader->GetDataSetInfo().GetClassInfo("Signal") != NULL
351  && loader->GetDataSetInfo().GetClassInfo("Background") != NULL
352  ){
353  fAnalysisType = Types::kClassification; // default is classification
354  } else if( loader->GetDataSetInfo().GetNClasses() >= 2 ){
355  fAnalysisType = Types::kMulticlass; // if two classes, but not named "Signal" and "Background"
356  } else
357  Log() << kFATAL << "No analysis type for " << loader->GetDataSetInfo().GetNClasses() << " classes and "
358  << loader->GetDataSetInfo().GetNTargets() << " regression targets." << Endl;
359  }
360 
361  // booking via name; the names are translated into enums and the
362  // corresponding overloaded BookMethod is called
363 
364  if(fMethodsMap.find(datasetname)!=fMethodsMap.end())
365  {
366  if (GetMethod( datasetname,methodTitle ) != 0) {
367  Log() << kFATAL << "Booking failed since method with title <"
368  << methodTitle <<"> already exists "<< "in with DataSet Name <"<< loader->GetName()<<"> "
369  << Endl;
370  }
371  }
372 
373 
374  Log() << kHEADER << "Booking method: " << gTools().Color("bold") << methodTitle
375  // << gTools().Color("reset")<<" DataSet Name: "<<gTools().Color("bold")<<loader->GetName()
376  << gTools().Color("reset") << Endl << Endl;
377 
378  // interpret option string with respect to a request for boosting (i.e., BostNum > 0)
379  Int_t boostNum = 0;
380  TMVA::Configurable* conf = new TMVA::Configurable( theOption );
381  conf->DeclareOptionRef( boostNum = 0, "Boost_num",
382  "Number of times the classifier will be boosted" );
383  conf->ParseOptions();
384  delete conf;
385  // this is name of weight file directory
386  TString fileDir;
387  if(fModelPersistence)
388  {
389  // find prefix in fWeightFileDir;
391  fileDir = prefix;
392  if (!prefix.IsNull())
393  if (fileDir[fileDir.Length()-1] != '/') fileDir += "/";
394  fileDir += loader->GetName();
395  fileDir += "/" + gConfig().GetIONames().fWeightFileDir;
396  }
397  // initialize methods
398  IMethod* im;
399  if (!boostNum) {
400  im = ClassifierFactory::Instance().Create(theMethodName.Data(), fJobName, methodTitle,
401  loader->GetDataSetInfo(), theOption);
402  }
403  else {
404  // boosted classifier, requires a specific definition, making it transparent for the user
405  Log() << kDEBUG <<"Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
406  im = ClassifierFactory::Instance().Create("Boost", fJobName, methodTitle, loader->GetDataSetInfo(), theOption);
407  MethodBoost *methBoost = dynamic_cast<MethodBoost *>(im); // DSMTEST divided into two lines
408  if (!methBoost) { // DSMTEST
409  Log() << kFATAL << "Method with type kBoost cannot be casted to MethodCategory. /Factory" << Endl; // DSMTEST
410  return nullptr;
411  }
412  if (fModelPersistence) methBoost->SetWeightFileDir(fileDir);
413  methBoost->SetModelPersistence(fModelPersistence);
414  methBoost->SetBoostedMethodName(theMethodName); // DSMTEST divided into two lines
415  methBoost->fDataSetManager = loader->GetDataSetInfo().GetDataSetManager(); // DSMTEST
416  methBoost->SetFile(fgTargetFile);
417  methBoost->SetSilentFile(IsSilentFile());
418  }
419 
420  MethodBase *method = dynamic_cast<MethodBase*>(im);
421  if (method==0) return 0; // could not create method
422 
423  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
424  if (method->GetMethodType() == Types::kCategory) { // DSMTEST
425  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(im)); // DSMTEST
426  if (!methCat) {// DSMTEST
427  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Factory" << Endl; // DSMTEST
428  return nullptr;
429  }
430  if(fModelPersistence) methCat->SetWeightFileDir(fileDir);
431  methCat->SetModelPersistence(fModelPersistence);
432  methCat->fDataSetManager = loader->GetDataSetInfo().GetDataSetManager(); // DSMTEST
433  methCat->SetFile(fgTargetFile);
434  methCat->SetSilentFile(IsSilentFile());
435  } // DSMTEST
436 
437 
438  if (!method->HasAnalysisType( fAnalysisType,
439  loader->GetDataSetInfo().GetNClasses(),
440  loader->GetDataSetInfo().GetNTargets() )) {
441  Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling " ;
442  if (fAnalysisType == Types::kRegression) {
443  Log() << "regression with " << loader->GetDataSetInfo().GetNTargets() << " targets." << Endl;
444  }
445  else if (fAnalysisType == Types::kMulticlass ) {
446  Log() << "multiclass classification with " << loader->GetDataSetInfo().GetNClasses() << " classes." << Endl;
447  }
448  else {
449  Log() << "classification with " << loader->GetDataSetInfo().GetNClasses() << " classes." << Endl;
450  }
451  return 0;
452  }
453 
454  if(fModelPersistence) method->SetWeightFileDir(fileDir);
455  method->SetModelPersistence(fModelPersistence);
456  method->SetAnalysisType( fAnalysisType );
457  method->SetupMethod();
458  method->ParseOptions();
459  method->ProcessSetup();
460  method->SetFile(fgTargetFile);
461  method->SetSilentFile(IsSilentFile());
462 
463  // check-for-unused-options is performed; may be overridden by derived classes
464  method->CheckSetup();
465 
466  if(fMethodsMap.find(datasetname)==fMethodsMap.end())
467  {
468  MVector *mvector=new MVector;
469  fMethodsMap[datasetname]=mvector;
470  }
471  fMethodsMap[datasetname]->push_back( method );
472  return method;
473 }
474 
475 ////////////////////////////////////////////////////////////////////////////////
476 /// Books MVA method. The option configuration string is custom for each MVA
477 /// the TString field "theNameAppendix" serves to define (and distinguish)
478 /// several instances of a given MVA, eg, when one wants to compare the
479 /// performance of various configurations
480 
482 {
483  return BookMethod(loader, Types::Instance().GetMethodName( theMethod ), methodTitle, theOption );
484 }
485 
486 ////////////////////////////////////////////////////////////////////////////////
487 /// Adds an already constructed method to be managed by this factory.
488 ///
489 /// \note Private.
490 /// \note Know what you are doing when using this method. The method that you
491 /// are loading could be trained already.
492 ///
493 
495 {
496  TString datasetname = loader->GetName();
497  std::string methodTypeName = std::string(Types::Instance().GetMethodName(methodType).Data());
498  DataSetInfo &dsi = loader->GetDataSetInfo();
499 
500  IMethod *im = ClassifierFactory::Instance().Create(methodTypeName, dsi, weightfile );
501  MethodBase *method = (dynamic_cast<MethodBase*>(im));
502 
503  if (method == nullptr) return nullptr;
504 
505  if( method->GetMethodType() == Types::kCategory ){
506  Log() << kERROR << "Cannot handle category methods for now." << Endl;
507  }
508 
509  TString fileDir;
510  if(fModelPersistence) {
511  // find prefix in fWeightFileDir;
513  fileDir = prefix;
514  if (!prefix.IsNull())
515  if (fileDir[fileDir.Length() - 1] != '/')
516  fileDir += "/";
517  fileDir=loader->GetName();
518  fileDir+="/"+gConfig().GetIONames().fWeightFileDir;
519  }
520 
521  if(fModelPersistence) method->SetWeightFileDir(fileDir);
522  method->SetModelPersistence(fModelPersistence);
523  method->SetAnalysisType( fAnalysisType );
524  method->SetupMethod();
525  method->SetFile(fgTargetFile);
526  method->SetSilentFile(IsSilentFile());
527 
528  method->DeclareCompatibilityOptions();
529 
530  // read weight file
531  method->ReadStateFromFile();
532 
533  //method->CheckSetup();
534 
535  TString methodTitle = method->GetName();
536  if (HasMethod(datasetname, methodTitle) != 0) {
537  Log() << kFATAL << "Booking failed since method with title <"
538  << methodTitle <<"> already exists "<< "in with DataSet Name <"<< loader->GetName()<<"> "
539  << Endl;
540  }
541 
542  Log() << kINFO << "Booked classifier \"" << method->GetMethodName()
543  << "\" of type: \"" << method->GetMethodTypeName() << "\"" << Endl;
544 
545  if(fMethodsMap.count(datasetname) == 0) {
546  MVector *mvector = new MVector;
547  fMethodsMap[datasetname] = mvector;
548  }
549 
550  fMethodsMap[datasetname]->push_back( method );
551 
552  return method;
553 }
554 
555 ////////////////////////////////////////////////////////////////////////////////
556 /// Returns pointer to MVA that corresponds to given method title.
557 
558 TMVA::IMethod* TMVA::Factory::GetMethod(const TString& datasetname, const TString &methodTitle ) const
559 {
560  if(fMethodsMap.find(datasetname)==fMethodsMap.end()) return 0;
561 
562  MVector *methods=fMethodsMap.find(datasetname)->second;
563 
564  MVector::const_iterator itrMethod;
565  //
566  for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
567  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
568  if ( (mva->GetMethodName())==methodTitle ) return mva;
569  }
570  return 0;
571 }
572 
573 ////////////////////////////////////////////////////////////////////////////////
574 /// Checks whether a given method name is defined for a given dataset.
575 
576 Bool_t TMVA::Factory::HasMethod(const TString& datasetname, const TString &methodTitle ) const
577 {
578  if(fMethodsMap.find(datasetname)==fMethodsMap.end()) return 0;
579 
580  std::string methodName = methodTitle.Data();
581  auto isEqualToMethodName = [&methodName](TMVA::IMethod * m) {
582  return ( 0 == methodName.compare( m->GetName() ) );
583  };
584 
585  TMVA::Factory::MVector * methods = this->fMethodsMap.at(datasetname);
586  Bool_t isMethodNameExisting = std::any_of( methods->begin(), methods->end(), isEqualToMethodName);
587 
588  return isMethodNameExisting;
589 }
590 
591 ////////////////////////////////////////////////////////////////////////////////
592 
594 {
595  RootBaseDir()->cd();
596 
597  if(!RootBaseDir()->GetDirectory(fDataSetInfo.GetName())) RootBaseDir()->mkdir(fDataSetInfo.GetName());
598  else return; //loader is now in the output file, we dont need to save again
599 
600  RootBaseDir()->cd(fDataSetInfo.GetName());
601  fDataSetInfo.GetDataSet(); // builds dataset (including calculation of correlation matrix)
602 
603 
604  // correlation matrix of the default DS
605  const TMatrixD* m(0);
606  const TH2* h(0);
607 
608  if(fAnalysisType == Types::kMulticlass){
609  for (UInt_t cls = 0; cls < fDataSetInfo.GetNClasses() ; cls++) {
610  m = fDataSetInfo.CorrelationMatrix(fDataSetInfo.GetClassInfo(cls)->GetName());
611  h = fDataSetInfo.CreateCorrelationMatrixHist(m, TString("CorrelationMatrix")+fDataSetInfo.GetClassInfo(cls)->GetName(),
612  TString("Correlation Matrix (")+ fDataSetInfo.GetClassInfo(cls)->GetName() +TString(")"));
613  if (h!=0) {
614  h->Write();
615  delete h;
616  }
617  }
618  }
619  else{
620  m = fDataSetInfo.CorrelationMatrix( "Signal" );
621  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixS", "Correlation Matrix (signal)");
622  if (h!=0) {
623  h->Write();
624  delete h;
625  }
626 
627  m = fDataSetInfo.CorrelationMatrix( "Background" );
628  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixB", "Correlation Matrix (background)");
629  if (h!=0) {
630  h->Write();
631  delete h;
632  }
633 
634  m = fDataSetInfo.CorrelationMatrix( "Regression" );
635  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrix", "Correlation Matrix");
636  if (h!=0) {
637  h->Write();
638  delete h;
639  }
640  }
641 
642  // some default transformations to evaluate
643  // NOTE: all transformations are destroyed after this test
644  TString processTrfs = "I"; //"I;N;D;P;U;G,D;"
645 
646  // plus some user defined transformations
647  processTrfs = fTransformations;
648 
649  // remove any trace of identity transform - if given (avoid to apply it twice)
650  std::vector<TMVA::TransformationHandler*> trfs;
651  TransformationHandler* identityTrHandler = 0;
652 
653  std::vector<TString> trfsDef = gTools().SplitString(processTrfs,';');
654  std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
655  for (; trfsDefIt!=trfsDef.end(); ++trfsDefIt) {
656  trfs.push_back(new TMVA::TransformationHandler(fDataSetInfo, "Factory"));
657  TString trfS = (*trfsDefIt);
658 
659  //Log() << kINFO << Endl;
660  Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
662  fDataSetInfo,
663  *(trfs.back()),
664  Log() );
665 
666  if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();
667  }
668 
669  const std::vector<Event*>& inputEvents = fDataSetInfo.GetDataSet()->GetEventCollection();
670 
671  // apply all transformations
672  std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
673 
674  for (;trfIt != trfs.end(); ++trfIt) {
675  // setting a Root dir causes the variables distributions to be saved to the root file
676  (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.GetName()));// every dataloader have its own dir
677  (*trfIt)->CalcTransformations(inputEvents);
678  }
679  if(identityTrHandler) identityTrHandler->PrintVariableRanking();
680 
681  // clean up
682  for (trfIt = trfs.begin(); trfIt != trfs.end(); ++trfIt) delete *trfIt;
683 }
684 
685 ////////////////////////////////////////////////////////////////////////////////
686 /// Iterates through all booked methods and sees if they use parameter tuning and if so..
687 /// does just that i.e. calls "Method::Train()" for different parameter settings and
688 /// keeps in mind the "optimal one"... and that's the one that will later on be used
689 /// in the main training loop.
690 
691 std::map<TString,Double_t> TMVA::Factory::OptimizeAllMethods(TString fomType, TString fitType)
692 {
693 
694  std::map<TString,MVector*>::iterator itrMap;
695  std::map<TString,Double_t> TunedParameters;
696  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
697  {
698  MVector *methods=itrMap->second;
699 
700  MVector::iterator itrMethod;
701 
702  // iterate over methods and optimize
703  for( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
705  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
706  if (!mva) {
707  Log() << kFATAL << "Dynamic cast to MethodBase failed" <<Endl;
708  return TunedParameters;
709  }
710 
711  if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
712  Log() << kWARNING << "Method " << mva->GetMethodName()
713  << " not trained (training tree has less entries ["
714  << mva->Data()->GetNTrainingEvents()
715  << "] than required [" << MinNoTrainingEvents << "]" << Endl;
716  continue;
717  }
718 
719  Log() << kINFO << "Optimize method: " << mva->GetMethodName() << " for "
720  << (fAnalysisType == Types::kRegression ? "Regression" :
721  (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl;
722 
723  TunedParameters = mva->OptimizeTuningParameters(fomType,fitType);
724  Log() << kINFO << "Optimization of tuning parameters finished for Method:"<<mva->GetName() << Endl;
725  }
726  }
727 
728  return TunedParameters;
729 
730 }
731 
732 ////////////////////////////////////////////////////////////////////////////////
733 /// Private method to generate a ROCCurve instance for a given method.
734 /// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
735 /// understands.
736 ///
737 /// \note You own the retured pointer.
738 ///
739 
742 {
743  return GetROC((TString)loader->GetName(), theMethodName, iClass, type);
744 }
745 
746 ////////////////////////////////////////////////////////////////////////////////
747 /// Private method to generate a ROCCurve instance for a given method.
748 /// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
749 /// understands.
750 ///
751 /// \note You own the retured pointer.
752 ///
753 
755 {
756  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
757  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
758  return nullptr;
759  }
760 
761  if (!this->HasMethod(datasetname, theMethodName)) {
762  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
763  << Endl;
764  return nullptr;
765  }
766 
767  std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
768  if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
769  Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.")
770  << Endl;
771  return nullptr;
772  }
773 
774  TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(this->GetMethod(datasetname, theMethodName));
775  TMVA::DataSet *dataset = method->Data();
776  dataset->SetCurrentType(type);
777  TMVA::Results *results = dataset->GetResults(theMethodName, type, this->fAnalysisType);
778 
779  UInt_t nClasses = method->DataInfo().GetNClasses();
780  if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
781  Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
782  iClass, nClasses)
783  << Endl;
784  return nullptr;
785  }
786 
787  TMVA::ROCCurve *rocCurve = nullptr;
788  if (this->fAnalysisType == Types::kClassification) {
789 
790  std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
791  std::vector<Bool_t> *mvaResTypes = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
792  std::vector<Float_t> mvaResWeights;
793 
794  auto eventCollection = dataset->GetEventCollection(type);
795  mvaResWeights.reserve(eventCollection.size());
796  for (auto ev : eventCollection) {
797  mvaResWeights.push_back(ev->GetWeight());
798  }
799 
800  rocCurve = new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
801 
802  } else if (this->fAnalysisType == Types::kMulticlass) {
803  std::vector<Float_t> mvaRes;
804  std::vector<Bool_t> mvaResTypes;
805  std::vector<Float_t> mvaResWeights;
806 
807  std::vector<std::vector<Float_t>> *rawMvaRes = dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();
808 
809  // Vector transpose due to values being stored as
810  // [ [0, 1, 2], [0, 1, 2], ... ]
811  // in ResultsMulticlass::GetValueVector.
812  mvaRes.reserve(rawMvaRes->size());
813  for (auto item : *rawMvaRes) {
814  mvaRes.push_back(item[iClass]);
815  }
816 
817  auto eventCollection = dataset->GetEventCollection(type);
818  mvaResTypes.reserve(eventCollection.size());
819  mvaResWeights.reserve(eventCollection.size());
820  for (auto ev : eventCollection) {
821  mvaResTypes.push_back(ev->GetClass() == iClass);
822  mvaResWeights.push_back(ev->GetWeight());
823  }
824 
825  rocCurve = new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
826  }
827 
828  return rocCurve;
829 }
830 
831 ////////////////////////////////////////////////////////////////////////////////
832 /// Calculate the integral of the ROC curve, also known as the area under curve
833 /// (AUC), for a given method.
834 ///
835 /// Argument iClass specifies the class to generate the ROC curve in a
836 /// multiclass setting. It is ignored for binary classification.
837 ///
838 
840 {
841  return GetROCIntegral((TString)loader->GetName(), theMethodName, iClass);
842 }
843 
844 ////////////////////////////////////////////////////////////////////////////////
845 /// Calculate the integral of the ROC curve, also known as the area under curve
846 /// (AUC), for a given method.
847 ///
848 /// Argument iClass specifies the class to generate the ROC curve in a
849 /// multiclass setting. It is ignored for binary classification.
850 ///
851 
852 Double_t TMVA::Factory::GetROCIntegral(TString datasetname, TString theMethodName, UInt_t iClass)
853 {
854  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
855  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
856  return 0;
857  }
858 
859  if ( ! this->HasMethod(datasetname, theMethodName) ) {
860  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
861  return 0;
862  }
863 
864  std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
865  if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
866  Log() << kERROR << Form("Can only generate ROC integral for analysis type kClassification. and kMulticlass.")
867  << Endl;
868  return 0;
869  }
870 
871  TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
872  if (!rocCurve) {
873  Log() << kFATAL << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ",
874  theMethodName.Data(), datasetname.Data())
875  << Endl;
876  return 0;
877  }
878 
880  Double_t rocIntegral = rocCurve->GetROCIntegral(npoints);
881  delete rocCurve;
882 
883  return rocIntegral;
884 }
885 
886 ////////////////////////////////////////////////////////////////////////////////
887 /// Argument iClass specifies the class to generate the ROC curve in a
888 /// multiclass setting. It is ignored for binary classification.
889 ///
890 /// Returns a ROC graph for a given method, or nullptr on error.
891 ///
892 /// Note: Evaluation of the given method must have been run prior to ROC
 893 /// generation through Factory::EvaluateAllMethods.
894 ///
895 /// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
896 /// and the others considered background. This is ok in binary classification
 897 /// but in multi-class classification, the ROC surface is an N dimensional
898 /// shape, where N is number of classes - 1.
899 
900 TGraph* TMVA::Factory::GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles, UInt_t iClass)
901 {
902  return GetROCCurve( (TString)loader->GetName(), theMethodName, setTitles, iClass );
903 }
904 
905 ////////////////////////////////////////////////////////////////////////////////
906 /// Argument iClass specifies the class to generate the ROC curve in a
907 /// multiclass setting. It is ignored for binary classification.
908 ///
909 /// Returns a ROC graph for a given method, or nullptr on error.
910 ///
911 /// Note: Evaluation of the given method must have been run prior to ROC
 912 /// generation through Factory::EvaluateAllMethods.
913 ///
914 /// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
915 /// and the others considered background. This is ok in binary classification
 916 /// but in multi-class classification, the ROC surface is an N dimensional
917 /// shape, where N is number of classes - 1.
918 
919 TGraph* TMVA::Factory::GetROCCurve(TString datasetname, TString theMethodName, Bool_t setTitles, UInt_t iClass)
920 {
921  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
922  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
923  return nullptr;
924  }
925 
926  if ( ! this->HasMethod(datasetname, theMethodName) ) {
927  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
928  return nullptr;
929  }
930 
931  std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
932  if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
933  Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.") << Endl;
934  return nullptr;
935  }
936 
937  TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
938  TGraph *graph = nullptr;
939 
940  if ( ! rocCurve ) {
941  Log() << kFATAL << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
942  return nullptr;
943  }
944 
945  graph = (TGraph *)rocCurve->GetROCCurve()->Clone();
946  delete rocCurve;
947 
948  if(setTitles) {
949  graph->GetYaxis()->SetTitle("Background rejection (Specificity)");
950  graph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
951  graph->SetTitle(Form("Signal efficiency vs. Background rejection (%s)", theMethodName.Data()));
952  }
953 
954  return graph;
955 }
956 
957 ////////////////////////////////////////////////////////////////////////////////
958 /// Generate a collection of graphs, for all methods for a given class. Suitable
959 /// for comparing method performance.
960 ///
961 /// Argument iClass specifies the class to generate the ROC curve in a
962 /// multiclass setting. It is ignored for binary classification.
963 ///
964 /// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
965 /// and the others considered background. This is ok in binary classification
 966 /// but in multi-class classification, the ROC surface is an N dimensional
967 /// shape, where N is number of classes - 1.
968 
970 {
971  return GetROCCurveAsMultiGraph((TString)loader->GetName(), iClass);
972 }
973 
974 ////////////////////////////////////////////////////////////////////////////////
975 /// Generate a collection of graphs, for all methods for a given class. Suitable
976 /// for comparing method performance.
977 ///
978 /// Argument iClass specifies the class to generate the ROC curve in a
979 /// multiclass setting. It is ignored for binary classification.
980 ///
981 /// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
982 /// and the others considered background. This is ok in binary classification
 983 /// but in multi-class classification, the ROC surface is an N dimensional
984 /// shape, where N is number of classes - 1.
985 
987 {
988  UInt_t line_color = 1;
989 
990  TMultiGraph *multigraph = new TMultiGraph();
991 
992  MVector *methods = fMethodsMap[datasetname.Data()];
993  for (auto * method_raw : *methods) {
994  TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(method_raw);
995  if (method == nullptr) { continue; }
996 
997  TString methodName = method->GetMethodName();
998  UInt_t nClasses = method->DataInfo().GetNClasses();
999 
1000  if ( this->fAnalysisType == Types::kMulticlass && iClass >= nClasses ) {
1001  Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass, nClasses) << Endl;
1002  continue;
1003  }
1004 
1005  TString className = method->DataInfo().GetClassInfo(iClass)->GetName();
1006 
1007  TGraph *graph = this->GetROCCurve(datasetname, methodName, false, iClass);
1008  graph->SetTitle(methodName);
1009 
1010  graph->SetLineWidth(2);
1011  graph->SetLineColor(line_color++);
1012  graph->SetFillColor(10);
1013 
1014  multigraph->Add(graph);
1015  }
1016 
1017  if ( multigraph->GetListOfGraphs() == nullptr ) {
1018  Log() << kERROR << Form("No metohds have class %i defined.", iClass) << Endl;
1019  return nullptr;
1020  }
1021 
1022  return multigraph;
1023 }
1024 
1025 ////////////////////////////////////////////////////////////////////////////////
1026 /// Draws ROC curves for all methods booked with the factory for a given class
1027 /// onto a canvas.
1028 ///
1029 /// Argument iClass specifies the class to generate the ROC curve in a
1030 /// multiclass setting. It is ignored for binary classification.
1031 ///
1032 /// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
1033 /// and the others considered background. This is ok in binary classification
 1034 /// but in multi-class classification, the ROC surface is an N dimensional
1035 /// shape, where N is number of classes - 1.
1036 
1038 {
1039  return GetROCCurve((TString)loader->GetName(), iClass);
1040 }
1041 
1042 ////////////////////////////////////////////////////////////////////////////////
1043 /// Draws ROC curves for all methods booked with the factory for a given class.
1044 ///
1045 /// Argument iClass specifies the class to generate the ROC curve in a
1046 /// multiclass setting. It is ignored for binary classification.
1047 ///
1048 /// NOTE: The ROC curve is 1 vs. all where the given class is considered signal
1049 /// and the others considered background. This is ok in binary classification
 1050 /// but in multi-class classification, the ROC surface is an N dimensional
1051 /// shape, where N is number of classes - 1.
1052 
1054 {
1055  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
1056  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
1057  return 0;
1058  }
1059 
1060  TString name = Form("ROCCurve %s class %i", datasetname.Data(), iClass);
1061  TCanvas *canvas = new TCanvas(name, "ROC Curve", 200, 10, 700, 500);
1062  canvas->SetGrid();
1063 
1064  TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass);
1065 
1066  if ( multigraph ) {
1067  multigraph->Draw("AL");
1068 
1069  multigraph->GetYaxis()->SetTitle("Background rejection (Specificity)");
1070  multigraph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
1071 
1072  TString titleString = Form("Signal efficiency vs. Background rejection");
1073  if (this->fAnalysisType == Types::kMulticlass) {
1074  titleString = Form("%s (Class=%i)", titleString.Data(), iClass);
1075  }
1076 
1077  // Workaround for TMultigraph not drawing title correctly.
1078  multigraph->GetHistogram()->SetTitle( titleString );
1079  multigraph->SetTitle( titleString );
1080 
1081  canvas->BuildLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
1082  }
1083 
1084  return canvas;
1085 }
1086 
1087 ////////////////////////////////////////////////////////////////////////////////
1088 /// Iterates through all booked methods and calls training
1089 
1091 {
1092  Log() << kHEADER << gTools().Color("bold") << "Train all methods" << gTools().Color("reset") << Endl;
1093  // iterates over all MVAs that have been booked, and calls their training methods
1094 
1095 
1096  // don't do anything if no method booked
1097  if (fMethodsMap.empty()) {
1098  Log() << kINFO << "...nothing found to train" << Endl;
1099  return;
1100  }
1101 
1102  // here the training starts
1103  //Log() << kINFO << " " << Endl;
1104  Log() << kDEBUG << "Train all methods for "
1105  << (fAnalysisType == Types::kRegression ? "Regression" :
1106  (fAnalysisType == Types::kMulticlass ? "Multiclass" : "Classification") ) << " ..." << Endl;
1107 
1108  std::map<TString,MVector*>::iterator itrMap;
1109 
1110  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1111  {
1112  MVector *methods=itrMap->second;
1113  MVector::iterator itrMethod;
1114 
1115  // iterate over methods and train
1116  for( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
1118  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
1119 
1120  if(mva==0) continue;
1121 
1122  if(mva->DataInfo().GetDataSetManager()->DataInput().GetEntries() <=1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
1123  Log() << kFATAL << "No input data for the training provided!" << Endl;
1124  }
1125 
1126  if(fAnalysisType == Types::kRegression && mva->DataInfo().GetNTargets() < 1 )
1127  Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
1128  else if( (fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification)
1129  && mva->DataInfo().GetNClasses() < 2 )
1130  Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
1131 
1132  // first print some information about the default dataset
1133  if(!IsSilentFile()) WriteDataInformation(mva->fDataSetInfo);
1134 
1135 
1136  if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
1137  Log() << kWARNING << "Method " << mva->GetMethodName()
1138  << " not trained (training tree has less entries ["
1139  << mva->Data()->GetNTrainingEvents()
1140  << "] than required [" << MinNoTrainingEvents << "]" << Endl;
1141  continue;
1142  }
1143 
1144  Log() << kHEADER << "Train method: " << mva->GetMethodName() << " for "
1145  << (fAnalysisType == Types::kRegression ? "Regression" :
1146  (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl << Endl;
1147  mva->TrainMethod();
1148  Log() << kHEADER << "Training finished" << Endl << Endl;
1149  }
1150 
1151  if (fAnalysisType != Types::kRegression) {
1152 
1153  // variable ranking
1154  //Log() << Endl;
1155  Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
1156  for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1157  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
1158  if (mva && mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) {
1159 
1160  // create and print ranking
1161  const Ranking* ranking = (*itrMethod)->CreateRanking();
1162  if (ranking != 0) ranking->Print();
1163  else Log() << kINFO << "No variable ranking supplied by classifier: "
1164  << dynamic_cast<MethodBase*>(*itrMethod)->GetMethodName() << Endl;
1165  }
1166  }
1167  }
1168 
1169  // save training history in case we are not in the silent mode
1170  if (!IsSilentFile()) {
1171  for (UInt_t i=0; i<methods->size(); i++) {
1172  MethodBase* m = dynamic_cast<MethodBase*>((*methods)[i]);
1173  if(m==0) continue;
1174  m->BaseDir()->cd();
1175  m->fTrainHistory.SaveHistory(m->GetMethodName());
1176  }
1177  }
1178 
1179  // delete all methods and recreate them from weight file - this ensures that the application
1180  // of the methods (in TMVAClassificationApplication) is consistent with the results obtained
1181  // in the testing
1182  //Log() << Endl;
1183  if (fModelPersistence) {
1184 
1185  Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;
1186 
1187  if(!IsSilentFile())RootBaseDir()->cd();
1188 
1189  // iterate through all booked methods
1190  for (UInt_t i=0; i<methods->size(); i++) {
1191 
1192  MethodBase *m = dynamic_cast<MethodBase *>((*methods)[i]);
1193  if (m == nullptr)
1194  continue;
1195 
1196  TMVA::Types::EMVA methodType = m->GetMethodType();
1197  TString weightfile = m->GetWeightFileName();
1198 
1199  // decide if .txt or .xml file should be read:
1200  if (READXML)
1201  weightfile.ReplaceAll(".txt", ".xml");
1202 
1203  DataSetInfo &dataSetInfo = m->DataInfo();
1204  TString testvarName = m->GetTestvarName();
1205  delete m; // itrMethod[i];
1206 
1207  // recreate
1208  m = dynamic_cast<MethodBase *>(ClassifierFactory::Instance().Create(
1209  Types::Instance().GetMethodName(methodType).Data(), dataSetInfo, weightfile));
1210  if (m->GetMethodType() == Types::kCategory) {
1211  MethodCategory *methCat = (dynamic_cast<MethodCategory *>(m));
1212  if (!methCat)
1213  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Factory" << Endl;
1214  else
1215  methCat->fDataSetManager = m->DataInfo().GetDataSetManager();
1216  }
1217  // ToDo, Do we need to fill the DataSetManager of MethodBoost here too?
1218 
1219  TString wfileDir = m->DataInfo().GetName();
1220  wfileDir += "/" + gConfig().GetIONames().fWeightFileDir;
1221  m->SetWeightFileDir(wfileDir);
1222  m->SetModelPersistence(fModelPersistence);
1223  m->SetSilentFile(IsSilentFile());
1224  m->SetAnalysisType(fAnalysisType);
1225  m->SetupMethod();
1226  m->ReadStateFromFile();
1227  m->SetTestvarName(testvarName);
1228 
1229  // replace trained method by newly created one (from weight file) in methods vector
1230  (*methods)[i] = m;
1231  }
1232  }
1233  }
1234 }
1235 
1236 ////////////////////////////////////////////////////////////////////////////////
1237 /// Evaluates all booked methods on the testing data and adds the output to the
 1238 /// Results in the corresponding DataSet.
1239 ///
1240 
1242 {
1243  Log() << kHEADER << gTools().Color("bold") << "Test all methods" << gTools().Color("reset") << Endl;
1244 
1245  // don't do anything if no method booked
1246  if (fMethodsMap.empty()) {
1247  Log() << kINFO << "...nothing found to test" << Endl;
1248  return;
1249  }
1250  std::map<TString,MVector*>::iterator itrMap;
1251 
1252  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1253  {
1254  MVector *methods=itrMap->second;
1255  MVector::iterator itrMethod;
1256 
1257  // iterate over methods and test
1258  for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1260  MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
1261  if (mva == 0)
1262  continue;
1263  Types::EAnalysisType analysisType = mva->GetAnalysisType();
1264  Log() << kHEADER << "Test method: " << mva->GetMethodName() << " for "
1265  << (analysisType == Types::kRegression
1266  ? "Regression"
1267  : (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
1268  << " performance" << Endl << Endl;
1269  mva->AddOutput(Types::kTesting, analysisType);
1270  }
1271  }
1272 }
1273 
1274 ////////////////////////////////////////////////////////////////////////////////
1275 
1276 void TMVA::Factory::MakeClass(const TString& datasetname , const TString& methodTitle ) const
1277 {
1278  if (methodTitle != "") {
1279  IMethod* method = GetMethod(datasetname, methodTitle);
1280  if (method) method->MakeClass();
1281  else {
1282  Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
1283  << "\" in list" << Endl;
1284  }
1285  }
1286  else {
1287 
1288  // no classifier specified, print all help messages
1289  MVector *methods=fMethodsMap.find(datasetname)->second;
1290  MVector::const_iterator itrMethod;
1291  for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1292  MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
1293  if(method==0) continue;
1294  Log() << kINFO << "Make response class for classifier: " << method->GetMethodName() << Endl;
1295  method->MakeClass();
1296  }
1297  }
1298 }
1299 
1300 ////////////////////////////////////////////////////////////////////////////////
1301 /// Print predefined help message of classifier.
1302 /// Iterate over methods and test.
1303 
1304 void TMVA::Factory::PrintHelpMessage(const TString& datasetname , const TString& methodTitle ) const
1305 {
1306  if (methodTitle != "") {
1307  IMethod* method = GetMethod(datasetname , methodTitle );
1308  if (method) method->PrintHelpMessage();
1309  else {
1310  Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
1311  << "\" in list" << Endl;
1312  }
1313  }
1314  else {
1315 
1316  // no classifier specified, print all help messages
1317  MVector *methods=fMethodsMap.find(datasetname)->second;
1318  MVector::const_iterator itrMethod ;
1319  for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1320  MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
1321  if(method==0) continue;
1322  Log() << kINFO << "Print help message for classifier: " << method->GetMethodName() << Endl;
1323  method->PrintHelpMessage();
1324  }
1325  }
1326 }
1327 
1328 ////////////////////////////////////////////////////////////////////////////////
1329 /// Iterates over all MVA input variables and evaluates them.
1330 
1332 {
1333  Log() << kINFO << "Evaluating all variables..." << Endl;
1335 
1336  for (UInt_t i=0; i<loader->GetDataSetInfo().GetNVariables(); i++) {
1337  TString s = loader->GetDataSetInfo().GetVariableInfo(i).GetLabel();
1338  if (options.Contains("V")) s += ":V";
1339  this->BookMethod(loader, "Variable", s );
1340  }
1341 }
1342 
1343 ////////////////////////////////////////////////////////////////////////////////
1344 /// Iterates over all MVAs that have been booked, and calls their evaluation methods.
1345 
1347 {
1348  Log() << kHEADER << gTools().Color("bold") << "Evaluate all methods" << gTools().Color("reset") << Endl;
1349 
1350  // don't do anything if no method booked
1351  if (fMethodsMap.empty()) {
1352  Log() << kINFO << "...nothing found to evaluate" << Endl;
1353  return;
1354  }
1355  std::map<TString,MVector*>::iterator itrMap;
1356 
1357  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1358  {
1359  MVector *methods=itrMap->second;
1360 
1361  // -----------------------------------------------------------------------
1362  // First part of evaluation process
1363  // --> compute efficiencies, and other separation estimators
1364  // -----------------------------------------------------------------------
1365 
1366  // although equal, we now want to separate the output for the variables
1367  // and the real methods
1368  Int_t isel; // will be 0 for a Method; 1 for a Variable
1369  Int_t nmeth_used[2] = {0,0}; // 0 Method; 1 Variable
1370 
1371  std::vector<std::vector<TString> > mname(2);
1372  std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
1373  std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1374  std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1375  std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1376 
1377  std::vector<std::vector<Float_t> > multiclass_testEff;
1378  std::vector<std::vector<Float_t> > multiclass_trainEff;
1379  std::vector<std::vector<Float_t> > multiclass_testPur;
1380  std::vector<std::vector<Float_t> > multiclass_trainPur;
1381 
1382  std::vector<std::vector<Float_t> > train_history;
1383 
1384  // Multiclass confusion matrices.
1385  std::vector<TMatrixD> multiclass_trainConfusionEffB01;
1386  std::vector<TMatrixD> multiclass_trainConfusionEffB10;
1387  std::vector<TMatrixD> multiclass_trainConfusionEffB30;
1388  std::vector<TMatrixD> multiclass_testConfusionEffB01;
1389  std::vector<TMatrixD> multiclass_testConfusionEffB10;
1390  std::vector<TMatrixD> multiclass_testConfusionEffB30;
1391 
1392  std::vector<std::vector<Double_t> > biastrain(1); // "bias" of the regression on the training data
1393  std::vector<std::vector<Double_t> > biastest(1); // "bias" of the regression on test data
1394  std::vector<std::vector<Double_t> > devtrain(1); // "dev" of the regression on the training data
1395  std::vector<std::vector<Double_t> > devtest(1); // "dev" of the regression on test data
1396  std::vector<std::vector<Double_t> > rmstrain(1); // "rms" of the regression on the training data
1397  std::vector<std::vector<Double_t> > rmstest(1); // "rms" of the regression on test data
1398  std::vector<std::vector<Double_t> > minftrain(1); // "minf" of the regression on the training data
1399  std::vector<std::vector<Double_t> > minftest(1); // "minf" of the regression on test data
1400  std::vector<std::vector<Double_t> > rhotrain(1); // correlation of the regression on the training data
1401  std::vector<std::vector<Double_t> > rhotest(1); // correlation of the regression on test data
1402 
1403  // same as above but for 'truncated' quantities (computed for events within 2sigma of RMS)
1404  std::vector<std::vector<Double_t> > biastrainT(1);
1405  std::vector<std::vector<Double_t> > biastestT(1);
1406  std::vector<std::vector<Double_t> > devtrainT(1);
1407  std::vector<std::vector<Double_t> > devtestT(1);
1408  std::vector<std::vector<Double_t> > rmstrainT(1);
1409  std::vector<std::vector<Double_t> > rmstestT(1);
1410  std::vector<std::vector<Double_t> > minftrainT(1);
1411  std::vector<std::vector<Double_t> > minftestT(1);
1412 
1413  // following vector contains all methods - with the exception of Cuts, which are special
1414  MVector methodsNoCuts;
1415 
1416  Bool_t doRegression = kFALSE;
1417  Bool_t doMulticlass = kFALSE;
1418 
1419  // iterate over methods and evaluate
1420  for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1422  MethodBase* theMethod = dynamic_cast<MethodBase*>(*itrMethod);
1423  if(theMethod==0) continue;
1424  theMethod->SetFile(fgTargetFile);
1425  theMethod->SetSilentFile(IsSilentFile());
1426  if (theMethod->GetMethodType() != Types::kCuts) methodsNoCuts.push_back( *itrMethod );
1427 
1428  if (theMethod->DoRegression()) {
1429  doRegression = kTRUE;
1430 
1431  Log() << kINFO << "Evaluate regression method: " << theMethod->GetMethodName() << Endl;
1432  Double_t bias, dev, rms, mInf;
1433  Double_t biasT, devT, rmsT, mInfT;
1434  Double_t rho;
1435 
1436  Log() << kINFO << "TestRegression (testing)" << Endl;
1437  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTesting );
1438  biastest[0] .push_back( bias );
1439  devtest[0] .push_back( dev );
1440  rmstest[0] .push_back( rms );
1441  minftest[0] .push_back( mInf );
1442  rhotest[0] .push_back( rho );
1443  biastestT[0] .push_back( biasT );
1444  devtestT[0] .push_back( devT );
1445  rmstestT[0] .push_back( rmsT );
1446  minftestT[0] .push_back( mInfT );
1447 
1448  Log() << kINFO << "TestRegression (training)" << Endl;
1449  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTraining );
1450  biastrain[0] .push_back( bias );
1451  devtrain[0] .push_back( dev );
1452  rmstrain[0] .push_back( rms );
1453  minftrain[0] .push_back( mInf );
1454  rhotrain[0] .push_back( rho );
1455  biastrainT[0].push_back( biasT );
1456  devtrainT[0] .push_back( devT );
1457  rmstrainT[0] .push_back( rmsT );
1458  minftrainT[0].push_back( mInfT );
1459 
1460  mname[0].push_back( theMethod->GetMethodName() );
1461  nmeth_used[0]++;
1462  if (!IsSilentFile()) {
1463  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1466  }
1467  } else if (theMethod->DoMulticlass()) {
1468  // ====================================================================
1469  // === Multiclass evaluation
1470  // ====================================================================
1471  doMulticlass = kTRUE;
1472  Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl;
1473 
1474  // This part uses a genetic alg. to evaluate the optimal sig eff * sig pur.
1475  // This is why it is disabled for now.
1476  // Find approximate optimal working point w.r.t. signalEfficiency * signalPurity.
1477  // theMethod->TestMulticlass(); // This is where the actual GA calc is done
1478  // multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur));
1479 
1480  theMethod->TestMulticlass();
1481 
1482  // Confusion matrix at three background efficiency levels
1483  multiclass_trainConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTraining));
1484  multiclass_trainConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTraining));
1485  multiclass_trainConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTraining));
1486 
1487  multiclass_testConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTesting));
1488  multiclass_testConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTesting));
1489  multiclass_testConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTesting));
1490 
1491  if (!IsSilentFile()) {
1492  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1495  }
1496 
1497  nmeth_used[0]++;
1498  mname[0].push_back(theMethod->GetMethodName());
1499  } else {
1500 
1501  Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl;
1502  isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0;
1503 
1504  // perform the evaluation
1505  theMethod->TestClassification();
1506 
1507  // evaluate the classifier
1508  mname[isel].push_back(theMethod->GetMethodName());
1509  sig[isel].push_back(theMethod->GetSignificance());
1510  sep[isel].push_back(theMethod->GetSeparation());
1511  roc[isel].push_back(theMethod->GetROCIntegral());
1512 
1513  Double_t err;
1514  eff01[isel].push_back(theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err));
1515  eff01err[isel].push_back(err);
1516  eff10[isel].push_back(theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err));
1517  eff10err[isel].push_back(err);
1518  eff30[isel].push_back(theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err));
1519  eff30err[isel].push_back(err);
1520  effArea[isel].push_back(theMethod->GetEfficiency("", Types::kTesting, err)); // computes the area (average)
1521 
1522  trainEff01[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer
1523  trainEff10[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.10"));
1524  trainEff30[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.30"));
1525 
1526  nmeth_used[isel]++;
1527 
1528  if (!IsSilentFile()) {
1529  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1532  }
1533  }
1534  }
1535  if (doRegression) {
1536 
1537  std::vector<TString> vtemps = mname[0];
1538  std::vector< std::vector<Double_t> > vtmp;
1539  vtmp.push_back( devtest[0] ); // this is the vector that is ranked
1540  vtmp.push_back( devtrain[0] );
1541  vtmp.push_back( biastest[0] );
1542  vtmp.push_back( biastrain[0] );
1543  vtmp.push_back( rmstest[0] );
1544  vtmp.push_back( rmstrain[0] );
1545  vtmp.push_back( minftest[0] );
1546  vtmp.push_back( minftrain[0] );
1547  vtmp.push_back( rhotest[0] );
1548  vtmp.push_back( rhotrain[0] );
1549  vtmp.push_back( devtestT[0] ); // this is the vector that is ranked
1550  vtmp.push_back( devtrainT[0] );
1551  vtmp.push_back( biastestT[0] );
1552  vtmp.push_back( biastrainT[0]);
1553  vtmp.push_back( rmstestT[0] );
1554  vtmp.push_back( rmstrainT[0] );
1555  vtmp.push_back( minftestT[0] );
1556  vtmp.push_back( minftrainT[0]);
1557  gTools().UsefulSortAscending( vtmp, &vtemps );
1558  mname[0] = vtemps;
1559  devtest[0] = vtmp[0];
1560  devtrain[0] = vtmp[1];
1561  biastest[0] = vtmp[2];
1562  biastrain[0] = vtmp[3];
1563  rmstest[0] = vtmp[4];
1564  rmstrain[0] = vtmp[5];
1565  minftest[0] = vtmp[6];
1566  minftrain[0] = vtmp[7];
1567  rhotest[0] = vtmp[8];
1568  rhotrain[0] = vtmp[9];
1569  devtestT[0] = vtmp[10];
1570  devtrainT[0] = vtmp[11];
1571  biastestT[0] = vtmp[12];
1572  biastrainT[0] = vtmp[13];
1573  rmstestT[0] = vtmp[14];
1574  rmstrainT[0] = vtmp[15];
1575  minftestT[0] = vtmp[16];
1576  minftrainT[0] = vtmp[17];
1577  } else if (doMulticlass) {
1578  // TODO: fill in something meaningful
1579  // If there is some ranking of methods to be done it should be done here.
1580  // However, this is not so easy to define for multiclass so it is left out for now.
1581 
1582  }
1583  else {
1584  // now sort the variables according to the best 'eff at Beff=0.10'
1585  for (Int_t k=0; k<2; k++) {
1586  std::vector< std::vector<Double_t> > vtemp;
1587  vtemp.push_back( effArea[k] ); // this is the vector that is ranked
1588  vtemp.push_back( eff10[k] );
1589  vtemp.push_back( eff01[k] );
1590  vtemp.push_back( eff30[k] );
1591  vtemp.push_back( eff10err[k] );
1592  vtemp.push_back( eff01err[k] );
1593  vtemp.push_back( eff30err[k] );
1594  vtemp.push_back( trainEff10[k] );
1595  vtemp.push_back( trainEff01[k] );
1596  vtemp.push_back( trainEff30[k] );
1597  vtemp.push_back( sig[k] );
1598  vtemp.push_back( sep[k] );
1599  vtemp.push_back( roc[k] );
1600  std::vector<TString> vtemps = mname[k];
1601  gTools().UsefulSortDescending( vtemp, &vtemps );
1602  effArea[k] = vtemp[0];
1603  eff10[k] = vtemp[1];
1604  eff01[k] = vtemp[2];
1605  eff30[k] = vtemp[3];
1606  eff10err[k] = vtemp[4];
1607  eff01err[k] = vtemp[5];
1608  eff30err[k] = vtemp[6];
1609  trainEff10[k] = vtemp[7];
1610  trainEff01[k] = vtemp[8];
1611  trainEff30[k] = vtemp[9];
1612  sig[k] = vtemp[10];
1613  sep[k] = vtemp[11];
1614  roc[k] = vtemp[12];
1615  mname[k] = vtemps;
1616  }
1617  }
1618 
1619  // -----------------------------------------------------------------------
1620  // Second part of evaluation process
1621  // --> compute correlations among MVAs
1622  // --> compute correlations between input variables and MVA (determines importance)
1623  // --> count overlaps
1624  // -----------------------------------------------------------------------
1625  if(fCorrelations)
1626  {
1627  const Int_t nmeth = methodsNoCuts.size();
1628  MethodBase* method = dynamic_cast<MethodBase*>(methods[0][0]);
1629  const Int_t nvar = method->fDataSetInfo.GetNVariables();
1630  if (!doRegression && !doMulticlass ) {
1631 
1632  if (nmeth > 0) {
1633 
1634  // needed for correlations
1635  Double_t *dvec = new Double_t[nmeth+nvar];
1636  std::vector<Double_t> rvec;
1637 
1638  // for correlations
1639  TPrincipal* tpSig = new TPrincipal( nmeth+nvar, "" );
1640  TPrincipal* tpBkg = new TPrincipal( nmeth+nvar, "" );
1641 
1642  // set required tree branch references
1643  Int_t ivar = 0;
1644  std::vector<TString>* theVars = new std::vector<TString>;
1645  std::vector<ResultsClassification*> mvaRes;
1646  for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); ++itrMethod, ++ivar) {
1647  MethodBase* m = dynamic_cast<MethodBase*>(*itrMethod);
1648  if(m==0) continue;
1649  theVars->push_back( m->GetTestvarName() );
1650  rvec.push_back( m->GetSignalReferenceCut() );
1651  theVars->back().ReplaceAll( "MVA_", "" );
1652  mvaRes.push_back( dynamic_cast<ResultsClassification*>( m->Data()->GetResults( m->GetMethodName(),
1655  }
1656 
1657  // for overlap study
1658  TMatrixD* overlapS = new TMatrixD( nmeth, nmeth );
1659  TMatrixD* overlapB = new TMatrixD( nmeth, nmeth );
1660  (*overlapS) *= 0; // init...
1661  (*overlapB) *= 0; // init...
1662 
1663  // loop over test tree
1664  DataSet* defDs = method->fDataSetInfo.GetDataSet();
1666  for (Int_t ievt=0; ievt<defDs->GetNEvents(); ievt++) {
1667  const Event* ev = defDs->GetEvent(ievt);
1668 
1669  // for correlations
1670  TMatrixD* theMat = 0;
1671  for (Int_t im=0; im<nmeth; im++) {
1672  // check for NaN value
1673  Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
1674  if (TMath::IsNaN(retval)) {
1675  Log() << kWARNING << "Found NaN return value in event: " << ievt
1676  << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;
1677  dvec[im] = 0;
1678  }
1679  else dvec[im] = retval;
1680  }
1681  for (Int_t iv=0; iv<nvar; iv++) dvec[iv+nmeth] = (Double_t)ev->GetValue(iv);
1682  if (method->fDataSetInfo.IsSignal(ev)) { tpSig->AddRow( dvec ); theMat = overlapS; }
1683  else { tpBkg->AddRow( dvec ); theMat = overlapB; }
1684 
1685  // count overlaps
1686  for (Int_t im=0; im<nmeth; im++) {
1687  for (Int_t jm=im; jm<nmeth; jm++) {
1688  if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1689  (*theMat)(im,jm)++;
1690  if (im != jm) (*theMat)(jm,im)++;
1691  }
1692  }
1693  }
1694  }
1695 
1696  // renormalise overlap matrix
1697  (*overlapS) *= (1.0/defDs->GetNEvtSigTest()); // init...
1698  (*overlapB) *= (1.0/defDs->GetNEvtBkgdTest()); // init...
1699 
1700  tpSig->MakePrincipals();
1701  tpBkg->MakePrincipals();
1702 
1703  const TMatrixD* covMatS = tpSig->GetCovarianceMatrix();
1704  const TMatrixD* covMatB = tpBkg->GetCovarianceMatrix();
1705 
1706  const TMatrixD* corrMatS = gTools().GetCorrelationMatrix( covMatS );
1707  const TMatrixD* corrMatB = gTools().GetCorrelationMatrix( covMatB );
1708 
1709  // print correlation matrices
1710  if (corrMatS != 0 && corrMatB != 0) {
1711 
1712  // extract MVA matrix
1713  TMatrixD mvaMatS(nmeth,nmeth);
1714  TMatrixD mvaMatB(nmeth,nmeth);
1715  for (Int_t im=0; im<nmeth; im++) {
1716  for (Int_t jm=0; jm<nmeth; jm++) {
1717  mvaMatS(im,jm) = (*corrMatS)(im,jm);
1718  mvaMatB(im,jm) = (*corrMatB)(im,jm);
1719  }
1720  }
1721 
1722  // extract variables - to MVA matrix
1723  std::vector<TString> theInputVars;
1724  TMatrixD varmvaMatS(nvar,nmeth);
1725  TMatrixD varmvaMatB(nvar,nmeth);
1726  for (Int_t iv=0; iv<nvar; iv++) {
1727  theInputVars.push_back( method->fDataSetInfo.GetVariableInfo( iv ).GetLabel() );
1728  for (Int_t jm=0; jm<nmeth; jm++) {
1729  varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1730  varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1731  }
1732  }
1733 
1734  if (nmeth > 1) {
1735  Log() << kINFO << Endl;
1736  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (signal):" << Endl;
1737  gTools().FormattedOutput( mvaMatS, *theVars, Log() );
1738  Log() << kINFO << Endl;
1739 
1740  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (background):" << Endl;
1741  gTools().FormattedOutput( mvaMatB, *theVars, Log() );
1742  Log() << kINFO << Endl;
1743  }
1744 
1745  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (signal):" << Endl;
1746  gTools().FormattedOutput( varmvaMatS, theInputVars, *theVars, Log() );
1747  Log() << kINFO << Endl;
1748 
1749  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (background):" << Endl;
1750  gTools().FormattedOutput( varmvaMatB, theInputVars, *theVars, Log() );
1751  Log() << kINFO << Endl;
1752  }
1753  else Log() << kWARNING <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "<TestAllMethods> cannot compute correlation matrices" << Endl;
1754 
1755  // print overlap matrices
1756  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
1757  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" << Endl;
1758  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "An event is signal-like, if its MVA output exceeds the following value:" << Endl;
1759  gTools().FormattedOutput( rvec, *theVars, "Method" , "Cut value", Log() );
1760  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
1761 
1762  // give notice that cut method has been excluded from this test
1763  if (nmeth != (Int_t)methods->size())
1764  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Note: no correlations and overlap with cut method are provided at present" << Endl;
1765 
1766  if (nmeth > 1) {
1767  Log() << kINFO << Endl;
1768  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (signal):" << Endl;
1769  gTools().FormattedOutput( *overlapS, *theVars, Log() );
1770  Log() << kINFO << Endl;
1771 
1772  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (background):" << Endl;
1773  gTools().FormattedOutput( *overlapB, *theVars, Log() );
1774  }
1775 
1776  // cleanup
1777  delete tpSig;
1778  delete tpBkg;
1779  delete corrMatS;
1780  delete corrMatB;
1781  delete theVars;
1782  delete overlapS;
1783  delete overlapB;
1784  delete [] dvec;
1785  }
1786  }
1787  }
1788  // -----------------------------------------------------------------------
1789  // Third part of evaluation process
1790  // --> output
1791  // -----------------------------------------------------------------------
1792 
1793  if (doRegression) {
1794 
1795  Log() << kINFO << Endl;
1796  TString hLine = "--------------------------------------------------------------------------------------------------";
1797  Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
1798  Log() << kINFO << "(\"Bias\" quotes the mean deviation of the regression from true target." << Endl;
1799  Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
1800  Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
1801  Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
1802  Log() << kINFO << hLine << Endl;
1803  //Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1804  Log() << kINFO << hLine << Endl;
1805 
1806  for (Int_t i=0; i<nmeth_used[0]; i++) {
1807  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1808  if(theMethod==0) continue;
1809 
1810  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1811  theMethod->fDataSetInfo.GetName(),
1812  (const char*)mname[0][i],
1813  biastest[0][i], biastestT[0][i],
1814  rmstest[0][i], rmstestT[0][i],
1815  minftest[0][i], minftestT[0][i] )
1816  << Endl;
1817  }
1818  Log() << kINFO << hLine << Endl;
1819  Log() << kINFO << Endl;
1820  Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
1821  Log() << kINFO << "(overtraining check)" << Endl;
1822  Log() << kINFO << hLine << Endl;
1823  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1824  Log() << kINFO << hLine << Endl;
1825 
1826  for (Int_t i=0; i<nmeth_used[0]; i++) {
1827  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1828  if(theMethod==0) continue;
1829  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1830  theMethod->fDataSetInfo.GetName(),
1831  (const char*)mname[0][i],
1832  biastrain[0][i], biastrainT[0][i],
1833  rmstrain[0][i], rmstrainT[0][i],
1834  minftrain[0][i], minftrainT[0][i] )
1835  << Endl;
1836  }
1837  Log() << kINFO << hLine << Endl;
1838  Log() << kINFO << Endl;
1839  } else if (doMulticlass) {
1840  // ====================================================================
1841  // === Multiclass Output
1842  // ====================================================================
1843 
1844  TString hLine =
1845  "-------------------------------------------------------------------------------------------------------";
1846 
1847  // This part uses a genetic alg. to evaluate the optimal sig eff * sig pur.
1848  // This is why it is disabled for now.
1849  //
1850  // // --- Acheivable signal efficiency * signal purity
1851  // // --------------------------------------------------------------------
1852  // Log() << kINFO << Endl;
1853  // Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
1854  // Log() << kINFO << hLine << Endl;
1855 
1856  // // iterate over methods and evaluate
1857  // for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1858  // MethodBase *theMethod = dynamic_cast<MethodBase *>(*itrMethod);
1859  // if (theMethod == 0) {
1860  // continue;
1861  // }
1862 
1863  // TString header = "DataSet Name MVA Method ";
1864  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1865  // header += Form("%-12s ", theMethod->fDataSetInfo.GetClassInfo(icls)->GetName());
1866  // }
1867 
1868  // Log() << kINFO << header << Endl;
1869  // Log() << kINFO << hLine << Endl;
1870  // for (Int_t i = 0; i < nmeth_used[0]; i++) {
1871  // TString res = Form("[%-14s] %-15s", theMethod->fDataSetInfo.GetName(), (const char *)mname[0][i]);
1872  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1873  // res += Form("%#1.3f ", (multiclass_testEff[i][icls]) * (multiclass_testPur[i][icls]));
1874  // }
1875  // Log() << kINFO << res << Endl;
1876  // }
1877 
1878  // Log() << kINFO << hLine << Endl;
1879  // Log() << kINFO << Endl;
1880  // }
1881 
1882  // --- 1 vs Rest ROC AUC, signal efficiency @ given background efficiency
1883  // --------------------------------------------------------------------
1884  TString header1 = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "Dataset", "MVA Method", "ROC AUC", "Sig eff@B=0.01",
1885  "Sig eff@B=0.10", "Sig eff@B=0.30");
1886  TString header2 = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "Name:", "/ Class:", "test (train)", "test (train)",
1887  "test (train)", "test (train)");
1888  Log() << kINFO << Endl;
1889  Log() << kINFO << "1-vs-rest performance metrics per class" << Endl;
1890  Log() << kINFO << hLine << Endl;
1891  Log() << kINFO << Endl;
1892  Log() << kINFO << "Considers the listed class as signal and the other classes" << Endl;
1893  Log() << kINFO << "as background, reporting the resulting binary performance." << Endl;
1894  Log() << kINFO << "A score of 0.820 (0.850) means 0.820 was acheived on the" << Endl;
1895  Log() << kINFO << "test set and 0.850 on the training set." << Endl;
1896 
1897  Log() << kINFO << Endl;
1898  Log() << kINFO << header1 << Endl;
1899  Log() << kINFO << header2 << Endl;
1900  for (Int_t k = 0; k < 2; k++) {
1901  for (Int_t i = 0; i < nmeth_used[k]; i++) {
1902  if (k == 1) {
1903  mname[k][i].ReplaceAll("Variable_", "");
1904  }
1905 
1906  const TString datasetName = itrMap->first;
1907  const TString mvaName = mname[k][i];
1908 
1909  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, mvaName));
1910  if (theMethod == 0) {
1911  continue;
1912  }
1913 
1914  Log() << kINFO << Endl;
1915  TString row = Form("%-15s%-15s", datasetName.Data(), mvaName.Data());
1916  Log() << kINFO << row << Endl;
1917  Log() << kINFO << "------------------------------" << Endl;
1918 
1919  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
1920  for (UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1921 
1922  ROCCurve *rocCurveTrain = GetROC(datasetName, mvaName, iClass, Types::kTraining);
1923  ROCCurve *rocCurveTest = GetROC(datasetName, mvaName, iClass, Types::kTesting);
1924 
1925  const TString className = theMethod->DataInfo().GetClassInfo(iClass)->GetName();
1926  const Double_t rocaucTrain = rocCurveTrain->GetROCIntegral();
1927  const Double_t effB01Train = rocCurveTrain->GetEffSForEffB(0.01);
1928  const Double_t effB10Train = rocCurveTrain->GetEffSForEffB(0.10);
1929  const Double_t effB30Train = rocCurveTrain->GetEffSForEffB(0.30);
1930  const Double_t rocaucTest = rocCurveTest->GetROCIntegral();
1931  const Double_t effB01Test = rocCurveTest->GetEffSForEffB(0.01);
1932  const Double_t effB10Test = rocCurveTest->GetEffSForEffB(0.10);
1933  const Double_t effB30Test = rocCurveTest->GetEffSForEffB(0.30);
1934  const TString rocaucCmp = Form("%5.3f (%5.3f)", rocaucTest, rocaucTrain);
1935  const TString effB01Cmp = Form("%5.3f (%5.3f)", effB01Test, effB01Train);
1936  const TString effB10Cmp = Form("%5.3f (%5.3f)", effB10Test, effB10Train);
1937  const TString effB30Cmp = Form("%5.3f (%5.3f)", effB30Test, effB30Train);
1938  row = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "", className.Data(), rocaucCmp.Data(), effB01Cmp.Data(),
1939  effB10Cmp.Data(), effB30Cmp.Data());
1940  Log() << kINFO << row << Endl;
1941 
1942  delete rocCurveTrain;
1943  delete rocCurveTest;
1944  }
1945  }
1946  }
1947  Log() << kINFO << Endl;
1948  Log() << kINFO << hLine << Endl;
1949  Log() << kINFO << Endl;
1950 
1951  // --- Confusion matrices
1952  // --------------------------------------------------------------------
1953  auto printMatrix = [](TMatrixD const &matTraining, TMatrixD const &matTesting, std::vector<TString> classnames,
1954  UInt_t numClasses, MsgLogger &stream) {
1955  // assert (classLabledWidth >= valueLabelWidth + 2)
1956  // if (...) {Log() << kWARN << "..." << Endl; }
1957 
1958  // TODO: Ensure matrices are same size.
1959 
1960  TString header = Form(" %-14s", " ");
1961  TString headerInfo = Form(" %-14s", " ");
1962  ;
1963  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1964  header += Form(" %-14s", classnames[iCol].Data());
1965  headerInfo += Form(" %-14s", " test (train)");
1966  }
1967  stream << kINFO << header << Endl;
1968  stream << kINFO << headerInfo << Endl;
1969 
1970  for (UInt_t iRow = 0; iRow < numClasses; ++iRow) {
1971  stream << kINFO << Form(" %-14s", classnames[iRow].Data());
1972 
1973  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1974  if (iCol == iRow) {
1975  stream << kINFO << Form(" %-14s", "-");
1976  } else {
1977  Double_t trainValue = matTraining[iRow][iCol];
1978  Double_t testValue = matTesting[iRow][iCol];
1979  TString entry = Form("%-5.3f (%-5.3f)", testValue, trainValue);
1980  stream << kINFO << Form(" %-14s", entry.Data());
1981  }
1982  }
1983  stream << kINFO << Endl;
1984  }
1985  };
1986 
1987  Log() << kINFO << Endl;
1988  Log() << kINFO << "Confusion matrices for all methods" << Endl;
1989  Log() << kINFO << hLine << Endl;
1990  Log() << kINFO << Endl;
1991  Log() << kINFO << "Does a binary comparison between the two classes given by a " << Endl;
1992  Log() << kINFO << "particular row-column combination. In each case, the class " << Endl;
1993  Log() << kINFO << "given by the row is considered signal while the class given " << Endl;
1994  Log() << kINFO << "by the column index is considered background." << Endl;
1995  Log() << kINFO << Endl;
1996  for (UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
1997  MethodBase *theMethod = dynamic_cast<MethodBase *>(methods->at(iMethod));
1998  if (theMethod == nullptr) {
1999  continue;
2000  }
2001  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
2002 
2003  std::vector<TString> classnames;
2004  for (UInt_t iCls = 0; iCls < numClasses; ++iCls) {
2005  classnames.push_back(theMethod->fDataSetInfo.GetClassInfo(iCls)->GetName());
2006  }
2007  Log() << kINFO
2008  << "=== Showing confusion matrix for method : " << Form("%-15s", (const char *)mname[0][iMethod])
2009  << Endl;
2010  Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.01%)" << Endl;
2011  Log() << kINFO << "---------------------------------------------------" << Endl;
2012  printMatrix(multiclass_testConfusionEffB01[iMethod], multiclass_trainConfusionEffB01[iMethod], classnames,
2013  numClasses, Log());
2014  Log() << kINFO << Endl;
2015 
2016  Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.10%)" << Endl;
2017  Log() << kINFO << "---------------------------------------------------" << Endl;
2018  printMatrix(multiclass_testConfusionEffB10[iMethod], multiclass_trainConfusionEffB10[iMethod], classnames,
2019  numClasses, Log());
2020  Log() << kINFO << Endl;
2021 
2022  Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.30%)" << Endl;
2023  Log() << kINFO << "---------------------------------------------------" << Endl;
2024  printMatrix(multiclass_testConfusionEffB30[iMethod], multiclass_trainConfusionEffB30[iMethod], classnames,
2025  numClasses, Log());
2026  Log() << kINFO << Endl;
2027  }
2028  Log() << kINFO << hLine << Endl;
2029  Log() << kINFO << Endl;
2030 
2031  } else {
2032  // Binary classification
2033  if (fROC) {
2034  Log().EnableOutput();
2036  Log() << Endl;
2037  TString hLine = "------------------------------------------------------------------------------------------"
2038  "-------------------------";
2039  Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
2040  Log() << kINFO << hLine << Endl;
2041  Log() << kINFO << "DataSet MVA " << Endl;
2042  Log() << kINFO << "Name: Method: ROC-integ" << Endl;
2043 
2044  // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error):
2045  // | Sepa- Signifi- " << Endl; Log() << kDEBUG << "Name: Method: @B=0.01
2046  // @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl;
2047  Log() << kDEBUG << hLine << Endl;
2048  for (Int_t k = 0; k < 2; k++) {
2049  if (k == 1 && nmeth_used[k] > 0) {
2050  Log() << kINFO << hLine << Endl;
2051  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2052  }
2053  for (Int_t i = 0; i < nmeth_used[k]; i++) {
2054  TString datasetName = itrMap->first;
2055  TString methodName = mname[k][i];
2056 
2057  if (k == 1) {
2058  methodName.ReplaceAll("Variable_", "");
2059  }
2060 
2061  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, methodName));
2062  if (theMethod == 0) {
2063  continue;
2064  }
2065 
2066  TMVA::DataSet *dataset = theMethod->Data();
2067  TMVA::Results *results = dataset->GetResults(methodName, Types::kTesting, this->fAnalysisType);
2068  std::vector<Bool_t> *mvaResType =
2069  dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
2070 
2071  Double_t rocIntegral = 0.0;
2072  if (mvaResType->size() != 0) {
2073  rocIntegral = GetROCIntegral(datasetName, methodName);
2074  }
2075 
2076  if (sep[k][i] < 0 || sig[k][i] < 0) {
2077  // cannot compute separation/significance -> no MVA (usually for Cuts)
2078  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), effArea[k][i])
2079  << Endl;
2080 
2081  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
2082  // %#1.3f %#1.3f | -- --",
2083  // datasetName.Data(),
2084  // methodName.Data(),
2085  // eff01[k][i], Int_t(1000*eff01err[k][i]),
2086  // eff10[k][i], Int_t(1000*eff10err[k][i]),
2087  // eff30[k][i], Int_t(1000*eff30err[k][i]),
2088  // effArea[k][i],rocIntegral) << Endl;
2089  } else {
2090  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), rocIntegral)
2091  << Endl;
2092  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
2093  // %#1.3f %#1.3f | %#1.3f %#1.3f",
2094  // datasetName.Data(),
2095  // methodName.Data(),
2096  // eff01[k][i], Int_t(1000*eff01err[k][i]),
2097  // eff10[k][i], Int_t(1000*eff10err[k][i]),
2098  // eff30[k][i], Int_t(1000*eff30err[k][i]),
2099  // effArea[k][i],rocIntegral,
2100  // sep[k][i], sig[k][i]) << Endl;
2101  }
2102  }
2103  }
2104  Log() << kINFO << hLine << Endl;
2105  Log() << kINFO << Endl;
2106  Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
2107  Log() << kINFO << hLine << Endl;
2108  Log() << kINFO
2109  << "DataSet MVA Signal efficiency: from test sample (from training sample) "
2110  << Endl;
2111  Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2112  << Endl;
2113  Log() << kINFO << hLine << Endl;
2114  for (Int_t k = 0; k < 2; k++) {
2115  if (k == 1 && nmeth_used[k] > 0) {
2116  Log() << kINFO << hLine << Endl;
2117  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2118  }
2119  for (Int_t i = 0; i < nmeth_used[k]; i++) {
2120  if (k == 1) mname[k][i].ReplaceAll("Variable_", "");
2121  MethodBase *theMethod = dynamic_cast<MethodBase *>((*methods)[i]);
2122  if (theMethod == 0) continue;
2123 
2124  Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2125  theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i],
2126  trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2127  << Endl;
2128  }
2129  }
2130  Log() << kINFO << hLine << Endl;
2131  Log() << kINFO << Endl;
2132 
2133  if (gTools().CheckForSilentOption(GetOptions())) Log().InhibitOutput();
2134  } // end fROC
2135  }
2136  if(!IsSilentFile())
2137  {
2138  std::list<TString> datasets;
2139  for (Int_t k=0; k<2; k++) {
2140  for (Int_t i=0; i<nmeth_used[k]; i++) {
2141  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
2142  if(theMethod==0) continue;
2143  // write test/training trees
2144  RootBaseDir()->cd(theMethod->fDataSetInfo.GetName());
2145  if(std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end())
2146  {
2149  datasets.push_back(theMethod->fDataSetInfo.GetName());
2150  }
2151  }
2152  }
2153  }
2154  }//end for MethodsMap
2155  // references for citation
2157 }
2158 
2159 ////////////////////////////////////////////////////////////////////////////////
2160 /// Evaluate Variable Importance
2161 
2162 TH1F* TMVA::Factory::EvaluateImportance(DataLoader *loader,VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2163 {
2164  fModelPersistence=kFALSE;
2165  fSilentFile=kTRUE;//we need silent file here because we need fast classification results
2166 
2167  //getting number of variables and variable names from loader
2168  const int nbits = loader->GetDataSetInfo().GetNVariables();
2169  if(vitype==VIType::kShort)
2170  return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
2171  else if(vitype==VIType::kAll)
2172  return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
2173  else if(vitype==VIType::kRandom&&nbits>10)
2174  {
2175  return EvaluateImportanceRandom(loader,pow(2,nbits),theMethod,methodTitle,theOption);
2176  }else
2177  {
2178  std::cerr<<"Error in Variable Importance: Random mode require more that 10 variables in the dataset."<<std::endl;
2179  return nullptr;
2180  }
2181 }
2182 
2183 ////////////////////////////////////////////////////////////////////////////////
2184 
2185 TH1F* TMVA::Factory::EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2186 {
2187 
2188  uint64_t x = 0;
2189  uint64_t y = 0;
2190 
2191  //getting number of variables and variable names from loader
2192  const int nbits = loader->GetDataSetInfo().GetNVariables();
2193  std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2194 
2195  uint64_t range = pow(2, nbits);
2196 
2197  //vector to save importances
2198  std::vector<Double_t> importances(nbits);
2199  //vector to save ROC
2200  std::vector<Double_t> ROC(range);
2201  ROC[0]=0.5;
2202  for (int i = 0; i < nbits; i++)importances[i] = 0;
2203 
2204  Double_t SROC, SSROC; //computed ROC value
2205  for ( x = 1; x <range ; x++) {
2206 
2207  std::bitset<VIBITS> xbitset(x);
2208  if (x == 0) continue; //data loader need at least one variable
2209 
2210  //creating loader for seed
2211  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2212 
2213  //adding variables from seed
2214  for (int index = 0; index < nbits; index++) {
2215  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2216  }
2217 
2218  DataLoaderCopy(seedloader,loader);
2219  seedloader->PrepareTrainingAndTestTree(loader->GetDataSetInfo().GetCut("Signal"), loader->GetDataSetInfo().GetCut("Background"), loader->GetDataSetInfo().GetSplitOptions());
2220 
2221  //Booking Seed
2222  BookMethod(seedloader, theMethod, methodTitle, theOption);
2223 
2224  //Train/Test/Evaluation
2225  TrainAllMethods();
2226  TestAllMethods();
2227  EvaluateAllMethods();
2228 
2229  //getting ROC
2230  ROC[x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2231 
2232  //cleaning information to process sub-seeds
2233  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2235  delete sresults;
2236  delete seedloader;
2237  this->DeleteAllMethods();
2238 
2239  fMethodsMap.clear();
2240  //removing global result because it is requiring a lot of RAM for all seeds
2241  }
2242 
2243 
2244  for ( x = 0; x <range ; x++)
2245  {
2246  SROC=ROC[x];
2247  for (uint32_t i = 0; i < VIBITS; ++i) {
2248  if (x & (uint64_t(1) << i)) {
2249  y = x & ~(1 << i);
2250  std::bitset<VIBITS> ybitset(y);
2251  //need at least one variable
2252  //NOTE: if sub-seed is zero then is the special case
2253  //that count in xbitset is 1
2254  Double_t ny = log(x - y) / 0.693147;
2255  if (y == 0) {
2256  importances[ny] = SROC - 0.5;
2257  continue;
2258  }
2259 
2260  //getting ROC
2261  SSROC = ROC[y];
2262  importances[ny] += SROC - SSROC;
2263  //cleaning information
2264  }
2265 
2266  }
2267  }
2268  std::cout<<"--- Variable Importance Results (All)"<<std::endl;
2269  return GetImportance(nbits,importances,varNames);
2270 }
2271 
/// Return the sum 2^0 + 2^1 + ... + 2^(i-1) = 2^i - 1 (0 for i <= 0).
/// Used to build the all-variables-enabled bit mask in
/// EvaluateImportanceShort.
///
/// The previous implementation accumulated pow(2, n) in floating point,
/// which is slow and loses exactness for i > 53; the closed-form bit shift
/// is exact for any i below the bit width of long.
static long int sum(long int i)
{
   if (i <= 0) return 0;        // empty sum, as in the original loop
   return (1L << i) - 1;        // geometric series closed form
}
2278 
2279 ////////////////////////////////////////////////////////////////////////////////
2280 
2281 TH1F* TMVA::Factory::EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2282 {
2283  uint64_t x = 0;
2284  uint64_t y = 0;
2285 
2286  //getting number of variables and variable names from loader
2287  const int nbits = loader->GetDataSetInfo().GetNVariables();
2288  std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2289 
2290  long int range = sum(nbits);
2291 // std::cout<<range<<std::endl;
2292  //vector to save importances
2293  std::vector<Double_t> importances(nbits);
2294  for (int i = 0; i < nbits; i++)importances[i] = 0;
2295 
2296  Double_t SROC, SSROC; //computed ROC value
2297 
2298  x = range;
2299 
2300  std::bitset<VIBITS> xbitset(x);
2301  if (x == 0) Log()<<kFATAL<<"Error: need at least one variable."; //data loader need at least one variable
2302 
2303 
2304  //creating loader for seed
2305  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2306 
2307  //adding variables from seed
2308  for (int index = 0; index < nbits; index++) {
2309  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2310  }
2311 
2312  //Loading Dataset
2313  DataLoaderCopy(seedloader,loader);
2314 
2315  //Booking Seed
2316  BookMethod(seedloader, theMethod, methodTitle, theOption);
2317 
2318  //Train/Test/Evaluation
2319  TrainAllMethods();
2320  TestAllMethods();
2321  EvaluateAllMethods();
2322 
2323  //getting ROC
2324  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2325 
2326  //cleaning information to process sub-seeds
2327  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2329  delete sresults;
2330  delete seedloader;
2331  this->DeleteAllMethods();
2332  fMethodsMap.clear();
2333 
2334  //removing global result because it is requiring a lot of RAM for all seeds
2335 
2336  for (uint32_t i = 0; i < VIBITS; ++i) {
2337  if (x & (1 << i)) {
2338  y = x & ~(uint64_t(1) << i);
2339  std::bitset<VIBITS> ybitset(y);
2340  //need at least one variable
2341  //NOTE: if sub-seed is zero then is the special case
2342  //that count in xbitset is 1
2343  Double_t ny = log(x - y) / 0.693147;
2344  if (y == 0) {
2345  importances[ny] = SROC - 0.5;
2346  continue;
2347  }
2348 
2349  //creating loader for sub-seed
2350  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2351  //adding variables from sub-seed
2352  for (int index = 0; index < nbits; index++) {
2353  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2354  }
2355 
2356  //Loading Dataset
2357  DataLoaderCopy(subseedloader,loader);
2358 
2359  //Booking SubSeed
2360  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2361 
2362  //Train/Test/Evaluation
2363  TrainAllMethods();
2364  TestAllMethods();
2365  EvaluateAllMethods();
2366 
2367  //getting ROC
2368  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2369  importances[ny] += SROC - SSROC;
2370 
2371  //cleaning information
2372  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2374  delete ssresults;
2375  delete subseedloader;
2376  this->DeleteAllMethods();
2377  fMethodsMap.clear();
2378  }
2379  }
2380  std::cout<<"--- Variable Importance Results (Short)"<<std::endl;
2381  return GetImportance(nbits,importances,varNames);
2382 }
2383 
2384 ////////////////////////////////////////////////////////////////////////////////
2385 
2386 TH1F* TMVA::Factory::EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2387 {
2388  TRandom3 *rangen = new TRandom3(0); //Random Gen.
2389 
2390  uint64_t x = 0;
2391  uint64_t y = 0;
2392 
2393  //getting number of variables and variable names from loader
2394  const int nbits = loader->GetDataSetInfo().GetNVariables();
2395  std::vector<TString> varNames = loader->GetDataSetInfo().GetListOfVariables();
2396 
2397  long int range = pow(2, nbits);
2398 
2399  //vector to save importances
2400  std::vector<Double_t> importances(nbits);
2401  Double_t importances_norm = 0;
2402  for (int i = 0; i < nbits; i++)importances[i] = 0;
2403 
2404  Double_t SROC, SSROC; //computed ROC value
2405  for (UInt_t n = 0; n < nseeds; n++) {
2406  x = rangen -> Integer(range);
2407 
2408  std::bitset<32> xbitset(x);
2409  if (x == 0) continue; //data loader need at least one variable
2410 
2411 
2412  //creating loader for seed
2413  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2414 
2415  //adding variables from seed
2416  for (int index = 0; index < nbits; index++) {
2417  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2418  }
2419 
2420  //Loading Dataset
2421  DataLoaderCopy(seedloader,loader);
2422 
2423  //Booking Seed
2424  BookMethod(seedloader, theMethod, methodTitle, theOption);
2425 
2426  //Train/Test/Evaluation
2427  TrainAllMethods();
2428  TestAllMethods();
2429  EvaluateAllMethods();
2430 
2431  //getting ROC
2432  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2433 // std::cout << "Seed: n " << n << " x " << x << " xbitset:" << xbitset << " ROC " << SROC << std::endl;
2434 
2435  //cleaning information to process sub-seeds
2436  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2438  delete sresults;
2439  delete seedloader;
2440  this->DeleteAllMethods();
2441  fMethodsMap.clear();
2442 
2443  //removing global result because it is requiring a lot of RAM for all seeds
2444 
2445  for (uint32_t i = 0; i < 32; ++i) {
2446  if (x & (uint64_t(1) << i)) {
2447  y = x & ~(1 << i);
2448  std::bitset<32> ybitset(y);
2449  //need at least one variable
2450  //NOTE: if sub-seed is zero then is the special case
2451  //that count in xbitset is 1
2452  Double_t ny = log(x - y) / 0.693147;
2453  if (y == 0) {
2454  importances[ny] = SROC - 0.5;
2455  importances_norm += importances[ny];
2456  // std::cout << "SubSeed: " << y << " y:" << ybitset << "ROC " << 0.5 << std::endl;
2457  continue;
2458  }
2459 
2460  //creating loader for sub-seed
2461  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2462  //adding variables from sub-seed
2463  for (int index = 0; index < nbits; index++) {
2464  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2465  }
2466 
2467  //Loading Dataset
2468  DataLoaderCopy(subseedloader,loader);
2469 
2470  //Booking SubSeed
2471  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2472 
2473  //Train/Test/Evaluation
2474  TrainAllMethods();
2475  TestAllMethods();
2476  EvaluateAllMethods();
2477 
2478  //getting ROC
2479  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2480  importances[ny] += SROC - SSROC;
2481  //std::cout << "SubSeed: " << y << " y:" << ybitset << " x-y " << x - y << " " << std::bitset<32>(x - y) << " ny " << ny << " SROC " << SROC << " SSROC " << SSROC << " Importance = " << importances[ny] << std::endl;
2482  //cleaning information
2483  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2485  delete ssresults;
2486  delete subseedloader;
2487  this->DeleteAllMethods();
2488  fMethodsMap.clear();
2489  }
2490  }
2491  }
2492  std::cout<<"--- Variable Importance Results (Random)"<<std::endl;
2493  return GetImportance(nbits,importances,varNames);
2494 }
2495 
2496 ////////////////////////////////////////////////////////////////////////////////
2497 
2498 TH1F* TMVA::Factory::GetImportance(const int nbits,std::vector<Double_t> importances,std::vector<TString> varNames)
2499 {
2500  TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);
2501 
2502  gStyle->SetOptStat(000000);
2503 
2504  Float_t normalization = 0.0;
2505  for (int i = 0; i < nbits; i++) {
2506  normalization = normalization + importances[i];
2507  }
2508 
2509  Float_t roc = 0.0;
2510 
2511  gStyle->SetTitleXOffset(0.4);
2512  gStyle->SetTitleXOffset(1.2);
2513 
2514 
2515  std::vector<Double_t> x_ie(nbits), y_ie(nbits);
2516  for (Int_t i = 1; i < nbits + 1; i++) {
2517  x_ie[i - 1] = (i - 1) * 1.;
2518  roc = 100.0 * importances[i - 1] / normalization;
2519  y_ie[i - 1] = roc;
2520  std::cout<<"--- "<<varNames[i-1]<<" = "<<roc<<" %"<<std::endl;
2521  vih1->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
2522  vih1->SetBinContent(i, roc);
2523  }
2524  TGraph *g_ie = new TGraph(nbits + 2, &x_ie[0], &y_ie[0]);
2525  g_ie->SetTitle("");
2526 
2527  vih1->LabelsOption("v >", "X");
2528  vih1->SetBarWidth(0.97);
2529  Int_t ca = TColor::GetColor("#006600");
2530  vih1->SetFillColor(ca);
2531  //Int_t ci = TColor::GetColor("#990000");
2532 
2533  vih1->GetYaxis()->SetTitle("Importance (%)");
2534  vih1->GetYaxis()->SetTitleSize(0.045);
2535  vih1->GetYaxis()->CenterTitle();
2536  vih1->GetYaxis()->SetTitleOffset(1.24);
2537 
2538  vih1->GetYaxis()->SetRangeUser(-7, 50);
2539  vih1->SetDirectory(0);
2540 
2541 // vih1->Draw("B");
2542  return vih1;
2543 }
TMVA::MethodBase::TestClassification
virtual void TestClassification()
initialization
Definition: MethodBase.cxx:1111
TH1::LabelsOption
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
Definition: TH1.cxx:5221
m
auto * m
Definition: textangle.C:8
TPrincipal.h
n
const Int_t n
Definition: legend1.C:16
TMVA::Factory::fModelPersistence
Bool_t fModelPersistence
the training type
Definition: Factory.h:218
ResultsClassification.h
TMVA::Factory::EvaluateImportanceRandom
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2386
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:91
TMVA::Configurable::Log
MsgLogger & Log() const
Definition: Configurable.h:164
TMVA::Types::kMulticlass
@ kMulticlass
Definition: Types.h:153
DataSetManager.h
TMVA::Factory::fVerboseLevel
TString fVerboseLevel
verbose mode
Definition: Factory.h:210
TMVA::MethodBase::GetSeparation
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
Definition: MethodBase.cxx:2775
TGraph::SetTitle
virtual void SetTitle(const char *title="")
Change (i.e.
Definition: TGraph.cxx:2324
TMVA::MethodBase::Data
DataSet * Data() const
Definition: MethodBase.h:408
ROCCalc.h
TMVA::MethodBase::SetModelPersistence
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:381
TMVA::MethodBase::GetROCIntegral
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
Definition: MethodBase.cxx:2808
TMVA::Ranking::Print
virtual void Print() const
get maximum length of variable names
Definition: Ranking.cxx:111
TMVA::Configurable
Definition: Configurable.h:66
TMVA::DataSetInfo::IsSignal
Bool_t IsSignal(const Event *ev) const
Definition: DataSetInfo.cxx:167
TMVA::DataLoader::PrepareTrainingAndTestTree
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:631
TMVA::MethodBase::GetAnalysisType
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:437
TPrincipal::AddRow
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Definition: TPrincipal.cxx:410
TMVA::MethodBoost::SetBoostedMethodName
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:112
TMVA::Tools::SplitString
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings
Definition: Tools.cxx:1211
TMVA::Types::kRegression
@ kRegression
Definition: Types.h:152
TMVA::MethodBase::SetupMethod
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:406
TPrincipal::GetCovarianceMatrix
const TMatrixD * GetCovarianceMatrix() const
Definition: TPrincipal.h:59
TMVA::ResultsMulticlass
Definition: ResultsMulticlass.h:80
TString::Data
const char * Data() const
Definition: TString.h:369
TMVA::DataSetInfo::GetDataSetManager
DataSetManager * GetDataSetManager()
Definition: DataSetInfo.h:194
TMVA::Factory::MakeClass
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Definition: Factory.cxx:1276
TMVA::Factory::PrintHelpMessage
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Definition: Factory.cxx:1304
DataSetInfo.h
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
Form
char * Form(const char *fmt,...)
TMVA::Configurable::AddPreDefVal
void AddPreDefVal(const T &)
Definition: Configurable.h:168
TMVA::DataLoaderCopy
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
READXML
#define READXML
Definition: Factory.cxx:100
TMVA::Ranking
Definition: Ranking.h:70
TMVA::DataSetInfo::GetDataSet
DataSet * GetDataSet() const
returns data set
Definition: DataSetInfo.cxx:480
TPrincipal::MakePrincipals
virtual void MakePrincipals()
Perform the principal components analysis.
Definition: TPrincipal.cxx:869
TMVA::Tools::UsefulSortAscending
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:550
TAxis::SetRangeUser
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
Definition: TAxis.cxx:942
TMVA::Factory::OptimizeAllMethods
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so.
Definition: Factory.cxx:691
TMVA::DataSet::SetCurrentType
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:112
TGraph.h
IMethod.h
TMVA::MethodBase::SetSilentFile
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:377
MinNoTrainingEvents
const Int_t MinNoTrainingEvents
Definition: Factory.cxx:95
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:762
TMVA::MethodBase::DeclareCompatibilityOptions
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:596
TMVA::Types::kTesting
@ kTesting
Definition: Types.h:168
Ranking.h
TMVA::DataSetInfo::GetNVariables
UInt_t GetNVariables() const
Definition: DataSetInfo.h:127
TMVA::TransformationHandler
Definition: TransformationHandler.h:81
TMVA::MethodBase::TrainMethod
void TrainMethod()
Definition: MethodBase.cxx:650
DataLoader.h
Float_t
float Float_t
Definition: RtypesCore.h:57
log
double log(double)
VariableInfo.h
TStyle.h
TGeant4Unit::s
static constexpr double s
Definition: TGeant4SystemOfUnits.h:168
TMVA::Factory::MVector
std::vector< IMethod * > MVector
Definition: Factory.h:84
Int_t
int Int_t
Definition: RtypesCore.h:45
TMVA::Factory::TestAllMethods
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponi...
Definition: Factory.cxx:1241
TMVA::Config::SetUseColor
void SetUseColor(Bool_t uc)
Definition: Config.h:85
TMVA::MethodBase::fDataSetInfo
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:605
TNamed::fName
TString fName
Definition: TNamed.h:38
TString::Contains
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:624
TMVA::ROCCurve::GetROCIntegral
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
Definition: ROCCurve.cxx:250
TH1::SetBinContent
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8677
TMVA::Factory::GetImportance
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Definition: Factory.cxx:2498
x
Double_t x[n]
Definition: legend1.C:17
TString::Length
Ssiz_t Length() const
Definition: TString.h:410
TSystem::MakeDirectory
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:826
TMVA::Config::SetSilent
void SetSilent(Bool_t s)
Definition: Config.h:88
MethodBase.h
TStyle::SetTitleXOffset
void SetTitleXOffset(Float_t offset=1)
Definition: TStyle.h:392
TMVA::Event::SetIsTraining
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:391
TMVA::MsgLogger::InhibitOutput
static void InhibitOutput()
Definition: MsgLogger.cxx:73
TMVA::Factory::GetROCIntegral
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC),...
Definition: Factory.cxx:839
TMVA::Factory::fgTargetFile
TFile * fgTargetFile
Definition: Factory.h:201
TAxis::CenterTitle
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
Definition: TAxis.h:184
TCanvas.h
TMVA::Factory::fVerbose
Bool_t fVerbose
list of transformations to test
Definition: Factory.h:209
TMVA::Factory::fCorrelations
Bool_t fCorrelations
verbosity level, controls granularity of logging
Definition: Factory.h:211
TString
Definition: TString.h:136
TMultiGraph::Draw
virtual void Draw(Option_t *chopt="")
Draw this multigraph with its current attributes.
Definition: TMultiGraph.cxx:541
TMVA::Factory::BookMethodWeightfile
MethodBase * BookMethodWeightfile(DataLoader *dataloader, TMVA::Types::EMVA methodType, const TString &weightfile)
Adds an already constructed method to be managed by this factory.
Definition: Factory.cxx:494
TMatrixT
Definition: TMatrixDfwd.h:22
TMVA::MethodCategory
Definition: MethodCategory.h:83
TMVA::DataSetInfo::CorrelationMatrix
const TMatrixD * CorrelationMatrix(const TString &className) const
Definition: DataSetInfo.cxx:197
v
@ v
Definition: rootcling_impl.cxx:3635
TFile.h
TMVA::DataSetInfo::GetVariableInfo
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:105
bool
TTree::Write
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9563
TPad::BuildLegend
virtual TLegend * BuildLegend(Double_t x1=0.3, Double_t y1=0.21, Double_t x2=0.3, Double_t y2=0.21, const char *title="", Option_t *option="")
Build a legend from the graphical objects in the pad.
Definition: TPad.cxx:494
TMVA::Config::VariablePlotting::fNbinsXOfROCCurve
Int_t fNbinsXOfROCCurve
Definition: Config.h:136
TString::ReplaceAll
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:692
TMVA::MethodBase::DoMulticlass
Bool_t DoMulticlass() const
Definition: MethodBase.h:439
TMatrixDSym.h
TAttAxis::SetTitleSize
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title.
Definition: TAttAxis.cxx:303
TMVA::Factory::Greetings
void Greetings()
Print welcome message.
Definition: Factory.cxx:286
TMultiGraph.h
TMVA::MethodBase::DataInfo
DataSetInfo & DataInfo() const
Definition: MethodBase.h:409
TMVA::ResultsClassification
Definition: ResultsClassification.h:71
TROOT.h
TH1::SetTitle
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
Definition: TH1.cxx:6344
TMVA::Factory::Verbose
Bool_t Verbose(void) const
Definition: Factory.h:134
TMVA::ClassifierFactory::Instance
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
Definition: ClassifierFactory.cxx:48
TMVA::Types::kCuts
@ kCuts
Definition: Types.h:103
TMVA::DataSet::GetNEvtBkgdTest
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:435
TMVA::DataSetInfo::GetNClasses
UInt_t GetNClasses() const
Definition: DataSetInfo.h:155
TObject::kOverwrite
@ kOverwrite
overwrite existing object with same name
Definition: TObject.h:88
VariableTransform.h
TMVA::Tools::TMVAVersionMessage
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition: Tools.cxx:1328
TMVA::Event::GetValue
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:236
TMVA::DataSetInfo
Definition: DataSetInfo.h:62
TMVA::DataSetInfo::GetCut
const TCut & GetCut(Int_t i) const
Definition: DataSetInfo.h:168
TMultiGraph::GetXaxis
TAxis * GetXaxis()
Get x axis of the graph.
Definition: TMultiGraph.cxx:1127
TMVA::Factory::GetROC
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Private method to generate a ROCCurve instance for a given method.
Definition: Factory.cxx:740
DataInputHandler.h
TMVA::MethodBase::GetMethodName
const TString & GetMethodName() const
Definition: MethodBase.h:330
MsgLogger.h
MethodBoost.h
TMVA::DataSet::GetEventCollection
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:216
TMultiGraph::GetYaxis
TAxis * GetYaxis()
Get y axis of the graph.
Definition: TMultiGraph.cxx:1139
TMVA::DataSetManager::DataInput
DataInputHandler & DataInput()
Definition: DataSetManager.h:98
TLeaf.h
TMVA::Tools::FormattedOutput
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:899
ResultsRegression.h
gStyle
R__EXTERN TStyle * gStyle
Definition: TStyle.h:412
TSystem.h
TMVA::Configurable::CheckForUnusedOptions
void CheckForUnusedOptions() const
checks for unused options in option string
Definition: Configurable.cxx:270
TH1::GetYaxis
TAxis * GetYaxis()
Definition: TH1.h:318
TMVA::MethodBoost
Definition: MethodBoost.h:84
TRandom3
Definition: TRandom3.h:27
TMVA::Types::EAnalysisType
EAnalysisType
Definition: Types.h:150
h
#define h(i)
Definition: RSha256.hxx:124
TMVA::DataSet::GetTree
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets,...
Definition: DataSet.cxx:609
TMVA::Types::ETreeType
ETreeType
Definition: Types.h:166
TMath::IsNaN
Bool_t IsNaN(Double_t x)
Definition: TMath.h:894
TMVA::Factory::DeleteAllMethods
void DeleteAllMethods(void)
Delete methods.
Definition: Factory.cxx:314
TMVA::MethodBase::CheckSetup
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:433
TMVA::DataSet::GetEvent
const Event * GetEvent() const
Definition: DataSet.cxx:202
TMVA::DataSet::GetNEvents
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:206
TMultiGraph::GetHistogram
TH1F * GetHistogram()
Returns a pointer to the histogram used to draw the axis.
Definition: TMultiGraph.cxx:1049
TMVA::gConfig
Config & gConfig()
TMVA::DataSetInfo::GetClassInfo
ClassInfo * GetClassInfo(Int_t clNum) const
Definition: DataSetInfo.cxx:146
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:92
TMVA::IMethod::MakeClass
virtual void MakeClass(const TString &classFileName=TString("")) const =0
TH1::AddDirectory
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1225
TMVA::MethodBase::WriteEvaluationHistosToFile
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Definition: MethodBase.cxx:2080
TMVA::MethodBase::TestRegression
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
Definition: MethodBase.cxx:977
TPrincipal
Definition: TPrincipal.h:21
TNamed::Clone
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TNamed.cxx:74
TMVA::DataSet
Definition: DataSet.h:81
TMVA::Types::kClassification
@ kClassification
Definition: Types.h:151
TMVA::Factory::GetROCCurveAsMultiGraph
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
Definition: Factory.cxx:969
TMVA::Factory::WriteDataInformation
void WriteDataInformation(DataSetInfo &fDataSetInfo)
Definition: Factory.cxx:593
TMVA::MsgLogger::SetMinType
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:120
TMVA::Config::SetDrawProgressBar
void SetDrawProgressBar(Bool_t d)
Definition: Config.h:94
TMVA::Types::kCategory
@ kCategory
Definition: Types.h:122
TH2
Definition: TH2.h:30
TMVA::Factory::EvaluateImportance
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Definition: Factory.cxx:2162
TMVA::MethodBase::ReadStateFromFile
void ReadStateFromFile()
Function to write options and weights to file.
Definition: MethodBase.cxx:1412
TMVA::Config::IONames::fWeightFileDir
TString fWeightFileDir
Definition: Config.h:147
TMVA::Factory::Factory
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
Definition: Factory.cxx:115
TMVA::Factory
Definition: Factory.h:80
TMVA::Tools::ROOTVersionMessage
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition: Tools.cxx:1337
TString::BeginsWith
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:615
TMVA::MethodBase::OptimizeTuningParameters
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:623
TMVA::MethodBase::SetWeightFileDir
void SetWeightFileDir(TString fileDir)
set directory of weight file
Definition: MethodBase.cxx:2045
TH1::SetDirectory
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8392
y
Double_t y[n]
Definition: legend1.C:17
TMVA::MethodBase::GetMethodTypeName
TString GetMethodTypeName() const
Definition: MethodBase.h:331
TColor::GetColor
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
Definition: TColor.cxx:1766
TMVA::MethodBase
Definition: MethodBase.h:111
ROCCurve.h
TMVA::Types
Definition: Types.h:96
Types.h
TNamed::SetTitle
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
Configurable.h
TMVA::Results
Definition: Results.h:57
TH2.h
TMVA::DataSetInfo::GetSplitOptions
const TString & GetSplitOptions() const
Definition: DataSetInfo.h:186
TMVA::Factory::fAnalysisType
Types::EAnalysisType fAnalysisType
jobname, used as extension in weight file names
Definition: Factory.h:217
TStyle::SetOptStat
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1592
TMVA::Factory::EvaluateImportanceAll
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2185
TFile
Definition: TFile.h:54
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:182
Config.h
TMVA::Configurable::GetOptions
const TString & GetOptions() const
Definition: Configurable.h:126
unsigned int
TMVA::MethodBase::DoRegression
Bool_t DoRegression() const
Definition: MethodBase.h:438
TMVA::TMVAGlob::GetMethodName
void GetMethodName(TString &name, TKey *mkey)
Definition: tmvaglob.cxx:335
TMVA::IMethod
Definition: IMethod.h:53
TMVA::Tools::Color
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
TMVA::Types::kTraining
@ kTraining
Definition: Types.h:167
TMVA::Factory::GetMethod
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
Definition: Factory.cxx:558
TMVA::DataSet::GetNEvtSigTest
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:427
gSystem
R__EXTERN TSystem * gSystem
Definition: TSystem.h:559
TMultiGraph
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:36
TMVA::Tools::UsefulSortDescending
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:576
TMVA::DataSetInfo::GetNTargets
UInt_t GetNTargets() const
Definition: DataSetInfo.h:128
VIBITS
#define VIBITS
Definition: Factory.cxx:103
TAxis::SetBinLabel
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition: TAxis.cxx:823
sum
static long int sum(long int i)
Definition: Factory.cxx:2272
TMVA::Factory::BookMethod
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition: Factory.cxx:342
TMVA::Types::kMaxAnalysisType
@ kMaxAnalysisType
Definition: Types.h:155
TMultiGraph::GetListOfGraphs
TList * GetListOfGraphs() const
Definition: TMultiGraph.h:70
TMVA::MethodBase::GetSignificance
virtual Double_t GetSignificance() const
compute significance of mean difference
Definition: MethodBase.cxx:2762
TString::CompareTo
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:418
TMVA::Factory::~Factory
virtual ~Factory()
Destructor.
Definition: Factory.cxx:296
TString::IsNull
Bool_t IsNull() const
Definition: TString.h:407
Double_t
double Double_t
Definition: RtypesCore.h:59
TGraph
Definition: TGraph.h:41
TMVA::MsgLogger
Definition: MsgLogger.h:83
TMVA::MethodBase::MakeClass
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
Definition: MethodBase.cxx:2993
TMVA::DataSetInfo::CreateCorrelationMatrixHist
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
Definition: DataSetInfo.cxx:416
TMVA::MethodBase::SetAnalysisType
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:436
Cppyy::GetMethod
RPY_EXPORTED TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
Definition: clingwrapper.cxx:1384
TMVA::ROCCurve::GetROCCurve
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
Definition: ROCCurve.cxx:276
TCanvas
Definition: TCanvas.h:23
TMVA::Types::Instance
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:69
TMVA::Factory::EvaluateAllVariables
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
Definition: Factory.cxx:1331
TMVA::MethodBoost::fDataSetManager
DataSetManager * fDataSetManager
Definition: MethodBoost.h:219
TMatrixD
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
TMVA::IMethod::HasAnalysisType
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
TMVA::DataSet::GetResults
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
TMVA::Types::EMVA
EMVA
Definition: Types.h:101
TH1F
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:572
TMVA::Config::GetIONames
IONames & GetIONames()
Definition: Config.h:123
TMVA::DataSetInfo::GetName
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:71
TMVA::Factory::HasMethod
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
Definition: Factory.cxx:576
TMVA::MethodBase::GetMulticlassConfusionMatrix
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
Definition: MethodBase.cxx:2736
graph
Definition: graph.py:1
TMVA::MethodBase::GetName
const char * GetName() const
Definition: MethodBase.h:333
TMVA::Event
Definition: Event.h:51
TMVA::Configurable::fLogger
MsgLogger * fLogger
Definition: Configurable.h:170
TMultiGraph::Add
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
Definition: TMultiGraph.cxx:451
Factory.h
TMVA::Configurable::SetConfigDescription
void SetConfigDescription(const char *d)
Definition: Configurable.h:106
TMVA::MethodBase::GetEfficiency
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
Definition: MethodBase.cxx:2288
TMVA::MethodBase::TestMulticlass
virtual void TestMulticlass()
test multiclass classification
Definition: MethodBase.cxx:1084
TMVA::VariableInfo::GetLabel
const TString & GetLabel() const
Definition: VariableInfo.h:105
TMVA::MethodCategory::fDataSetManager
DataSetManager * fDataSetManager
Definition: MethodCategory.h:156
name
char name[80]
Definition: TGX11.cxx:110
TPad::SetGrid
virtual void SetGrid(Int_t valuex=1, Int_t valuey=1)
Definition: TPad.h:327
TMVA::MethodBase::GetMethodType
Types::EMVA GetMethodType() const
Definition: MethodBase.h:332
TMVA::MsgLogger::SetSource
void SetSource(const std::string &source)
Definition: MsgLogger.h:118
TMVA::MethodBase::SetFile
void SetFile(TFile *file)
Definition: MethodBase.h:374
TMVA::Tools::GetCorrelationMatrix
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:336
TMVA::MethodBase::PrintHelpMessage
void PrintHelpMessage() const
prints out method-specific help method
Definition: MethodBase.cxx:3254
TMVA::Factory::fTransformations
TString fTransformations
option string given by construction (presently only "V")
Definition: Factory.h:208
TMVA::Factory::fROC
Bool_t fROC
enable to calculate corelations
Definition: Factory.h:212
TMVA::MethodBase::GetTrainingEfficiency
virtual Double_t GetTrainingEfficiency(const TString &)
Definition: MethodBase.cxx:2514
ResultsMulticlass.h
Tools.h
TMVA::DataSet::GetNTrainingEvents
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:91
TMVA::MethodBase::AddOutput
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
Definition: MethodBase.cxx:1301
TNamed::GetName
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:53
TMVA::Factory::EvaluateAllMethods
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
Definition: Factory.cxx:1346
ClassifierFactory.h
TMVA::Config::fVariablePlotting
class TMVA::Config::VariablePlotting fVariablePlotting
type
int type
Definition: TGX11.cxx:121
TMVA::Factory::TrainAllMethods
void TrainAllMethods()
Iterates through all booked methods and calls training.
Definition: Factory.cxx:1090
TMVA::DataLoader::AddVariable
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:484
TAttAxis::SetTitleOffset
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title.
Definition: TAttAxis.cxx:293
TMVA::Config::IONames::fWeightFileDirPrefix
TString fWeightFileDirPrefix
Definition: Config.h:146
TMVA::ROCCurve
Definition: ROCCurve.h:45
TString::ToLower
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
TH1::GetXaxis
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:317
TMVA::DataInputHandler::GetEntries
UInt_t GetEntries(const TString &name) const
Definition: DataInputHandler.h:122
Results.h
TMVA::TransformationHandler::PrintVariableRanking
void PrintVariableRanking() const
prints ranking of input variables
Definition: TransformationHandler.cxx:924
TMVA::Factory::EvaluateImportanceShort
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2281
ROOT::Math::detail::sep
@ sep
Definition: GenVectorIO.h:55
TMVA::gTools
Tools & gTools()
TMVA::Configurable::DeclareOptionRef
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
pow
double pow(double, double)
TMVA::Tools::TMVACitation
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
kinds of TMVA citation
Definition: Tools.cxx:1453
TMVA::Tools::TMVAWelcomeMessage
void TMVAWelcomeMessage()
direct output, eg, when starting ROOT session -> no use of Logger here
Definition: Tools.cxx:1314
MethodCategory.h
TH1::SetBarWidth
virtual void SetBarWidth(Float_t width=0.5)
Definition: TH1.h:357
TMVA::DataLoader::GetDataSetInfo
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:137
DataSet.h
TMatrixF.h
TMVA::Configurable::ParseOptions
virtual void ParseOptions()
options parser
Definition: Configurable.cxx:124
TMVA::ClassifierFactory::Create
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
Definition: ClassifierFactory.cxx:89
TMVA::Factory::SetVerbose
void SetVerbose(Bool_t v=kTRUE)
Definition: Factory.cxx:334
TMVA::Types::kNoAnalysisType
@ kNoAnalysisType
Definition: Types.h:154
TMVA::MethodBase::ProcessSetup
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:423
TMVA::Tools::kHtmlLink
@ kHtmlLink
Definition: Tools.h:214
TMVA::Configurable::SetConfigName
void SetConfigName(const char *n)
Definition: Configurable.h:105
TMath.h
TMVA::DataSetInfo::GetListOfVariables
std::vector< TString > GetListOfVariables() const
returns list of variables
Definition: DataSetInfo.cxx:393
TMVA::Factory::GetROCCurve
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
Definition: Factory.cxx:900
gROOT
#define gROOT
Definition: TROOT.h:406
TMVA::IMethod::PrintHelpMessage
virtual void PrintHelpMessage() const =0
int
TMVA::CreateVariableTransforms
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
Definition: VariableTransform.cxx:81
TMVA::ROCCurve::GetEffSForEffB
Double_t GetEffSForEffB(Double_t effB, const UInt_t num_points=41)
Calculate the signal efficiency (sensitivity) for a given background efficiency (sensitivity).
Definition: ROCCurve.cxx:219
TMVA::DataLoader
Definition: DataLoader.h:50
TEventList.h