Factory.cxx
1 // @(#)Root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 // Updated by: Omar Zapata, Kim Albertsson
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Factory *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors : *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
16  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * Omar Zapata <Omar.Zapata@cern.ch> - UdeA/ITM Colombia *
22  * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
23  * Sergei Gleyzer <Sergei.Gleyzer@cern.ch> - U of Florida & CERN *
24  * Kim Albertsson <kim.albertsson@cern.ch> - LTU & CERN *
25  * *
26  * Copyright (c) 2005-2015: *
27  * CERN, Switzerland *
28  * U. of Victoria, Canada *
29  * MPI-K Heidelberg, Germany *
30  * U. of Bonn, Germany *
31  * UdeA/ITM, Colombia *
32  * U. of Florida, USA *
33  * *
34  * Redistribution and use in source and binary forms, with or without *
35  * modification, are permitted according to the terms listed in LICENSE *
36  * (http://tmva.sourceforge.net/LICENSE) *
37  **********************************************************************************/
38 
39 /*! \class TMVA::Factory
40 \ingroup TMVA
41 
42 This is the main MVA steering class.
43 It creates all MVA methods, and guides them through the training, testing and
44 evaluation phases.
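
A minimal usage sketch (the file, tree and option names here are illustrative,
not prescribed by this class):

~~~{.cpp}
TFile *output = TFile::Open("TMVA_output.root", "RECREATE");
TMVA::Factory factory("myJob", output, "AnalysisType=Classification");

TMVA::DataLoader loader("dataset");
// ... declare input variables and add signal/background trees to the loader ...

factory.BookMethod(&loader, TMVA::Types::kBDT, "BDT", "NTrees=200");
factory.TrainAllMethods();
factory.TestAllMethods();
factory.EvaluateAllMethods();
output->Close();
~~~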
45 */
46 
47 #include "TMVA/Factory.h"
48 
49 #include "TMVA/ClassifierFactory.h"
50 #include "TMVA/Config.h"
51 #include "TMVA/Configurable.h"
52 #include "TMVA/Tools.h"
53 #include "TMVA/Ranking.h"
54 #include "TMVA/DataSet.h"
55 #include "TMVA/IMethod.h"
56 #include "TMVA/MethodBase.h"
57 #include "TMVA/DataInputHandler.h"
58 #include "TMVA/DataSetManager.h"
59 #include "TMVA/DataSetInfo.h"
60 #include "TMVA/DataLoader.h"
61 #include "TMVA/MethodBoost.h"
62 #include "TMVA/MethodCategory.h"
63 #include "TMVA/ROCCalc.h"
64 #include "TMVA/ROCCurve.h"
65 #include "TMVA/MsgLogger.h"
66 
67 #include "TMVA/VariableInfo.h"
68 #include "TMVA/VariableTransform.h"
69 
70 #include "TMVA/Results.h"
71 #include "TMVA/ResultsClassification.h"
72 #include "TMVA/ResultsRegression.h"
73 #include "TMVA/ResultsMulticlass.h"
74 #include <list>
75 #include <bitset>
76 
77 #include "TMVA/Types.h"
78 
79 #include "TROOT.h"
80 #include "TFile.h"
81 #include "TTree.h"
82 #include "TLeaf.h"
83 #include "TEventList.h"
84 #include "TH2.h"
85 #include "TText.h"
86 #include "TLegend.h"
87 #include "TGraph.h"
88 #include "TStyle.h"
89 #include "TMatrixF.h"
90 #include "TMatrixDSym.h"
91 #include "TMultiGraph.h"
92 #include "TPaletteAxis.h"
93 #include "TPrincipal.h"
94 #include "TMath.h"
95 #include "TObjString.h"
96 #include "TSystem.h"
97 #include "TCanvas.h"
98 
99 const Int_t MinNoTrainingEvents = 10;
100 //const Int_t MinNoTestEvents = 1;
101 
102 ClassImp(TMVA::Factory)
103 
104 #define READXML kTRUE
105 
106 //number of bits for bitset
107 #define VIBITS 32
108 
109 
110 
111 ////////////////////////////////////////////////////////////////////////////////
112 /// Standard constructor.
113 ///
114 /// - jobName : this name will appear in all weight file names produced by the MVAs
115 /// - theTargetFile : output ROOT file; the test tree and all evaluation plots
116 /// will be stored here
117 /// - theOption : option string; currently: "V" for verbose
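///
/// For example (a sketch; the file and job names are illustrative):
/// ~~~{.cpp}
/// TFile *outFile = TFile::Open("TMVA.root", "RECREATE");
/// TMVA::Factory factory("TMVAClassification", outFile, "V:Color=False:AnalysisType=Auto");
/// ~~~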
118 
119 TMVA::Factory::Factory( TString jobName, TFile* theTargetFile, TString theOption )
120 : Configurable ( theOption ),
121  fTransformations ( "I" ),
122  fVerbose ( kFALSE ),
123  fCorrelations ( kFALSE ),
124  fROC ( kTRUE ),
125  fSilentFile ( kFALSE ),
126  fJobName ( jobName ),
127  fAnalysisType ( Types::kClassification ),
128  fModelPersistence (kTRUE)
129 {
130  fgTargetFile = theTargetFile;
131  fLogger->SetSource(GetName());
132 
133  // render silent
134  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure it is silent if requested
135 
136 
137  // init configurable
138  SetConfigDescription( "Configuration options for Factory running" );
139  SetConfigName( GetName() );
140 
141  // histograms are not automatically associated with the current
142  // directory and hence don't go out of scope when closing the file
143  // TH1::AddDirectory(kFALSE);
144  Bool_t silent = kFALSE;
145 #ifdef WIN32
146  // under Windows, switch progress bar and color off by default, as the typical Windows shell doesn't handle these (it would need different escape sequences)
147  Bool_t color = kFALSE;
148  Bool_t drawProgressBar = kFALSE;
149 #else
150  Bool_t color = !gROOT->IsBatch();
151  Bool_t drawProgressBar = kTRUE;
152 #endif
153  DeclareOptionRef( fVerbose, "V", "Verbose flag" );
154  DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
155  DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
156  DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
157  DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
158  DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
159  DeclareOptionRef( drawProgressBar,
160  "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
161  DeclareOptionRef( fModelPersistence,
162  "ModelPersistence",
163  "Option to save the trained model in xml file or using serialization");
164 
165  TString analysisType("Auto");
166  DeclareOptionRef( analysisType,
167  "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
168  AddPreDefVal(TString("Classification"));
169  AddPreDefVal(TString("Regression"));
170  AddPreDefVal(TString("Multiclass"));
171  AddPreDefVal(TString("Auto"));
172 
173  ParseOptions();
174  CheckForUnusedOptions();
175 
176  if (Verbose()) Log().SetMinType( kVERBOSE );
177 
178  // global settings
179  gConfig().SetUseColor( color );
180  gConfig().SetSilent( silent );
181  gConfig().SetDrawProgressBar( drawProgressBar );
182 
183  analysisType.ToLower();
184  if ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
185  else if( analysisType == "regression" ) fAnalysisType = Types::kRegression;
186  else if( analysisType == "multiclass" ) fAnalysisType = Types::kMulticlass;
187  else if( analysisType == "auto" ) fAnalysisType = Types::kNoAnalysisType;
188 
189 // Greetings();
190 }
191 
192 ////////////////////////////////////////////////////////////////////////////////
193 /// Constructor.
194 
195 TMVA::Factory::Factory( TString jobName, TString theOption )
196 : Configurable ( theOption ),
197  fTransformations ( "I" ),
198  fVerbose ( kFALSE ),
199  fCorrelations ( kFALSE ),
200  fROC ( kTRUE ),
201  fSilentFile ( kTRUE ),
202  fJobName ( jobName ),
203  fAnalysisType ( Types::kClassification ),
204  fModelPersistence (kTRUE)
205 {
206  fgTargetFile = 0;
207  fLogger->SetSource(GetName());
208 
209 
210  // render silent
211  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure it is silent if requested
212 
213 
214  // init configurable
215  SetConfigDescription( "Configuration options for Factory running" );
216  SetConfigName( GetName() );
217 
218  // histograms are not automatically associated with the current
219  // directory and hence don't go out of scope when closing the file
220  TH1::AddDirectory(kFALSE);
221  Bool_t silent = kFALSE;
222 #ifdef WIN32
223  // under Windows, switch progress bar and color off by default, as the typical Windows shell doesn't handle these (it would need different escape sequences)
224  Bool_t color = kFALSE;
225  Bool_t drawProgressBar = kFALSE;
226 #else
227  Bool_t color = !gROOT->IsBatch();
228  Bool_t drawProgressBar = kTRUE;
229 #endif
230  DeclareOptionRef( fVerbose, "V", "Verbose flag" );
231  DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
232  DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
233  DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
234  DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
235  DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
236  DeclareOptionRef( drawProgressBar,
237  "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
238  DeclareOptionRef( fModelPersistence,
239  "ModelPersistence",
240  "Option to save the trained model in xml file or using serialization");
241 
242  TString analysisType("Auto");
243  DeclareOptionRef( analysisType,
244  "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
245  AddPreDefVal(TString("Classification"));
246  AddPreDefVal(TString("Regression"));
247  AddPreDefVal(TString("Multiclass"));
248  AddPreDefVal(TString("Auto"));
249 
250  ParseOptions();
251  CheckForUnusedOptions();
252 
253  if (Verbose()) Log().SetMinType( kVERBOSE );
254 
255  // global settings
256  gConfig().SetUseColor( color );
257  gConfig().SetSilent( silent );
258  gConfig().SetDrawProgressBar( drawProgressBar );
259 
260  analysisType.ToLower();
261  if ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
262  else if( analysisType == "regression" ) fAnalysisType = Types::kRegression;
263  else if( analysisType == "multiclass" ) fAnalysisType = Types::kMulticlass;
264  else if( analysisType == "auto" ) fAnalysisType = Types::kNoAnalysisType;
265 
266  Greetings();
267 }
268 
269 ////////////////////////////////////////////////////////////////////////////////
270 /// Print welcome message.
271 /// Options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg
272 
273 void TMVA::Factory::Greetings()
274 {
275  gTools().ROOTVersionMessage( Log() );
276  gTools().TMVAWelcomeMessage( Log(), gTools().kLogoWelcomeMsg );
277  gTools().TMVAVersionMessage( Log() ); Log() << Endl;
278 }
279 
280 ////////////////////////////////////////////////////////////////////////////////
281 
282 Bool_t TMVA::Factory::IsSilentFile()
283 {
284  return fSilentFile;
285 }
286 
287 ////////////////////////////////////////////////////////////////////////////////
288 
289 Bool_t TMVA::Factory::IsModelPersistence()
290 {
291  return fModelPersistence;
292 }
293 
294 ////////////////////////////////////////////////////////////////////////////////
295 /// Destructor.
296 
297 TMVA::Factory::~Factory( void )
298 {
299  std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
300  for (;trfIt != fDefaultTrfs.end(); trfIt++) delete (*trfIt);
301 
302  this->DeleteAllMethods();
303 
304 
305  // problem with call of REGISTER_METHOD macro ...
306  // ClassifierFactory::DestroyInstance();
307  // Types::DestroyInstance();
308  // Tools::DestroyInstance();
309  // Config::DestroyInstance();
310 }
311 
312 ////////////////////////////////////////////////////////////////////////////////
313 /// Delete methods.
314 
315 void TMVA::Factory::DeleteAllMethods( void )
316 {
317  std::map<TString,MVector*>::iterator itrMap;
318 
319  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
320  {
321  MVector *methods=itrMap->second;
322  // delete methods
323  MVector::iterator itrMethod = methods->begin();
324  for (; itrMethod != methods->end(); itrMethod++) {
325  Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
326  delete (*itrMethod);
327  }
328  methods->clear();
329  delete methods;
330  }
331 }
332 
333 ////////////////////////////////////////////////////////////////////////////////
334 
335 void TMVA::Factory::SetVerbose( Bool_t v )
336 {
337  fVerbose = v;
338 }
339 
340 ////////////////////////////////////////////////////////////////////////////////
341 /// Book a classifier or regression method.
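///
/// A typical call (the option string shown is illustrative; valid options
/// depend on the method being booked):
/// ~~~{.cpp}
/// factory.BookMethod(&loader, "BDT", "myBDT",
///                    "NTrees=400:MaxDepth=3:BoostType=AdaBoost");
/// ~~~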
342 
343 TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption )
344 {
345  if(fModelPersistence) gSystem->MakeDirectory(loader->GetName());//creating directory for DataLoader output
346 
347  TString datasetname=loader->GetName();
348 
349  if( fAnalysisType == Types::kNoAnalysisType ){
350  if( loader->DefaultDataSetInfo().GetNClasses()==2
351  && loader->DefaultDataSetInfo().GetClassInfo("Signal") != NULL
352  && loader->DefaultDataSetInfo().GetClassInfo("Background") != NULL
353  ){
354  fAnalysisType = Types::kClassification; // default is classification
355  } else if( loader->DefaultDataSetInfo().GetNClasses() >= 2 ){
356  fAnalysisType = Types::kMulticlass; // two or more classes, but not two classes named "Signal" and "Background"
357  } else
358  Log() << kFATAL << "No analysis type for " << loader->DefaultDataSetInfo().GetNClasses() << " classes and "
359  << loader->DefaultDataSetInfo().GetNTargets() << " regression targets." << Endl;
360  }
361 
362  // booking via name; the names are translated into enums and the
363  // corresponding overloaded BookMethod is called
364 
365  if(fMethodsMap.find(datasetname)!=fMethodsMap.end())
366  {
367  if (GetMethod( datasetname,methodTitle ) != 0) {
368  Log() << kFATAL << "Booking failed since method with title <"
369  << methodTitle <<"> already exists in dataset <"<< loader->GetName()<<"> "
370  << Endl;
371  }
372  }
373 
374 
375  Log() << kHEADER << "Booking method: " << gTools().Color("bold") << methodTitle
376  // << gTools().Color("reset")<<" DataSet Name: "<<gTools().Color("bold")<<loader->GetName()
377  << gTools().Color("reset") << Endl << Endl;
378 
379  // interpret option string with respect to a request for boosting (i.e., boostNum > 0)
380  Int_t boostNum = 0;
381  TMVA::Configurable* conf = new TMVA::Configurable( theOption );
382  conf->DeclareOptionRef( boostNum = 0, "Boost_num",
383  "Number of times the classifier will be boosted" );
384  conf->ParseOptions();
385  delete conf;
386  TString fFileDir;
387  if(fModelPersistence)
388  {
389  fFileDir=loader->GetName();
390  fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
391  }
392  // initialize methods
393  IMethod* im;
394  if (!boostNum) {
395  im = ClassifierFactory::Instance().Create( std::string(theMethodName),
396  fJobName,
397  methodTitle,
398  loader->DefaultDataSetInfo(),
399  theOption );
400  }
401  else {
402  // boosted classifier, requires a specific definition, making it transparent for the user
403  Log() << kDEBUG <<"Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
404  im = ClassifierFactory::Instance().Create( std::string("Boost"),
405  fJobName,
406  methodTitle,
407  loader->DefaultDataSetInfo(),
408  theOption );
409  MethodBoost* methBoost = dynamic_cast<MethodBoost*>(im); // DSMTEST divided into two lines
410  if (!methBoost) // DSMTEST
411  Log() << kFATAL << "Method with type kBoost cannot be cast to MethodBoost. /Factory" << Endl; // DSMTEST
412 
413  if(fModelPersistence) methBoost->SetWeightFileDir(fFileDir);
414  methBoost->SetModelPersistence(fModelPersistence);
415  methBoost->SetBoostedMethodName( theMethodName ); // DSMTEST divided into two lines
416  methBoost->fDataSetManager = loader->fDataSetManager; // DSMTEST
417  methBoost->SetFile(fgTargetFile);
418  methBoost->SetSilentFile(IsSilentFile());
419  }
420 
421  MethodBase *method = dynamic_cast<MethodBase*>(im);
422  if (method==0) return 0; // could not create method
423 
424  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
425  if (method->GetMethodType() == Types::kCategory) { // DSMTEST
426  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(im)); // DSMTEST
427  if (!methCat) // DSMTEST
428  Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl; // DSMTEST
429 
430  if(fModelPersistence) methCat->SetWeightFileDir(fFileDir);
431  methCat->SetModelPersistence(fModelPersistence);
432  methCat->fDataSetManager = loader->fDataSetManager; // DSMTEST
433  methCat->SetFile(fgTargetFile);
434  methCat->SetSilentFile(IsSilentFile());
435  } // DSMTEST
436 
437 
438  if (!method->HasAnalysisType( fAnalysisType,
439  loader->DefaultDataSetInfo().GetNClasses(),
440  loader->DefaultDataSetInfo().GetNTargets() )) {
441  Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling " ;
442  if (fAnalysisType == Types::kRegression) {
443  Log() << "regression with " << loader->DefaultDataSetInfo().GetNTargets() << " targets." << Endl;
444  }
445  else if (fAnalysisType == Types::kMulticlass ) {
446  Log() << "multiclass classification with " << loader->DefaultDataSetInfo().GetNClasses() << " classes." << Endl;
447  }
448  else {
449  Log() << "classification with " << loader->DefaultDataSetInfo().GetNClasses() << " classes." << Endl;
450  }
451  return 0;
452  }
453 
454  if(fModelPersistence) method->SetWeightFileDir(fFileDir);
455  method->SetModelPersistence(fModelPersistence);
456  method->SetAnalysisType( fAnalysisType );
457  method->SetupMethod();
458  method->ParseOptions();
459  method->ProcessSetup();
460  method->SetFile(fgTargetFile);
461  method->SetSilentFile(IsSilentFile());
462 
463  // check-for-unused-options is performed; may be overridden by derived classes
464  method->CheckSetup();
465 
466  if(fMethodsMap.find(datasetname)==fMethodsMap.end())
467  {
468  MVector *mvector=new MVector;
469  fMethodsMap[datasetname]=mvector;
470  }
471  fMethodsMap[datasetname]->push_back( method );
472  return method;
473 }
474 
475 ////////////////////////////////////////////////////////////////////////////////
476 /// Books an MVA method. The option configuration string is custom for each MVA;
477 /// the TString field "methodTitle" serves to define (and distinguish) several
478 /// instances of a given MVA, e.g., when one wants to compare the performance of
479 /// various configurations.
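///
/// For instance (a sketch), this call forwards to the name-based overload above:
/// ~~~{.cpp}
/// factory.BookMethod(&loader, TMVA::Types::kBDT, "BDT", "NTrees=400");
/// ~~~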
480 
481 TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, Types::EMVA theMethod, TString methodTitle, TString theOption )
482 {
483  return BookMethod(loader, Types::Instance().GetMethodName( theMethod ), methodTitle, theOption );
484 }
485 
486 ////////////////////////////////////////////////////////////////////////////////
487 /// Returns pointer to MVA that corresponds to given method title.
488 
489 TMVA::IMethod* TMVA::Factory::GetMethod(const TString& datasetname, const TString &methodTitle ) const
490 {
491  if(fMethodsMap.find(datasetname)==fMethodsMap.end()) return 0;
492 
493  MVector *methods=fMethodsMap.find(datasetname)->second;
494 
495  MVector::const_iterator itrMethod;
496  //
497  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
498  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
499  if ( (mva->GetMethodName())==methodTitle ) return mva;
500  }
501  return 0;
502 }
503 
504 ////////////////////////////////////////////////////////////////////////////////
505 /// Checks whether a given method name is defined for a given dataset.
506 
507 Bool_t TMVA::Factory::HasMethod(const TString& datasetname, const TString &methodTitle ) const
508 {
509  if(fMethodsMap.find(datasetname)==fMethodsMap.end()) return 0;
510 
511  std::string methodName = methodTitle.Data();
512  auto isEqualToMethodName = [&methodName](TMVA::IMethod * m) {
513  return ( 0 == methodName.compare( m->GetName() ) );
514  };
515 
516  TMVA::Factory::MVector * methods = this->fMethodsMap.at(datasetname);
517  Bool_t isMethodNameExisting = std::any_of( methods->begin(), methods->end(), isEqualToMethodName);
518 
519  return isMethodNameExisting;
520 }
521 
522 ////////////////////////////////////////////////////////////////////////////////
523 
523 
524 void TMVA::Factory::WriteDataInformation(DataSetInfo& fDataSetInfo)
525 {
526  RootBaseDir()->cd();
527 
528  if(!RootBaseDir()->GetDirectory(fDataSetInfo.GetName())) RootBaseDir()->mkdir(fDataSetInfo.GetName());
529  else return; // loader is now in the output file, we don't need to save it again
530 
531  RootBaseDir()->cd(fDataSetInfo.GetName());
532  fDataSetInfo.GetDataSet(); // builds dataset (including calculation of correlation matrix)
533 
534 
535  // correlation matrix of the default DS
536  const TMatrixD* m(0);
537  const TH2* h(0);
538 
539  if(fAnalysisType == Types::kMulticlass){
540  for (UInt_t cls = 0; cls < fDataSetInfo.GetNClasses() ; cls++) {
541  m = fDataSetInfo.CorrelationMatrix(fDataSetInfo.GetClassInfo(cls)->GetName());
542  h = fDataSetInfo.CreateCorrelationMatrixHist(m, TString("CorrelationMatrix")+fDataSetInfo.GetClassInfo(cls)->GetName(),
543  TString("Correlation Matrix (")+ fDataSetInfo.GetClassInfo(cls)->GetName() +TString(")"));
544  if (h!=0) {
545  h->Write();
546  delete h;
547  }
548  }
549  }
550  else{
551  m = fDataSetInfo.CorrelationMatrix( "Signal" );
552  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixS", "Correlation Matrix (signal)");
553  if (h!=0) {
554  h->Write();
555  delete h;
556  }
557 
558  m = fDataSetInfo.CorrelationMatrix( "Background" );
559  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixB", "Correlation Matrix (background)");
560  if (h!=0) {
561  h->Write();
562  delete h;
563  }
564 
565  m = fDataSetInfo.CorrelationMatrix( "Regression" );
566  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrix", "Correlation Matrix");
567  if (h!=0) {
568  h->Write();
569  delete h;
570  }
571  }
572 
573  // some default transformations to evaluate
574  // NOTE: all transformations are destroyed after this test
575  TString processTrfs = "I"; //"I;N;D;P;U;G,D;"
576 
577  // plus some user defined transformations
578  processTrfs = fTransformations;
579 
580  // remove any trace of identity transform - if given (avoid applying it twice)
581  std::vector<TMVA::TransformationHandler*> trfs;
582  TransformationHandler* identityTrHandler = 0;
583 
584  std::vector<TString> trfsDef = gTools().SplitString(processTrfs,';');
585  std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
586  for (; trfsDefIt!=trfsDef.end(); trfsDefIt++) {
587  trfs.push_back(new TMVA::TransformationHandler(fDataSetInfo, "Factory"));
588  TString trfS = (*trfsDefIt);
589 
590  //Log() << kINFO << Endl;
591  Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
592  TMVA::CreateVariableTransforms( trfS,
593  fDataSetInfo,
594  *(trfs.back()),
595  Log() );
596 
597  if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();
598  }
599 
600  const std::vector<Event*>& inputEvents = fDataSetInfo.GetDataSet()->GetEventCollection();
601 
602  // apply all transformations
603  std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
604 
605  for (;trfIt != trfs.end(); trfIt++) {
606  // setting a ROOT dir causes the variable distributions to be saved to the ROOT file
607  (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.GetName()));// every dataloader has its own dir
608  (*trfIt)->CalcTransformations(inputEvents);
609  }
610  if(identityTrHandler) identityTrHandler->PrintVariableRanking();
611 
612  // clean up
613  for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++) delete *trfIt;
614 }
615 
616 ////////////////////////////////////////////////////////////////////////////////
617 /// Iterates through all booked methods and checks whether they use parameter
618 /// tuning. If so, it calls Method::Train() for different parameter settings,
619 /// keeping track of the optimal one; that configuration is the one later used
620 /// in the main training loop.
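///
/// A sketch of a typical call, with the default figure of merit and fitter
/// (the argument values are illustrative):
/// ~~~{.cpp}
/// std::map<TString, Double_t> tuned = factory.OptimizeAllMethods("ROCIntegral", "FitGA");
/// ~~~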
621 
622 std::map<TString,Double_t> TMVA::Factory::OptimizeAllMethods(TString fomType, TString fitType)
623 {
624 
625  std::map<TString,MVector*>::iterator itrMap;
626  std::map<TString,Double_t> TunedParameters;
627  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
628  {
629  MVector *methods=itrMap->second;
630 
631  MVector::iterator itrMethod;
632 
633  // iterate over methods and optimize
634  for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
635  Event::SetIsTraining(kTRUE);
636  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
637  if (!mva) {
638  Log() << kFATAL << "Dynamic cast to MethodBase failed" <<Endl;
639  return TunedParameters;
640  }
641 
642  if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
643  Log() << kWARNING << "Method " << mva->GetMethodName()
644  << " not trained (training tree has fewer entries ["
645  << mva->Data()->GetNTrainingEvents()
646  << "] than required [" << MinNoTrainingEvents << "])" << Endl;
647  continue;
648  }
649 
650  Log() << kINFO << "Optimize method: " << mva->GetMethodName() << " for "
651  << (fAnalysisType == Types::kRegression ? "Regression" :
652  (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl;
653 
654  TunedParameters = mva->OptimizeTuningParameters(fomType,fitType);
655  Log() << kINFO << "Optimization of tuning parameters finished for Method: " << mva->GetName() << Endl;
656  }
657  }
658 
659  return TunedParameters;
660 
661 }
662 
663 ////////////////////////////////////////////////////////////////////////////////
664 /// Private method to generate an instance of a ROCCurve regardless of
665 /// analysis type.
666 ///
667 /// \note You own the returned pointer.
668 ///
669 
670 TMVA::ROCCurve *TMVA::Factory::GetROC(TMVA::DataLoader *loader, TString theMethodName, UInt_t iClass)
671 {
672  return GetROC((TString)loader->GetName(), theMethodName, iClass);
673 }
674 
675 ////////////////////////////////////////////////////////////////////////////////
676 /// Private method to generate an instance of a ROCCurve regardless of
677 /// analysis type.
678 ///
679 /// \note You own the returned pointer.
680 ///
681 
682 TMVA::ROCCurve *TMVA::Factory::GetROC(TString datasetname, TString theMethodName, UInt_t iClass)
683 {
684  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
685  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
686  return nullptr;
687  }
688 
689  if (!this->HasMethod(datasetname, theMethodName)) {
690  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
691  << Endl;
692  return nullptr;
693  }
694 
695  std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
696  if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
697  Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.")
698  << Endl;
699  return nullptr;
700  }
701 
702  TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(this->GetMethod(datasetname, theMethodName));
703  TMVA::DataSet *dataset = method->Data();
704  TMVA::Results *results = dataset->GetResults(theMethodName, Types::kTesting, this->fAnalysisType);
705 
706  UInt_t nClasses = method->DataInfo().GetNClasses();
707  if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
708  Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
709  iClass, nClasses)
710  << Endl;
711  return nullptr;
712  }
713 
714  TMVA::ROCCurve *rocCurve = nullptr;
715  if (this->fAnalysisType == Types::kClassification) {
716 
717  std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
718  std::vector<Bool_t> *mvaResTypes = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
719  std::vector<Float_t> mvaResWeights;
720 
721  auto eventCollection = dataset->GetEventCollection(Types::kTesting);
722  mvaResWeights.reserve(eventCollection.size());
723  for (auto ev : eventCollection) {
724  mvaResWeights.push_back(ev->GetWeight());
725  }
726 
727  rocCurve = new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
728 
729  } else if (this->fAnalysisType == Types::kMulticlass) {
730  std::vector<Float_t> mvaRes;
731  std::vector<Bool_t> mvaResTypes;
732  std::vector<Float_t> mvaResWeights;
733 
734  std::vector<std::vector<Float_t>> *rawMvaRes = dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();
735 
736  // Vector transpose due to values being stored as
737  // [ [0, 1, 2], [0, 1, 2], ... ]
738  // in ResultsMulticlass::GetValueVector.
739  mvaRes.reserve(rawMvaRes->size());
740  for (auto item : *rawMvaRes) {
741  mvaRes.push_back(item[iClass]);
742  }
743 
744  auto eventCollection = dataset->GetEventCollection(Types::kTesting);
745  mvaResTypes.reserve(eventCollection.size());
746  mvaResWeights.reserve(eventCollection.size());
747  for (auto ev : eventCollection) {
748  mvaResTypes.push_back(ev->GetClass() == iClass);
749  mvaResWeights.push_back(ev->GetWeight());
750  }
751 
752  rocCurve = new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
753  }
754 
755  return rocCurve;
756 }
757 
758 ////////////////////////////////////////////////////////////////////////////////
759 /// Calculate the integral of the ROC curve, also known as the area under the
760 /// curve (AUC), for a given method.
761 ///
762 /// Argument iClass specifies the class to generate the ROC curve in a
763 /// multiclass setting. It is ignored for binary classification.
764 ///
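/// For example (a sketch; assumes a method titled "BDT" was booked and evaluated):
/// ~~~{.cpp}
/// Double_t auc = factory.GetROCIntegral(&loader, "BDT");
/// ~~~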
765 
766 Double_t TMVA::Factory::GetROCIntegral(TMVA::DataLoader *loader, TString theMethodName, UInt_t iClass)
767 {
768  return GetROCIntegral((TString)loader->GetName(), theMethodName, iClass);
769 }
770 
771 ////////////////////////////////////////////////////////////////////////////////
772 /// Calculate the integral of the ROC curve, also known as the area under the
773 /// curve (AUC), for a given method.
774 ///
775 /// Argument iClass specifies the class to generate the ROC curve in a
776 /// multiclass setting. It is ignored for binary classification.
777 ///
778 
779 Double_t TMVA::Factory::GetROCIntegral(TString datasetname, TString theMethodName, UInt_t iClass)
780 {
781  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
782  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
783  return 0;
784  }
785 
786  if ( ! this->HasMethod(datasetname, theMethodName) ) {
787  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
788  return 0;
789  }
790 
791  std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
792  if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
793  Log() << kERROR << Form("Can only generate ROC integral for analysis type kClassification and kMulticlass.")
794  << Endl;
795  return 0;
796  }
797 
798  TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
799  if (!rocCurve) {
800  Log() << kFATAL << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ",
801  theMethodName.Data(), datasetname.Data())
802  << Endl;
803  return 0;
804  }
805 
806  Int_t npoints = TMVA::gConfig().fVariablePlotting.fNbinsXOfROCCurve + 1;
807  Double_t rocIntegral = rocCurve->GetROCIntegral(npoints);
808  delete rocCurve;
809 
810  return rocIntegral;
811 }
812 
813 ////////////////////////////////////////////////////////////////////////////////
814 /// Argument iClass specifies the class to generate the ROC curve in a
815 /// multiclass setting. It is ignored for binary classification.
816 ///
817 /// Returns a ROC graph for a given method, or nullptr on error.
818 ///
819 /// Note: Evaluation of the given method must have been run prior to ROC
820 /// generation through Factory::EvaluateAllMethods.
821 ///
822 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
823 /// and the others considered background. This is fine in binary classification,
824 /// but in multiclass classification the ROC surface is an N-dimensional shape,
825 /// where N is the number of classes - 1.
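///
/// A sketch of retrieving and drawing the curve (the method title "BDT" is
/// illustrative):
/// ~~~{.cpp}
/// TGraph *roc = factory.GetROCCurve(&loader, "BDT");
/// if (roc) roc->Draw("AL");
/// ~~~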
826 
827 TGraph* TMVA::Factory::GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles, UInt_t iClass)
828 {
829  return GetROCCurve( (TString)loader->GetName(), theMethodName, setTitles, iClass );
830 }
831 
832 ////////////////////////////////////////////////////////////////////////////////
833 /// Argument iClass specifies the class to generate the ROC curve in a
834 /// multiclass setting. It is ignored for binary classification.
835 ///
836 /// Returns a ROC graph for a given method, or nullptr on error.
837 ///
838 /// Note: Evaluation of the given method must have been run prior to ROC
839 /// generation through Factory::EvaluateAllMethods.
840 ///
841 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
842 /// and the others considered background. This is fine in binary classification,
843 /// but in multiclass classification the ROC surface is an N-dimensional shape,
844 /// where N is the number of classes - 1.
845 
846 TGraph* TMVA::Factory::GetROCCurve(TString datasetname, TString theMethodName, Bool_t setTitles, UInt_t iClass)
847 {
848  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
849  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
850  return nullptr;
851  }
852 
853  if ( ! this->HasMethod(datasetname, theMethodName) ) {
854  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
855  return nullptr;
856  }
857 
858  std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
859  if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
860  Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.") << Endl;
861  return nullptr;
862  }
863 
864  TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
865  TGraph *graph = nullptr;
866 
867  if ( ! rocCurve ) {
868  Log() << kFATAL << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
869  return nullptr;
870  }
871 
872  graph = (TGraph *)rocCurve->GetROCCurve()->Clone();
873  delete rocCurve;
874 
875  if(setTitles) {
876  graph->GetYaxis()->SetTitle("Background rejection (Specificity)");
877  graph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
878  graph->SetTitle(Form("Signal efficiency vs. Background rejection (%s)", theMethodName.Data()));
879  }
880 
881  return graph;
882 }
883 
884 ////////////////////////////////////////////////////////////////////////////////
885 /// Generate a collection of graphs, for all methods for a given class. Suitable
886 /// for comparing method performance.
887 ///
888 /// Argument iClass specifies the class to generate the ROC curve in a
889 /// multiclass setting. It is ignored for binary classification.
890 ///
891 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
892 /// and the others considered background. This is fine in binary classification,
893 /// but in multiclass classification the ROC surface is an N-dimensional shape,
894 /// where N is the number of classes - 1.
895 
896 TMultiGraph* TMVA::Factory::GetROCCurveAsMultiGraph(TMVA::DataLoader *loader, UInt_t iClass)
897 {
898  return GetROCCurveAsMultiGraph((TString)loader->GetName(), iClass);
899 }
900 
901 ////////////////////////////////////////////////////////////////////////////////
902 /// Generate a collection of graphs, for all methods for a given class. Suitable
903 /// for comparing method performance.
904 ///
905 /// Argument iClass specifies the class to generate the ROC curve in a
906 /// multiclass setting. It is ignored for binary classification.
907 ///
908 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
909 /// and the others considered background. This is fine in binary classification,
910 /// but in multiclass classification the ROC surface is an N-dimensional shape,
911 /// where N is the number of classes - 1.
912 
913 TMultiGraph* TMVA::Factory::GetROCCurveAsMultiGraph(TString datasetname, UInt_t iClass)
914 {
915  UInt_t line_color = 1;
916 
917  TMultiGraph *multigraph = new TMultiGraph();
918 
919  MVector *methods = fMethodsMap[datasetname.Data()];
920  for (auto * method_raw : *methods) {
921  TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(method_raw);
922  if (method == nullptr) { continue; }
923 
924  TString methodName = method->GetMethodName();
925  UInt_t nClasses = method->DataInfo().GetNClasses();
926 
927  if ( this->fAnalysisType == Types::kMulticlass && iClass >= nClasses ) {
928  Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass, nClasses) << Endl;
929  continue;
930  }
931 
932  TString className = method->DataInfo().GetClassInfo(iClass)->GetName();
933 
934  TGraph *graph = this->GetROCCurve(datasetname, methodName, false, iClass);
935  graph->SetTitle(methodName);
936 
937  graph->SetLineWidth(2);
938  graph->SetLineColor(line_color++);
939  graph->SetFillColor(10);
940 
941  multigraph->Add(graph);
942  }
943 
944  if ( multigraph->GetListOfGraphs() == nullptr ) {
945  Log() << kERROR << Form("No methods have class %i defined.", iClass) << Endl;
946  return nullptr;
947  }
948 
949  return multigraph;
950 }
951 
952 ////////////////////////////////////////////////////////////////////////////////
953 /// Draws ROC curves for all methods booked with the factory for a given class
954 /// onto a canvas.
955 ///
956 /// Argument iClass specifies the class to generate the ROC curve in a
957 /// multiclass setting. It is ignored for binary classification.
958 ///
959 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
960 /// and the others considered background. This is fine in binary classification,
961 /// but in multiclass classification the ROC surface is an N-dimensional shape,
962 /// where N is the number of classes - 1.
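///
/// For example (a sketch; the output file name is illustrative):
/// ~~~{.cpp}
/// UInt_t iClass = 0;
/// TCanvas *c = factory.GetROCCurve(&loader, iClass);
/// if (c) c->SaveAs("ROCCurves.png");
/// ~~~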
963 
964 TCanvas* TMVA::Factory::GetROCCurve(TMVA::DataLoader *loader, UInt_t iClass)
965 {
966  return GetROCCurve((TString)loader->GetName(), iClass);
967 }
968 
969 ////////////////////////////////////////////////////////////////////////////////
970 /// Draws ROC curves for all methods booked with the factory for a given class.
971 ///
972 /// Argument iClass specifies the class to generate the ROC curve in a
973 /// multiclass setting. It is ignored for binary classification.
974 ///
975 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
976 /// and the others considered background. This is fine in binary classification,
977 /// but in multiclass classification the ROC surface is an N-dimensional shape,
978 /// where N is the number of classes - 1.
979 
980 TCanvas* TMVA::Factory::GetROCCurve(TString datasetname, UInt_t iClass)
981 {
982  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
983  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
984  return 0;
985  }
986 
987  TString name = Form("ROCCurve %s class %i", datasetname.Data(), iClass);
988  TCanvas *canvas = new TCanvas(name, "ROC Curve", 200, 10, 700, 500);
989  canvas->SetGrid();
990 
991  TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass);
992 
993  if ( multigraph ) {
994  multigraph->Draw("AL");
995 
996  multigraph->GetYaxis()->SetTitle("Background rejection (Specificity)");
997  multigraph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
998 
999  TString titleString = Form("Signal efficiency vs. Background rejection");
1000  if (this->fAnalysisType == Types::kMulticlass) {
1001  titleString = Form("%s (Class=%i)", titleString.Data(), iClass);
1002  }
1003 
1004  // Workaround for TMultiGraph not drawing title correctly.
1005  multigraph->GetHistogram()->SetTitle( titleString );
1006  multigraph->SetTitle( titleString );
1007 
1008  canvas->BuildLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
1009  }
1010 
1011  return canvas;
1012 }
1013 
1014 ////////////////////////////////////////////////////////////////////////////////
1015 /// Iterates through all booked methods and trains them.
1016 
1017 void TMVA::Factory::TrainAllMethods()
1018 {
1019  Log() << kHEADER << gTools().Color("bold") << "Train all methods" << gTools().Color("reset") << Endl;
1020  // iterates over all MVAs that have been booked, and calls their training methods
1021 
1022 
1023  // don't do anything if no method booked
1024  if (fMethodsMap.empty()) {
1025  Log() << kINFO << "...nothing found to train" << Endl;
1026  return;
1027  }
1028 
1029  // here the training starts
1030  //Log() << kINFO << " " << Endl;
1031  Log() << kDEBUG << "Train all methods for "
1032  << (fAnalysisType == Types::kRegression ? "Regression" :
1033  (fAnalysisType == Types::kMulticlass ? "Multiclass" : "Classification") ) << " ..." << Endl;
1034 
1035  std::map<TString,MVector*>::iterator itrMap;
1036 
1037  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1038  {
1039  MVector *methods=itrMap->second;
1040  MVector::iterator itrMethod;
1041 
1042  // iterate over methods and train
1043  for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
1044  Event::SetIsTraining(kTRUE);
1045  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
1046 
1047  if(mva==0) continue;
1048 
1049  if(mva->DataInfo().GetDataSetManager()->DataInput().GetEntries() <=1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
1050  Log() << kFATAL << "No input data for the training provided!" << Endl;
1051  }
1052 
1053  if(fAnalysisType == Types::kRegression && mva->DataInfo().GetNTargets() < 1 )
1054  Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
1055  else if( (fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification)
1056  && mva->DataInfo().GetNClasses() < 2 )
1057  Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
1058 
1059  // first print some information about the default dataset
1060  if(!IsSilentFile()) WriteDataInformation(mva->fDataSetInfo);
1061 
1062 
1063  if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
1064  Log() << kWARNING << "Method " << mva->GetMethodName()
1065  << " not trained (training tree has fewer entries ["
1066  << mva->Data()->GetNTrainingEvents()
1067  << "] than required [" << MinNoTrainingEvents << "])" << Endl;
1068  continue;
1069  }
1070 
1071  Log() << kHEADER << "Train method: " << mva->GetMethodName() << " for "
1072  << (fAnalysisType == Types::kRegression ? "Regression" :
1073  (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl << Endl;
1074  mva->TrainMethod();
1075  Log() << kHEADER << "Training finished" << Endl << Endl;
1076  }
1077 
1078  if (fAnalysisType != Types::kRegression) {
1079 
1080  // variable ranking
1081  //Log() << Endl;
1082  Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
1083  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1084  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
1085  if (mva && mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) {
1086 
1087  // create and print ranking
1088  const Ranking* ranking = (*itrMethod)->CreateRanking();
1089  if (ranking != 0) ranking->Print();
1090  else Log() << kINFO << "No variable ranking supplied by classifier: "
1091  << dynamic_cast<MethodBase*>(*itrMethod)->GetMethodName() << Endl;
1092  }
1093  }
1094  }
1095 
1096  // delete all methods and recreate them from weight file - this ensures that the application
1097  // of the methods (in TMVAClassificationApplication) is consistent with the results obtained
1098  // in the testing
1099  //Log() << Endl;
1100  if (fModelPersistence) {
1101 
1102  Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;
1103 
1104  if(!IsSilentFile())RootBaseDir()->cd();
1105 
1106  // iterate through all booked methods
1107  for (UInt_t i=0; i<methods->size(); i++) {
1108 
1109  MethodBase* m = dynamic_cast<MethodBase*>((*methods)[i]);
1110  if(m==0) continue;
1111 
1112  TMVA::Types::EMVA methodType = m->GetMethodType();
1113  TString weightfile = m->GetWeightFileName();
1114 
1115  // decide if .txt or .xml file should be read:
1116  if (READXML) weightfile.ReplaceAll(".txt",".xml");
1117 
1118  DataSetInfo& dataSetInfo = m->DataInfo();
1119  TString testvarName = m->GetTestvarName();
1120  delete m; //itrMethod[i];
1121 
1122  // recreate
1123  m = dynamic_cast<MethodBase*>( ClassifierFactory::Instance()
1124  .Create( std::string(Types::Instance().GetMethodName(methodType)),
1125  dataSetInfo, weightfile ) );
1126  if( m->GetMethodType() == Types::kCategory ){
1127  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(m));
1128  if( !methCat ) Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Factory" << Endl;
1129  else methCat->fDataSetManager = m->DataInfo().GetDataSetManager();
1130  }
1131  // TODO: Do we need to fill the DataSetManager of MethodBoost here too?
1132 
1133 
1134  TString fFileDir= m->DataInfo().GetName();
1135  fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
1136  m->SetWeightFileDir(fFileDir);
1137  m->SetModelPersistence(fModelPersistence);
1138  m->SetSilentFile(IsSilentFile());
1139  m->SetAnalysisType(fAnalysisType);
1140  m->SetupMethod();
1141  m->ReadStateFromFile();
1142  m->SetTestvarName(testvarName);
1143 
1144  // replace trained method by newly created one (from weight file) in methods vector
1145  (*methods)[i] = m;
1146  }
1147  }
1148  }
1149 }
1150 
1151 ////////////////////////////////////////////////////////////////////////////////
1152 
1153 void TMVA::Factory::TestAllMethods()
1154 {
1155  Log() << kHEADER << gTools().Color("bold") << "Test all methods" << gTools().Color("reset") << Endl;
1156 
1157  // don't do anything if no method booked
1158  if (fMethodsMap.empty()) {
1159  Log() << kINFO << "...nothing found to test" << Endl;
1160  return;
1161  }
1162  std::map<TString,MVector*>::iterator itrMap;
1163 
1164  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1165  {
1166  MVector *methods=itrMap->second;
1167  MVector::iterator itrMethod;
1168 
1169  // iterate over methods and test
1170  for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
1171  Event::SetIsTraining(kFALSE);
1172  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
1173  if(mva==0) continue;
1174  Types::EAnalysisType analysisType = mva->GetAnalysisType();
1175  Log() << kHEADER << "Test method: " << mva->GetMethodName() << " for "
1176  << (analysisType == Types::kRegression ? "Regression" :
1177  (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << " performance" << Endl << Endl;
1178  mva->AddOutput( Types::kTesting, analysisType );
1179  }
1180  }
1181 }
1182 
1183 ////////////////////////////////////////////////////////////////////////////////
1184 
1185 void TMVA::Factory::MakeClass(const TString& datasetname , const TString& methodTitle ) const
1186 {
1187  if (methodTitle != "") {
1188  IMethod* method = GetMethod(datasetname, methodTitle);
1189  if (method) method->MakeClass();
1190  else {
1191  Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
1192  << "\" in list" << Endl;
1193  }
1194  }
1195  else {
1196 
1197  // no classifier specified, make the response class for all booked methods
1198  MVector *methods=fMethodsMap.find(datasetname)->second;
1199  MVector::const_iterator itrMethod;
1200  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1201  MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
1202  if(method==0) continue;
1203  Log() << kINFO << "Make response class for classifier: " << method->GetMethodName() << Endl;
1204  method->MakeClass();
1205  }
1206  }
1207 }
1208 
1209 ////////////////////////////////////////////////////////////////////////////////
1210 /// Print predefined help message of classifier.
1211 /// If no classifier is specified, iterate over all booked methods and print the help message of each.
1212 
1213 void TMVA::Factory::PrintHelpMessage(const TString& datasetname , const TString& methodTitle ) const
1214 {
1215  if (methodTitle != "") {
1216  IMethod* method = GetMethod(datasetname , methodTitle );
1217  if (method) method->PrintHelpMessage();
1218  else {
1219  Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
1220  << "\" in list" << Endl;
1221  }
1222  }
1223  else {
1224 
1225  // no classifier specified, print all help messages
1226  MVector *methods=fMethodsMap.find(datasetname)->second;
1227  MVector::const_iterator itrMethod ;
1228  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1229  MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
1230  if(method==0) continue;
1231  Log() << kINFO << "Print help message for classifier: " << method->GetMethodName() << Endl;
1232  method->PrintHelpMessage();
1233  }
1234  }
1235 }
1236 
1237 ////////////////////////////////////////////////////////////////////////////////
1238 /// Iterates over all MVA input variables and evaluates them.
1239 
1240 void TMVA::Factory::EvaluateAllVariables(TMVA::DataLoader *loader, TString options )
1241 {
1242  Log() << kINFO << "Evaluating all variables..." << Endl;
1243  Event::SetIsTraining(kFALSE);
1244 
1245  for (UInt_t i=0; i<loader->DefaultDataSetInfo().GetNVariables(); i++) {
1246  TString s = loader->DefaultDataSetInfo().GetVariableInfo(i).GetLabel();
1247  if (options.Contains("V")) s += ":V";
1248  this->BookMethod(loader, "Variable", s );
1249  }
1250 }
1251 
1252 ////////////////////////////////////////////////////////////////////////////////
1253 /// Iterates over all MVAs that have been booked, and calls their evaluation methods.
1254 
1255 void TMVA::Factory::EvaluateAllMethods( void )
1256 {
1257  Log() << kHEADER << gTools().Color("bold") << "Evaluate all methods" << gTools().Color("reset") << Endl;
1258 
1259  // don't do anything if no method booked
1260  if (fMethodsMap.empty()) {
1261  Log() << kINFO << "...nothing found to evaluate" << Endl;
1262  return;
1263  }
1264  std::map<TString,MVector*>::iterator itrMap;
1265 
1266  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1267  {
1268  MVector *methods=itrMap->second;
1269 
1270  // -----------------------------------------------------------------------
1271  // First part of evaluation process
1272  // --> compute efficiencies, and other separation estimators
1273  // -----------------------------------------------------------------------
1274 
1275  // although equal, we now want to separate the output for the variables
1276  // and the real methods
1277  Int_t isel; // will be 0 for a Method; 1 for a Variable
1278  Int_t nmeth_used[2] = {0,0}; // 0 Method; 1 Variable
1279 
1280  std::vector<std::vector<TString> > mname(2);
1281  std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
1282  std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1283  std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1284  std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1285 
1286  std::vector<std::vector<Float_t> > multiclass_testEff;
1287  std::vector<std::vector<Float_t> > multiclass_trainEff;
1288  std::vector<std::vector<Float_t> > multiclass_testPur;
1289  std::vector<std::vector<Float_t> > multiclass_trainPur;
1290 
1291  // Multiclass confusion matrices.
1292  std::vector<TMatrixD> multiclass_testConfusionEffB01;
1293  std::vector<TMatrixD> multiclass_testConfusionEffB10;
1294  std::vector<TMatrixD> multiclass_testConfusionEffB30;
1295 
1296  std::vector<std::vector<Double_t> > biastrain(1); // "bias" of the regression on the training data
1297  std::vector<std::vector<Double_t> > biastest(1); // "bias" of the regression on test data
1298  std::vector<std::vector<Double_t> > devtrain(1); // "dev" of the regression on the training data
1299  std::vector<std::vector<Double_t> > devtest(1); // "dev" of the regression on test data
1300  std::vector<std::vector<Double_t> > rmstrain(1); // "rms" of the regression on the training data
1301  std::vector<std::vector<Double_t> > rmstest(1); // "rms" of the regression on test data
1302  std::vector<std::vector<Double_t> > minftrain(1); // "minf" of the regression on the training data
1303  std::vector<std::vector<Double_t> > minftest(1); // "minf" of the regression on test data
1304  std::vector<std::vector<Double_t> > rhotrain(1); // correlation of the regression on the training data
1305  std::vector<std::vector<Double_t> > rhotest(1); // correlation of the regression on test data
1306 
1307  // same as above but for 'truncated' quantities (computed for events within 2sigma of RMS)
1308  std::vector<std::vector<Double_t> > biastrainT(1);
1309  std::vector<std::vector<Double_t> > biastestT(1);
1310  std::vector<std::vector<Double_t> > devtrainT(1);
1311  std::vector<std::vector<Double_t> > devtestT(1);
1312  std::vector<std::vector<Double_t> > rmstrainT(1);
1313  std::vector<std::vector<Double_t> > rmstestT(1);
1314  std::vector<std::vector<Double_t> > minftrainT(1);
1315  std::vector<std::vector<Double_t> > minftestT(1);
1316 
1317  // following vector contains all methods - with the exception of Cuts, which are special
1318  MVector methodsNoCuts;
1319 
1320  Bool_t doRegression = kFALSE;
1321  Bool_t doMulticlass = kFALSE;
1322 
1323  // iterate over methods and evaluate
1324  for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); itrMethod++) {
1325  Event::SetIsTraining(kFALSE);
1326  MethodBase* theMethod = dynamic_cast<MethodBase*>(*itrMethod);
1327  if(theMethod==0) continue;
1328  theMethod->SetFile(fgTargetFile);
1329  theMethod->SetSilentFile(IsSilentFile());
1330  if (theMethod->GetMethodType() != Types::kCuts) methodsNoCuts.push_back( *itrMethod );
1331 
1332  if (theMethod->DoRegression()) {
1333  doRegression = kTRUE;
1334 
1335  Log() << kINFO << "Evaluate regression method: " << theMethod->GetMethodName() << Endl;
1336  Double_t bias, dev, rms, mInf;
1337  Double_t biasT, devT, rmsT, mInfT;
1338  Double_t rho;
1339 
1340  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTesting );
1341  biastest[0] .push_back( bias );
1342  devtest[0] .push_back( dev );
1343  rmstest[0] .push_back( rms );
1344  minftest[0] .push_back( mInf );
1345  rhotest[0] .push_back( rho );
1346  biastestT[0] .push_back( biasT );
1347  devtestT[0] .push_back( devT );
1348  rmstestT[0] .push_back( rmsT );
1349  minftestT[0] .push_back( mInfT );
1350 
1351  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTraining );
1352  biastrain[0] .push_back( bias );
1353  devtrain[0] .push_back( dev );
1354  rmstrain[0] .push_back( rms );
1355  minftrain[0] .push_back( mInf );
1356  rhotrain[0] .push_back( rho );
1357  biastrainT[0].push_back( biasT );
1358  devtrainT[0] .push_back( devT );
1359  rmstrainT[0] .push_back( rmsT );
1360  minftrainT[0].push_back( mInfT );
1361 
1362  mname[0].push_back( theMethod->GetMethodName() );
1363  nmeth_used[0]++;
1364  if(!IsSilentFile())
1365  {
1366  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1367  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1368  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1369  }
1370  } else if (theMethod->DoMulticlass()) {
1371  // ====================================================================
1372  // === Multiclass evaluation
1373  // ====================================================================
1374  doMulticlass = kTRUE;
1375  Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl;
1376 
1377  // This part uses a genetic algorithm to evaluate the optimal sig eff * sig pur,
1378  // which is computationally expensive; this is why it is disabled for now.
1379  // Find approximate optimal working point w.r.t. signalEfficiency * signalPurity.
1380  // theMethod->TestMulticlass(); // This is where the actual GA calc is done
1381  // multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur));
1382 
1383  // Confusion matrix at three background efficiency levels
1384  multiclass_testConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTesting));
1385  multiclass_testConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTesting));
1386  multiclass_testConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTesting));
1387 
1388  if (not IsSilentFile()) {
1389  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1390  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1391  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1392  }
1393 
1394  nmeth_used[0]++;
1395  mname[0].push_back(theMethod->GetMethodName());
1396  } else {
1397 
1398  Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl;
1399  isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0;
1400 
1401  // perform the evaluation
1402  theMethod->TestClassification();
1403 
1404  // evaluate the classifier
1405  mname[isel].push_back(theMethod->GetMethodName());
1406  sig[isel].push_back(theMethod->GetSignificance());
1407  sep[isel].push_back(theMethod->GetSeparation());
1408  roc[isel].push_back(theMethod->GetROCIntegral());
1409 
1410  Double_t err;
1411  eff01[isel].push_back(theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err));
1412  eff01err[isel].push_back(err);
1413  eff10[isel].push_back(theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err));
1414  eff10err[isel].push_back(err);
1415  eff30[isel].push_back(theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err));
1416  eff30err[isel].push_back(err);
1417  effArea[isel].push_back(theMethod->GetEfficiency("", Types::kTesting, err)); // computes the area (average)
1418 
1419  trainEff01[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer
1420  trainEff10[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.10"));
1421  trainEff30[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.30"));
1422 
1423  nmeth_used[isel]++;
1424 
1425  if (!IsSilentFile()) {
1426  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
 1427  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
 1428  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
 1429  }
1430  }
1431  }
1432  if (doRegression) {
1433 
1434  std::vector<TString> vtemps = mname[0];
1435  std::vector< std::vector<Double_t> > vtmp;
1436  vtmp.push_back( devtest[0] ); // this is the vector that is ranked
1437  vtmp.push_back( devtrain[0] );
1438  vtmp.push_back( biastest[0] );
1439  vtmp.push_back( biastrain[0] );
1440  vtmp.push_back( rmstest[0] );
1441  vtmp.push_back( rmstrain[0] );
1442  vtmp.push_back( minftest[0] );
1443  vtmp.push_back( minftrain[0] );
1444  vtmp.push_back( rhotest[0] );
1445  vtmp.push_back( rhotrain[0] );
1446  vtmp.push_back( devtestT[0] ); // this is the vector that is ranked
1447  vtmp.push_back( devtrainT[0] );
1448  vtmp.push_back( biastestT[0] );
1449  vtmp.push_back( biastrainT[0]);
1450  vtmp.push_back( rmstestT[0] );
1451  vtmp.push_back( rmstrainT[0] );
1452  vtmp.push_back( minftestT[0] );
1453  vtmp.push_back( minftrainT[0]);
1454  gTools().UsefulSortAscending( vtmp, &vtemps );
1455  mname[0] = vtemps;
1456  devtest[0] = vtmp[0];
1457  devtrain[0] = vtmp[1];
1458  biastest[0] = vtmp[2];
1459  biastrain[0] = vtmp[3];
1460  rmstest[0] = vtmp[4];
1461  rmstrain[0] = vtmp[5];
1462  minftest[0] = vtmp[6];
1463  minftrain[0] = vtmp[7];
1464  rhotest[0] = vtmp[8];
1465  rhotrain[0] = vtmp[9];
1466  devtestT[0] = vtmp[10];
1467  devtrainT[0] = vtmp[11];
1468  biastestT[0] = vtmp[12];
1469  biastrainT[0] = vtmp[13];
1470  rmstestT[0] = vtmp[14];
1471  rmstrainT[0] = vtmp[15];
1472  minftestT[0] = vtmp[16];
1473  minftrainT[0] = vtmp[17];
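// A sketch of what UsefulSortAscending does here (values hypothetical):
// all rows of vtmp are permuted in lockstep, with vtmp[0] (devtest) as the
// sort key, and the same permutation is applied to the name vector:
//   before: vtmp[0] = {0.30, 0.10, 0.20}, vtemps = {"BDT", "MLP", "LD"}
//   after : vtmp[0] = {0.10, 0.20, 0.30}, vtemps = {"MLP", "LD", "BDT"}
// so every metric row stays aligned with its method name after the ranking.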
1474  } else if (doMulticlass) {
1475  // TODO: fill in something meaningful
1476  // If there is some ranking of methods to be done it should be done here.
1477  // However, this is not so easy to define for multiclass so it is left out for now.
1478 
1479  }
1480  else {
1481  // now sort the variables according to the best 'eff at Beff=0.10'
1482  for (Int_t k=0; k<2; k++) {
1483  std::vector< std::vector<Double_t> > vtemp;
1484  vtemp.push_back( effArea[k] ); // this is the vector that is ranked
1485  vtemp.push_back( eff10[k] );
1486  vtemp.push_back( eff01[k] );
1487  vtemp.push_back( eff30[k] );
1488  vtemp.push_back( eff10err[k] );
1489  vtemp.push_back( eff01err[k] );
1490  vtemp.push_back( eff30err[k] );
1491  vtemp.push_back( trainEff10[k] );
1492  vtemp.push_back( trainEff01[k] );
1493  vtemp.push_back( trainEff30[k] );
1494  vtemp.push_back( sig[k] );
1495  vtemp.push_back( sep[k] );
1496  vtemp.push_back( roc[k] );
1497  std::vector<TString> vtemps = mname[k];
1498  gTools().UsefulSortDescending( vtemp, &vtemps );
1499  effArea[k] = vtemp[0];
1500  eff10[k] = vtemp[1];
1501  eff01[k] = vtemp[2];
1502  eff30[k] = vtemp[3];
1503  eff10err[k] = vtemp[4];
1504  eff01err[k] = vtemp[5];
1505  eff30err[k] = vtemp[6];
1506  trainEff10[k] = vtemp[7];
1507  trainEff01[k] = vtemp[8];
1508  trainEff30[k] = vtemp[9];
1509  sig[k] = vtemp[10];
1510  sep[k] = vtemp[11];
1511  roc[k] = vtemp[12];
1512  mname[k] = vtemps;
1513  }
1514  }
1515 
1516  // -----------------------------------------------------------------------
1517  // Second part of evaluation process
1518  // --> compute correlations among MVAs
1519  // --> compute correlations between input variables and MVA (determines importance)
1520  // --> count overlaps
1521  // -----------------------------------------------------------------------
1522  if(fCorrelations)
1523  {
1524  const Int_t nmeth = methodsNoCuts.size();
1525  MethodBase* method = dynamic_cast<MethodBase*>(methods[0][0]);
1526  const Int_t nvar = method->fDataSetInfo.GetNVariables();
1527  if (!doRegression && !doMulticlass ) {
1528 
1529  if (nmeth > 0) {
1530 
1531  // needed for correlations
1532  Double_t *dvec = new Double_t[nmeth+nvar];
1533  std::vector<Double_t> rvec;
1534 
1535  // for correlations
1536  TPrincipal* tpSig = new TPrincipal( nmeth+nvar, "" );
1537  TPrincipal* tpBkg = new TPrincipal( nmeth+nvar, "" );
1538 
1539  // set required tree branch references
1540  Int_t ivar = 0;
1541  std::vector<TString>* theVars = new std::vector<TString>;
1542  std::vector<ResultsClassification*> mvaRes;
1543  for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {
1544  MethodBase* m = dynamic_cast<MethodBase*>(*itrMethod);
1545  if(m==0) continue;
1546  theVars->push_back( m->GetTestvarName() );
1547  rvec.push_back( m->GetSignalReferenceCut() );
1548  theVars->back().ReplaceAll( "MVA_", "" );
1549  mvaRes.push_back( dynamic_cast<ResultsClassification*>( m->Data()->GetResults( m->GetMethodName(),
 1550  Types::kTesting,
 1551  Types::kMaxAnalysisType) ) );
 1552  }
1553 
1554  // for overlap study
1555  TMatrixD* overlapS = new TMatrixD( nmeth, nmeth );
1556  TMatrixD* overlapB = new TMatrixD( nmeth, nmeth );
1557  (*overlapS) *= 0; // init...
1558  (*overlapB) *= 0; // init...
1559 
1560  // loop over test tree
1561  DataSet* defDs = method->fDataSetInfo.GetDataSet();
 1562  defDs->SetCurrentType(Types::kTesting);
 1563  for (Int_t ievt=0; ievt<defDs->GetNEvents(); ievt++) {
1564  const Event* ev = defDs->GetEvent(ievt);
1565 
1566  // for correlations
1567  TMatrixD* theMat = 0;
1568  for (Int_t im=0; im<nmeth; im++) {
1569  // check for NaN value
1570  Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
1571  if (TMath::IsNaN(retval)) {
1572  Log() << kWARNING << "Found NaN return value in event: " << ievt
1573  << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;
1574  dvec[im] = 0;
1575  }
1576  else dvec[im] = retval;
1577  }
1578  for (Int_t iv=0; iv<nvar; iv++) dvec[iv+nmeth] = (Double_t)ev->GetValue(iv);
1579  if (method->fDataSetInfo.IsSignal(ev)) { tpSig->AddRow( dvec ); theMat = overlapS; }
1580  else { tpBkg->AddRow( dvec ); theMat = overlapB; }
1581 
1582  // count overlaps
1583  for (Int_t im=0; im<nmeth; im++) {
1584  for (Int_t jm=im; jm<nmeth; jm++) {
1585  if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1586  (*theMat)(im,jm)++;
1587  if (im != jm) (*theMat)(jm,im)++;
1588  }
1589  }
1590  }
1591  }
1592 
1593  // renormalise overlap matrix
 1594  (*overlapS) *= (1.0/defDs->GetNEvtSigTest()); // normalise to the number of signal test events
 1595  (*overlapB) *= (1.0/defDs->GetNEvtBkgdTest()); // normalise to the number of background test events
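// How to read the overlap matrices (the number below is made up):
// after normalisation, (*overlapS)(i,j) is the fraction of signal test
// events for which methods i and j fall on the same side of their
// respective cuts rvec[i] and rvec[j]; the product test above is positive
// exactly when both outputs are signal-like or both background-like.
// For example, overlapS(0,1) = 0.85 would mean the two MVAs agree on 85%
// of the signal events.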
1596 
1597  tpSig->MakePrincipals();
1598  tpBkg->MakePrincipals();
1599 
1600  const TMatrixD* covMatS = tpSig->GetCovarianceMatrix();
1601  const TMatrixD* covMatB = tpBkg->GetCovarianceMatrix();
1602 
1603  const TMatrixD* corrMatS = gTools().GetCorrelationMatrix( covMatS );
1604  const TMatrixD* corrMatB = gTools().GetCorrelationMatrix( covMatB );
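// GetCorrelationMatrix rescales the covariance matrix in the usual way,
//    corr(i,j) = cov(i,j) / sqrt( cov(i,i) * cov(j,j) ),
// so diagonal entries become 1 and off-diagonal entries lie in [-1, 1].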
1605 
1606  // print correlation matrices
1607  if (corrMatS != 0 && corrMatB != 0) {
1608 
1609  // extract MVA matrix
1610  TMatrixD mvaMatS(nmeth,nmeth);
1611  TMatrixD mvaMatB(nmeth,nmeth);
1612  for (Int_t im=0; im<nmeth; im++) {
1613  for (Int_t jm=0; jm<nmeth; jm++) {
1614  mvaMatS(im,jm) = (*corrMatS)(im,jm);
1615  mvaMatB(im,jm) = (*corrMatB)(im,jm);
1616  }
1617  }
1618 
1619  // extract variables - to MVA matrix
1620  std::vector<TString> theInputVars;
1621  TMatrixD varmvaMatS(nvar,nmeth);
1622  TMatrixD varmvaMatB(nvar,nmeth);
1623  for (Int_t iv=0; iv<nvar; iv++) {
1624  theInputVars.push_back( method->fDataSetInfo.GetVariableInfo( iv ).GetLabel() );
1625  for (Int_t jm=0; jm<nmeth; jm++) {
1626  varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1627  varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1628  }
1629  }
1630 
1631  if (nmeth > 1) {
1632  Log() << kINFO << Endl;
1633  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (signal):" << Endl;
1634  gTools().FormattedOutput( mvaMatS, *theVars, Log() );
1635  Log() << kINFO << Endl;
1636 
1637  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (background):" << Endl;
1638  gTools().FormattedOutput( mvaMatB, *theVars, Log() );
1639  Log() << kINFO << Endl;
1640  }
1641 
1642  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (signal):" << Endl;
1643  gTools().FormattedOutput( varmvaMatS, theInputVars, *theVars, Log() );
1644  Log() << kINFO << Endl;
1645 
1646  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (background):" << Endl;
1647  gTools().FormattedOutput( varmvaMatB, theInputVars, *theVars, Log() );
1648  Log() << kINFO << Endl;
1649  }
1650  else Log() << kWARNING <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "<TestAllMethods> cannot compute correlation matrices" << Endl;
1651 
1652  // print overlap matrices
1653  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
 1654  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "the MVAs 'i' and 'j' have returned consistent answers about \"signal-likeness\"" << Endl;
 1655  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "An event is signal-like if its MVA output exceeds the following value:" << Endl;
1656  gTools().FormattedOutput( rvec, *theVars, "Method" , "Cut value", Log() );
1657  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
1658 
1659  // give notice that cut method has been excluded from this test
1660  if (nmeth != (Int_t)methods->size())
1661  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Note: no correlations and overlap with cut method are provided at present" << Endl;
1662 
1663  if (nmeth > 1) {
1664  Log() << kINFO << Endl;
1665  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (signal):" << Endl;
1666  gTools().FormattedOutput( *overlapS, *theVars, Log() );
1667  Log() << kINFO << Endl;
1668 
1669  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (background):" << Endl;
1670  gTools().FormattedOutput( *overlapB, *theVars, Log() );
1671  }
1672 
1673  // cleanup
1674  delete tpSig;
1675  delete tpBkg;
1676  delete corrMatS;
1677  delete corrMatB;
1678  delete theVars;
1679  delete overlapS;
1680  delete overlapB;
1681  delete [] dvec;
1682  }
1683  }
1684  }
1685  // -----------------------------------------------------------------------
1686  // Third part of evaluation process
1687  // --> output
1688  // -----------------------------------------------------------------------
1689 
1690  if (doRegression) {
1691 
1692  Log() << kINFO << Endl;
1693  TString hLine = "--------------------------------------------------------------------------------------------------";
1694  Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
 1695  Log() << kINFO << "(\"Bias\" denotes the mean deviation of the regression from the true target." << Endl;
1696  Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
1697  Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
1698  Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
1699  Log() << kINFO << hLine << Endl;
1700  //Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1701  Log() << kINFO << hLine << Endl;
1702 
1703  for (Int_t i=0; i<nmeth_used[0]; i++) {
1704  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1705  if(theMethod==0) continue;
1706 
1707  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1708  theMethod->fDataSetInfo.GetName(),
1709  (const char*)mname[0][i],
1710  biastest[0][i], biastestT[0][i],
1711  rmstest[0][i], rmstestT[0][i],
1712  minftest[0][i], minftestT[0][i] )
1713  << Endl;
1714  }
1715  Log() << kINFO << hLine << Endl;
1716  Log() << kINFO << Endl;
1717  Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
1718  Log() << kINFO << "(overtraining check)" << Endl;
1719  Log() << kINFO << hLine << Endl;
1720  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1721  Log() << kINFO << hLine << Endl;
1722 
1723  for (Int_t i=0; i<nmeth_used[0]; i++) {
1724  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1725  if(theMethod==0) continue;
1726  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1727  theMethod->fDataSetInfo.GetName(),
1728  (const char*)mname[0][i],
1729  biastrain[0][i], biastrainT[0][i],
1730  rmstrain[0][i], rmstrainT[0][i],
1731  minftrain[0][i], minftrainT[0][i] )
1732  << Endl;
1733  }
1734  Log() << kINFO << hLine << Endl;
1735  Log() << kINFO << Endl;
1736  } else if (doMulticlass) {
1737  // ====================================================================
1738  // === Multiclass Output
1739  // ====================================================================
1740 
1741  TString hLine =
1742  "-------------------------------------------------------------------------------------------------------";
1743 
 1744  // This part uses a genetic algorithm to evaluate the optimal sig eff * sig pur,
 1745  // which is why it is disabled for now.
 1746  //
 1747  // // --- Achievable signal efficiency * signal purity
1748  // // --------------------------------------------------------------------
1749  // Log() << kINFO << Endl;
1750  // Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
1751  // Log() << kINFO << hLine << Endl;
1752 
1753  // // iterate over methods and evaluate
1754  // for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1755  // MethodBase *theMethod = dynamic_cast<MethodBase *>(*itrMethod);
1756  // if (theMethod == 0) {
1757  // continue;
1758  // }
1759 
1760  // TString header = "DataSet Name MVA Method ";
1761  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1762  // header += Form("%-12s ", theMethod->fDataSetInfo.GetClassInfo(icls)->GetName());
1763  // }
1764 
1765  // Log() << kINFO << header << Endl;
1766  // Log() << kINFO << hLine << Endl;
1767  // for (Int_t i = 0; i < nmeth_used[0]; i++) {
1768  // TString res = Form("[%-14s] %-15s", theMethod->fDataSetInfo.GetName(), (const char *)mname[0][i]);
1769  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1770  // res += Form("%#1.3f ", (multiclass_testEff[i][icls]) * (multiclass_testPur[i][icls]));
1771  // }
1772  // Log() << kINFO << res << Endl;
1773  // }
1774 
1775  // Log() << kINFO << hLine << Endl;
1776  // Log() << kINFO << Endl;
1777  // }
1778 
1779  // --- 1 vs Rest ROC AUC, signal efficiency @ given background efficiency
1780  // --------------------------------------------------------------------
1781  TString header1 =
1782  Form("%-15s%-15s%-10s%-10s%-10s%-10s", "Dataset", "MVA Method", "", "Sig eff", "Sig eff", "Sig eff");
1783  TString header2 =
1784  Form("%-15s%-15s%-10s%-10s%-10s%-10s", "Name:", "/ Class:", "ROC AUC", "@B=0.01", "@B=0.10", "@B=0.30");
1785  Log() << kINFO << "1-vs-rest performance metrics per class" << Endl;
1786  Log() << kINFO << hLine << Endl;
1787  Log() << kINFO << Endl;
1788  Log() << kINFO << "Considers the listed class as signal and the other classes" << Endl;
1789  Log() << kINFO << "as background, reporting the resulting binary performance." << Endl;
1790 
1791  Log() << kINFO << Endl;
1792  Log() << kINFO << header1 << Endl;
1793  Log() << kINFO << header2 << Endl;
1794  for (Int_t k = 0; k < 2; k++) {
1795  for (Int_t i = 0; i < nmeth_used[k]; i++) {
1796  if (k == 1) {
1797  mname[k][i].ReplaceAll("Variable_", "");
1798  }
1799 
1800  const TString datasetName = itrMap->first;
1801  const TString mvaName = mname[k][i];
1802 
1803  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, mvaName));
1804  if (theMethod == 0) {
1805  continue;
1806  }
1807 
1808  Log() << kINFO << Endl;
1809  TString row = Form("%-15s%-15s", datasetName.Data(), mvaName.Data());
1810  Log() << kINFO << row << Endl;
1811 
1812  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
1813  DataSet *dataset = theMethod->Data();
1814  TMVA::Results *results = theMethod->Data()->GetResults(mname[k][i], Types::kTesting, Types::kMulticlass);
1815 
1816  for (UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1817  std::vector<Float_t> mvaRes;
1818  std::vector<Bool_t> mvaResType;
1819  std::vector<Float_t> mvaResWeight;
1820 
1821  std::vector<std::vector<Float_t>> *rawMvaRes =
1822  dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();
1823 
1824  // Vector transpose due to values being stored as
1825  // [ [0, 1, 2], [0, 1, 2], ... ]
1826  // in ResultsMulticlass::GetValueVector.
1827  mvaRes.reserve(rawMvaRes->size());
1828  for (auto item : *rawMvaRes) {
1829  mvaRes.push_back(item[iClass]);
1830  }
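// Shape sketch of the transpose above (assuming 3 classes and N events):
// rawMvaRes holds N rows of the form [score_cls0, score_cls1, score_cls2];
// mvaRes collects column iClass, i.e. one score per event for the class
// currently treated as signal.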
1831 
1832  auto eventCollection = dataset->GetEventCollection();
1833  mvaResType.reserve(eventCollection.size());
1834  mvaResWeight.reserve(eventCollection.size());
1835  for (auto ev : eventCollection) {
1836  mvaResType.push_back(ev->GetClass() == iClass);
1837  mvaResWeight.push_back(ev->GetWeight());
1838  }
1839 
1840  ROCCurve rocCurve = ROCCurve(mvaRes, mvaResType, mvaResWeight);
1841 
1842  const TString className = theMethod->DataInfo().GetClassInfo(iClass)->GetName();
1843  const Double_t rocauc = rocCurve.GetROCIntegral();
1844  const Double_t effB01 = rocCurve.GetEffSForEffB(0.01);
1845  const Double_t effB10 = rocCurve.GetEffSForEffB(0.10);
1846  const Double_t effB30 = rocCurve.GetEffSForEffB(0.30);
1847  row = Form("%-15s%-15s%-10.3f%-10.3f%-10.3f%-10.3f", "", className.Data(), rocauc, effB01, effB10,
1848  effB30);
1849  Log() << kINFO << row << Endl;
1850  }
1851  }
1852  }
1853  Log() << kINFO << hLine << Endl;
1854  Log() << kINFO << Endl;
1855 
1856  // --- Confusion matrices
1857  // --------------------------------------------------------------------
1858  auto printMatrix = [](TMatrixD mat, std::vector<TString> classnames, UInt_t numClasses, MsgLogger &stream) {
 1859  // assert (classLabelWidth >= valueLabelWidth + 2)
 1860  // if (...) {Log() << kWARN << "..." << Endl; }
1861 
1862  TString header = Form("%-12s", " ");
1863  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1864  header += Form(" %-12s", classnames[iCol].Data());
1865  }
1866  stream << kINFO << header << Endl;
1867 
1868  for (UInt_t iRow = 0; iRow < numClasses; ++iRow) {
1869  stream << kINFO << Form("%-12s", classnames[iRow].Data());
1870 
1871  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1872  if (iCol == iRow) {
1873  stream << kINFO << Form(" %-12s", "-");
1874  continue;
1875  }
1876 
1877  Double_t value = mat[iRow][iCol];
1878  stream << kINFO << Form(" %-12.3f", value);
1879  }
1880  stream << kINFO << Endl;
1881  }
1882  };
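// Example of the table layout printMatrix produces for three classes
// (all values are placeholders):
//              ClassA       ClassB       ClassC
// ClassA       -            0.912        0.874
// ClassB       0.905        -            0.733
// ClassC       0.810        0.688        -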
1883 
1884  Log() << kINFO << Endl;
1885  Log() << kINFO << "Confusion matrices for all methods" << Endl;
1886  Log() << kINFO << hLine << Endl;
1887  Log() << kINFO << Endl;
1888  Log() << kINFO << "Does a binary comparison between the two classes given by a " << Endl;
1889  Log() << kINFO << "particular row-column combination. In each case, the class " << Endl;
1890  Log() << kINFO << "given by the row is considered signal while the class given " << Endl;
1891  Log() << kINFO << "by the column index is considered background." << Endl;
1892  Log() << kINFO << Endl;
1893  for (UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
1894  MethodBase *theMethod = dynamic_cast<MethodBase *>(methods->at(iMethod));
1895  if (theMethod == nullptr) {
1896  continue;
1897  }
1898  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
1899 
1900  std::vector<TString> classnames;
1901  for (UInt_t iCls = 0; iCls < numClasses; ++iCls) {
1902  classnames.push_back(theMethod->fDataSetInfo.GetClassInfo(iCls)->GetName());
1903  }
1904  Log() << kINFO << "Showing confusion matrix for method : " << Form("%-15s", (const char *)mname[0][iMethod])
1905  << Endl;
 1906  Log() << kINFO << "(Signal Efficiency for Background Efficiency 1%)" << Endl;
1907  printMatrix(multiclass_testConfusionEffB01[iMethod], classnames, numClasses, Log());
1908  Log() << kINFO << Endl;
1909 
 1910  Log() << kINFO << "(Signal Efficiency for Background Efficiency 10%)" << Endl;
1911  printMatrix(multiclass_testConfusionEffB10[iMethod], classnames, numClasses, Log());
1912  Log() << kINFO << Endl;
1913 
 1914  Log() << kINFO << "(Signal Efficiency for Background Efficiency 30%)" << Endl;
1915  printMatrix(multiclass_testConfusionEffB30[iMethod], classnames, numClasses, Log());
1916  Log() << kINFO << Endl;
1917  }
1918  Log() << kINFO << hLine << Endl;
1919  Log() << kINFO << Endl;
1920 
1921  } else {
1922  // Binary classification
1923  if (fROC) {
1924  Log().EnableOutput();
 1925  gConfig().SetSilent(kFALSE);
 1926  Log() << Endl;
1927  TString hLine = "------------------------------------------------------------------------------------------"
1928  "-------------------------";
1929  Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
1930  Log() << kINFO << hLine << Endl;
1931  Log() << kINFO << "DataSet MVA " << Endl;
1932  Log() << kINFO << "Name: Method: ROC-integ" << Endl;
1933 
1934  // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error):
1935  // | Sepa- Signifi- " << Endl; Log() << kDEBUG << "Name: Method: @B=0.01
1936  // @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl;
1937  Log() << kDEBUG << hLine << Endl;
1938  for (Int_t k = 0; k < 2; k++) {
1939  if (k == 1 && nmeth_used[k] > 0) {
1940  Log() << kINFO << hLine << Endl;
1941  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
1942  }
1943  for (Int_t i = 0; i < nmeth_used[k]; i++) {
1944  TString datasetName = itrMap->first;
1945  TString methodName = mname[k][i];
1946 
1947  if (k == 1) {
1948  methodName.ReplaceAll("Variable_", "");
1949  }
1950 
1951  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, methodName));
1952  if (theMethod == 0) {
1953  continue;
1954  }
1955 
1956  TMVA::DataSet *dataset = theMethod->Data();
1957  TMVA::Results *results = dataset->GetResults(methodName, Types::kTesting, this->fAnalysisType);
1958  std::vector<Bool_t> *mvaResType =
1959  dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
1960 
1961  Double_t rocIntegral = 0.0;
1962  if (mvaResType->size() != 0) {
1963  rocIntegral = GetROCIntegral(datasetName, methodName);
1964  }
1965 
1966  if (sep[k][i] < 0 || sig[k][i] < 0) {
1967  // cannot compute separation/significance -> no MVA (usually for Cuts)
1968  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), effArea[k][i])
1969  << Endl;
1970 
1971  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
1972  // %#1.3f %#1.3f | -- --",
1973  // datasetName.Data(),
1974  // methodName.Data(),
1975  // eff01[k][i], Int_t(1000*eff01err[k][i]),
1976  // eff10[k][i], Int_t(1000*eff10err[k][i]),
1977  // eff30[k][i], Int_t(1000*eff30err[k][i]),
1978  // effArea[k][i],rocIntegral) << Endl;
1979  } else {
1980  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), rocIntegral)
1981  << Endl;
1982  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
1983  // %#1.3f %#1.3f | %#1.3f %#1.3f",
1984  // datasetName.Data(),
1985  // methodName.Data(),
1986  // eff01[k][i], Int_t(1000*eff01err[k][i]),
1987  // eff10[k][i], Int_t(1000*eff10err[k][i]),
1988  // eff30[k][i], Int_t(1000*eff30err[k][i]),
1989  // effArea[k][i],rocIntegral,
1990  // sep[k][i], sig[k][i]) << Endl;
1991  }
1992  }
1993  }
1994  Log() << kINFO << hLine << Endl;
1995  Log() << kINFO << Endl;
1996  Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
1997  Log() << kINFO << hLine << Endl;
1998  Log() << kINFO
1999  << "DataSet MVA Signal efficiency: from test sample (from training sample) "
2000  << Endl;
2001  Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2002  << Endl;
2003  Log() << kINFO << hLine << Endl;
2004  for (Int_t k = 0; k < 2; k++) {
2005  if (k == 1 && nmeth_used[k] > 0) {
2006  Log() << kINFO << hLine << Endl;
2007  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2008  }
2009  for (Int_t i = 0; i < nmeth_used[k]; i++) {
2010  if (k == 1) mname[k][i].ReplaceAll("Variable_", "");
2011  MethodBase *theMethod = dynamic_cast<MethodBase *>((*methods)[i]);
2012  if (theMethod == 0) continue;
2013 
2014  Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2015  theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i],
2016  trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2017  << Endl;
2018  }
2019  }
2020  Log() << kINFO << hLine << Endl;
2021  Log() << kINFO << Endl;
2022 
2023  if (gTools().CheckForSilentOption(GetOptions())) Log().InhibitOutput();
2024  } // end fROC
2025  }
2026  if(!IsSilentFile())
2027  {
2028  std::list<TString> datasets;
2029  for (Int_t k=0; k<2; k++) {
2030  for (Int_t i=0; i<nmeth_used[k]; i++) {
2031  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
2032  if(theMethod==0) continue;
2033  // write test/training trees
2034  RootBaseDir()->cd(theMethod->fDataSetInfo.GetName());
2035  if(std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end())
2036  {
 2037  theMethod->Data()->GetTree(Types::kTesting)->Write( "", TObject::kOverwrite );
 2038  theMethod->Data()->GetTree(Types::kTraining)->Write( "", TObject::kOverwrite );
 2039  datasets.push_back(theMethod->fDataSetInfo.GetName());
2040  }
2041  }
2042  }
2043  }
2044  }//end for MethodsMap
2045  // references for citation
2047 }
2048 
2049 ////////////////////////////////////////////////////////////////////////////////
2050 /// Evaluate Variable Importance
2051 
2052 TH1F* TMVA::Factory::EvaluateImportance(DataLoader *loader,VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2053 {
 2054  fModelPersistence=kFALSE;
 2055  fSilentFile=kTRUE; // we need a silent file here because we need fast classification results
2056 
2057  //getting number of variables and variable names from loader
2058  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
2059  if(vitype==VIType::kShort)
2060  return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
2061  else if(vitype==VIType::kAll)
2062  return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
2063  else if(vitype==VIType::kRandom&&nbits>10)
2064  {
2065  return EvaluateImportanceRandom(loader,pow(2,nbits),theMethod,methodTitle,theOption);
2066  }else
2067  {
 2068  std::cerr<<"Error in Variable Importance: Random mode requires more than 10 variables in the dataset."<<std::endl;
2069  return nullptr;
2070  }
2071 }
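// A minimal usage sketch for this entry point (the file name, factory
// options and method options below are illustrative, not prescribed by
// this code):
//
//   TFile *outputFile = TFile::Open("VI.root", "RECREATE");
//   TMVA::Factory factory("VI", outputFile, "!V:Silent:AnalysisType=Classification");
//   // loader: a TMVA::DataLoader already filled with variables and trees
//   TH1F *vi = factory.EvaluateImportance(loader, TMVA::VIType::kShort,
//                                         TMVA::Types::kBDT, "BDT", "NTrees=100");
//   if (vi) vi->Draw("B");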
2072 
2073 ////////////////////////////////////////////////////////////////////////////////
2074 
2075 TH1F* TMVA::Factory::EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2076 {
2077 
2078  uint64_t x = 0;
2079  uint64_t y = 0;
2080 
2081  //getting number of variables and variable names from loader
2082  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
2083  std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();
2084 
2085  uint64_t range = pow(2, nbits);
2086 
2087  //vector to save importances
2088  std::vector<Double_t> importances(nbits);
2089  //vector to save ROC
2090  std::vector<Double_t> ROC(range);
2091  ROC[0]=0.5;
2092  for (int i = 0; i < nbits; i++)importances[i] = 0;
2093 
2094  Double_t SROC, SSROC; //computed ROC value
2095  for ( x = 1; x <range ; x++) {
2096 
2097  std::bitset<VIBITS> xbitset(x);
 2098  if (x == 0) continue; // the data loader needs at least one variable
2099 
2100  //creating loader for seed
2101  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2102 
2103  //adding variables from seed
2104  for (int index = 0; index < nbits; index++) {
2105  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2106  }
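// Example with 3 variables v0, v1, v2 (purely illustrative): the seed
// x = 5 has xbitset = ...000101, so the seed dataset is built from v0 and
// v2 only. The 32-character string xbitset.to_string() doubles as the
// dataset name, which is later used as the key into fMethodsMap.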
2107 
2108  DataLoaderCopy(seedloader,loader);
2109  seedloader->PrepareTrainingAndTestTree(loader->DefaultDataSetInfo().GetCut("Signal"), loader->DefaultDataSetInfo().GetCut("Background"), loader->DefaultDataSetInfo().GetSplitOptions());
2110 
2111  //Booking Seed
2112  BookMethod(seedloader, theMethod, methodTitle, theOption);
2113 
2114  //Train/Test/Evaluation
2115  TrainAllMethods();
2116  TestAllMethods();
 2117  EvaluateAllMethods();
 2118 
2119  //getting ROC
2120  ROC[x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2121 
2122  //cleaning information to process sub-seeds
2123  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2124  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2125  delete sresults;
2126  delete seedloader;
2127  this->DeleteAllMethods();
2128 
2129  fMethodsMap.clear();
 2130  //remove the global result because it requires a lot of RAM when looping over all seeds
2131  }
2132 
2133 
2134  for ( x = 0; x <range ; x++)
2135  {
2136  SROC=ROC[x];
2137  for (uint32_t i = 0; i < VIBITS; ++i) {
 2138  if (x & (uint64_t(1) << i)) { // 64-bit mask: a plain (1 << i) is undefined for i >= 31
 2139  y = x & ~(uint64_t(1) << i);
2140  std::bitset<VIBITS> ybitset(y);
2141  //need at least one variable
2142  //NOTE: if sub-seed is zero then is the special case
2143  //that count in xbitset is 1
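// x - y isolates the single removed bit, so it is an exact power of two;
// ln(x-y)/0.693147 is then log2(x-y), the index of the dropped variable
// (e.g. x-y = 8 gives ny = 3). Because 0.693147 is slightly below ln(2),
// the quotient lands just above the integer, and the implicit truncation
// to an array index yields the exact bit position.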
2144  Double_t ny = log(x - y) / 0.693147;
2145  if (y == 0) {
2146  importances[ny] = SROC - 0.5;
2147  continue;
2148  }
2149 
2150  //getting ROC
2151  SSROC = ROC[y];
2152  importances[ny] += SROC - SSROC;
2153  //cleaning information
2154  }
2155 
2156  }
2157  }
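// Worked example with nbits = 2 (ROC values hypothetical): the seeds are
// x = 01, 10, 11 with ROC[01] = 0.70, ROC[10] = 0.60, ROC[11] = 0.75.
// Variable 0 receives (0.70 - 0.5) from seed 01 plus (0.75 - 0.60) from
// seed 11, giving 0.35; variable 1 receives (0.60 - 0.5) + (0.75 - 0.70)
// = 0.15. GetImportance below normalises these sums to percentages.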
2158  std::cout<<"--- Variable Importance Results (All)"<<std::endl;
2159  return GetImportance(nbits,importances,varNames);
2160 }
2161 
2162 static long int sum(long int i)
2163 {
2164  long int _sum=0;
2165  for(long int n=0;n<i;n++) _sum+=pow(2,n);
2166  return _sum;
2167 }
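// sum(i) returns 2^0 + 2^1 + ... + 2^(i-1) = 2^i - 1, i.e. the bit mask
// with the lowest i bits set; sum(nbits) is therefore the seed that
// contains every variable (e.g. sum(3) = 7 = 0b111).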
2168 
2169 ////////////////////////////////////////////////////////////////////////////////
2170 
2171 TH1F* TMVA::Factory::EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2172 {
2173  uint64_t x = 0;
2174  uint64_t y = 0;
2175 
2176  //getting number of variables and variable names from loader
2177  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
2178  std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();
2179 
2180  long int range = sum(nbits);
2181 // std::cout<<range<<std::endl;
2182  //vector to save importances
2183  std::vector<Double_t> importances(nbits);
2184  for (int i = 0; i < nbits; i++)importances[i] = 0;
2185 
2186  Double_t SROC, SSROC; //computed ROC value
2187 
2188  x = range;
2189 
2190  std::bitset<VIBITS> xbitset(x);
 2191  if (x == 0) Log() << kFATAL << "Error: need at least one variable." << Endl; // the data loader needs at least one variable
2192 
2193 
2194  //creating loader for seed
2195  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2196 
2197  //adding variables from seed
2198  for (int index = 0; index < nbits; index++) {
2199  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2200  }
2201 
2202  //Loading Dataset
2203  DataLoaderCopy(seedloader,loader);
2204 
2205  //Booking Seed
2206  BookMethod(seedloader, theMethod, methodTitle, theOption);
2207 
2208  //Train/Test/Evaluation
2209  TrainAllMethods();
2210  TestAllMethods();
 2211  EvaluateAllMethods();
 2212 
2213  //getting ROC
2214  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2215 
2216  //cleaning information to process sub-seeds
2217  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2218  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2219  delete sresults;
2220  delete seedloader;
2221  this->DeleteAllMethods();
2222  fMethodsMap.clear();
2223 
 2224  //remove the global result because it requires a lot of RAM when looping over all seeds
2225 
2226  for (uint32_t i = 0; i < VIBITS; ++i) {
 2227  if (x & (uint64_t(1) << i)) { // 64-bit mask: a plain (1 << i) is undefined for i >= 31
 2228  y = x & ~(uint64_t(1) << i);
2229  std::bitset<VIBITS> ybitset(y);
2230  //need at least one variable
2231  //NOTE: if sub-seed is zero then is the special case
2232  //that count in xbitset is 1
2233  Double_t ny = log(x - y) / 0.693147;
2234  if (y == 0) {
2235  importances[ny] = SROC - 0.5;
2236  continue;
2237  }
2238 
2239  //creating loader for sub-seed
2240  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2241  //adding variables from sub-seed
2242  for (int index = 0; index < nbits; index++) {
2243  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2244  }
2245 
2246  //Loading Dataset
2247  DataLoaderCopy(subseedloader,loader);
2248 
2249  //Booking SubSeed
2250  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2251 
2252  //Train/Test/Evaluation
2253  TrainAllMethods();
2254  TestAllMethods();
 2255  EvaluateAllMethods();
 2256 
2257  //getting ROC
2258  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2259  importances[ny] += SROC - SSROC;
2260 
2261  //cleaning information
2262  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
 2263  TMVA::ResultsClassification *ssresults = (TMVA::ResultsClassification*)ssmethod->Data()->GetResults(ssmethod->GetMethodName(), Types::kTesting, Types::kClassification);
 2264  delete ssresults;
2265  delete subseedloader;
2266  this->DeleteAllMethods();
2267  fMethodsMap.clear();
2268  }
2269  }
2270  std::cout<<"--- Variable Importance Results (Short)"<<std::endl;
2271  return GetImportance(nbits,importances,varNames);
2272 }
2273 
2274 ////////////////////////////////////////////////////////////////////////////////
2275 
2276 TH1F* TMVA::Factory::EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2277 {
2278  TRandom3 *rangen = new TRandom3(0); //Random Gen.
2279 
2280  uint64_t x = 0;
2281  uint64_t y = 0;
2282 
2283  //getting number of variables and variable names from loader
2284  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
2285  std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();
2286 
2287  long int range = pow(2, nbits);
2288 
2289  //vector to save importances
2290  std::vector<Double_t> importances(nbits);
2291  Double_t importances_norm = 0;
2292  for (int i = 0; i < nbits; i++)importances[i] = 0;
2293 
2294  Double_t SROC, SSROC; //computed ROC value
2295  for (UInt_t n = 0; n < nseeds; n++) {
2296  x = rangen -> Integer(range);
2297 
2298  std::bitset<32> xbitset(x);
 2299  if (x == 0) continue; // the data loader needs at least one variable
2300 
2301 
2302  //creating loader for seed
2303  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2304 
2305  //adding variables from seed
2306  for (int index = 0; index < nbits; index++) {
2307  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2308  }
2309 
2310  //Loading Dataset
2311  DataLoaderCopy(seedloader,loader);
2312 
2313  //Booking Seed
2314  BookMethod(seedloader, theMethod, methodTitle, theOption);
2315 
2316  //Train/Test/Evaluation
2317  TrainAllMethods();
2318  TestAllMethods();
 2319  EvaluateAllMethods();
 2320 
2321  //getting ROC
2322  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2323 // std::cout << "Seed: n " << n << " x " << x << " xbitset:" << xbitset << " ROC " << SROC << std::endl;
2324 
2325  //cleaning information to process sub-seeds
2326  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2327  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2328  delete sresults;
2329  delete seedloader;
2330  this->DeleteAllMethods();
2331  fMethodsMap.clear();
2332 
 2333  //remove the global result because it requires a lot of RAM when looping over all seeds
2334 
2335  for (uint32_t i = 0; i < 32; ++i) {
 2336  if (x & (uint64_t(1) << i)) { // 64-bit mask: a plain (1 << i) is undefined for i >= 31
 2337  y = x & ~(uint64_t(1) << i);
2338  std::bitset<32> ybitset(y);
2339  //need at least one variable
2340  //NOTE: if sub-seed is zero then is the special case
2341  //that count in xbitset is 1
2342  Double_t ny = log(x - y) / 0.693147;
2343  if (y == 0) {
2344  importances[ny] = SROC - 0.5;
2345  importances_norm += importances[ny];
2346  // std::cout << "SubSeed: " << y << " y:" << ybitset << "ROC " << 0.5 << std::endl;
2347  continue;
2348  }
2349 
2350  //creating loader for sub-seed
2351  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2352  //adding variables from sub-seed
2353  for (int index = 0; index < nbits; index++) {
2354  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2355  }
2356 
2357  //Loading Dataset
2358  DataLoaderCopy(subseedloader,loader);
2359 
2360  //Booking SubSeed
2361  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2362 
2363  //Train/Test/Evaluation
2364  TrainAllMethods();
2365  TestAllMethods();
 2366  EvaluateAllMethods();
 2367 
2368  //getting ROC
2369  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2370  importances[ny] += SROC - SSROC;
2371  //std::cout << "SubSeed: " << y << " y:" << ybitset << " x-y " << x - y << " " << std::bitset<32>(x - y) << " ny " << ny << " SROC " << SROC << " SSROC " << SSROC << " Importance = " << importances[ny] << std::endl;
2372  //cleaning information
2373  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
 2374  TMVA::ResultsClassification *ssresults = (TMVA::ResultsClassification*)ssmethod->Data()->GetResults(ssmethod->GetMethodName(), Types::kTesting, Types::kClassification);
 2375  delete ssresults;
2376  delete subseedloader;
2377  this->DeleteAllMethods();
2378  fMethodsMap.clear();
2379  }
2380  }
2381  }
2382  std::cout<<"--- Variable Importance Results (Random)"<<std::endl;
2383  return GetImportance(nbits,importances,varNames);
2384 }
2385 
2386 ////////////////////////////////////////////////////////////////////////////////
2387 
2388 TH1F* TMVA::Factory::GetImportance(const int nbits,std::vector<Double_t> importances,std::vector<TString> varNames)
2389 {
2390  TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);
2391 
2392  gStyle->SetOptStat(000000);
2393 
2394  Float_t normalization = 0.0;
2395  for (int i = 0; i < nbits; i++) {
2396  normalization = normalization + importances[i];
2397  }
2398 
2399  Float_t roc = 0.0;
2400 
2401  gStyle->SetTitleXOffset(0.4);
2402  gStyle->SetTitleXOffset(1.2);
2403 
2404 
2405  Double_t x_ie[nbits], y_ie[nbits];
2406  for (Int_t i = 1; i < nbits + 1; i++) {
2407  x_ie[i - 1] = (i - 1) * 1.;
2408  roc = 100.0 * importances[i - 1] / normalization;
2409  y_ie[i - 1] = roc;
2410  std::cout<<"--- "<<varNames[i-1]<<" = "<<roc<<" %"<<std::endl;
2411  vih1->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
2412  vih1->SetBinContent(i, roc);
2413  }
 2414  TGraph *g_ie = new TGraph(nbits, x_ie, y_ie); // x_ie and y_ie hold exactly nbits points
2415  g_ie->SetTitle("");
2416 
2417  vih1->LabelsOption("v >", "X");
2418  vih1->SetBarWidth(0.97);
2419  Int_t ca = TColor::GetColor("#006600");
2420  vih1->SetFillColor(ca);
2421  //Int_t ci = TColor::GetColor("#990000");
2422 
2423  vih1->GetYaxis()->SetTitle("Importance (%)");
2424  vih1->GetYaxis()->SetTitleSize(0.045);
2425  vih1->GetYaxis()->CenterTitle();
2426  vih1->GetYaxis()->SetTitleOffset(1.24);
2427 
2428  vih1->GetYaxis()->SetRangeUser(-7, 50);
2429  vih1->SetDirectory(0);
2430 
2431 // vih1->Draw("B");
2432  return vih1;
2433 }
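// A short sketch of consuming the returned histogram (canvas and file
// names are arbitrary):
//
//   TCanvas c("cvi", "variable importance");
//   vih1->Draw("B");              // one labelled bin per input variable
//   c.SaveAs("importance.png");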
2434 
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title Offset is a correction factor with respect to the "s...
Definition: TAttAxis.cxx:262
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:366
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:778
virtual void SetLineWidth(Width_t lwidth)
Set the line width.
Definition: TAttLine.h:43
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Definition: Factory.cxx:2388
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
DataSetManager * fDataSetManager
Definition: DataLoader.h:189
static long int sum(long int i)
Definition: Factory.cxx:2162
Principal Components Analysis (PCA)
Definition: TPrincipal.h:20
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:575
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition: Factory.cxx:343
Double_t GetEffSForEffB(Double_t effB, const UInt_t num_points=41)
Calculate the signal efficiency (sensitivity) for a given background efficiency (sensitivity).
Definition: ROCCurve.cxx:199
Random number generator class based on M.
Definition: TRandom3.h:27
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
ROOT output file.
Definition: Factory.h:199
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
Definition: TH1.cxx:4935
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
Definition: Factory.cxx:1240
Bool_t fROC
enable to calculate corelations
Definition: Factory.h:206
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
Definition: ROCCurve.cxx:230
float Float_t
Definition: RtypesCore.h:53
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8053
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition: Tools.cxx:1336
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:640
R__EXTERN TStyle * gStyle
Definition: TStyle.h:402
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:70
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2171
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:628
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:311
Config & gConfig()
TH1 * h
Definition: legend2.C:5
MsgLogger & Log() const
Definition: Configurable.h:122
std::vector< TString > GetListOfVariables() const
returns list of variables
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
Bool_t Verbose(void) const
Definition: Factory.h:133
EAnalysisType
Definition: Types.h:125
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:35
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:824
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:530
virtual void MakeClass(const TString &classFileName=TString("")) const =0
TAxis * GetYaxis() const
Get y axis of the graph.
Definition: TGraph.cxx:1602
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
#define gROOT
Definition: TROOT.h:375
TString fTransformations
option string given by construction (presently only "V")
Definition: Factory.h:203
Basic string class.
Definition: TString.h:129
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:551
Ranking for variables in method (implementation)
Definition: Ranking.h:48
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1099
int Int_t
Definition: RtypesCore.h:41
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
Definition: TDirectory.cxx:958
bool Bool_t
Definition: RtypesCore.h:59
void TrainAllMethods()
Iterates through all booked methods and calls training.
Definition: Factory.cxx:1017
TAxis * GetXaxis() const
Get x axis of the graph.
virtual void SetTitle(const char *title="")
Set graph title.
Definition: TGraph.cxx:2180
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
Definition: Factory.cxx:896
void WriteDataInformation(DataSetInfo &fDataSetInfo)
Definition: Factory.cxx:524
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:225
const TString & GetLabel() const
Definition: VariableInfo.h:59
#define NULL
Definition: RtypesCore.h:88
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:362
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
Definition: TAxis.h:184
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
MsgLogger * fLogger
Definition: Configurable.h:128
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:491
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2276
overwrite existing object with same name
Definition: TObject.h:79
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1218
virtual void SetBarWidth(Float_t width=0.5)
Definition: TH1.h:338
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
Definition: Factory.cxx:827
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
Definition: TAxis.cxx:927
static void InhibitOutput()
Definition: MsgLogger.cxx:74
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:392
#define READXML
Definition: Factory.cxx:104
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:589
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Definition: Factory.cxx:2052
const int ny
Definition: kalman.C:17
Bool_t fModelPersistence
the training type
Definition: Factory.h:212
TAxis * GetYaxis() const
Get y axis of the graph.
DataSet * Data() const
Definition: MethodBase.h:393
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:581
TString fWeightFileDir
Definition: Config.h:96
void ReadStateFromFile()
Function to write options and weights to file.
double pow(double, double)
void PrintHelpMessage() const
prints out method-specific help method
std::vector< std::vector< double > > Data
IONames & GetIONames()
Definition: Config.h:74
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:411
DataSetInfo & DataInfo() const
Definition: MethodBase.h:394
Bool_t DoRegression() const
Definition: MethodBase.h:422
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:72
void SetDrawProgressBar(Bool_t d)
Definition: Config.h:66
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2075
Bool_t IsModelPersistence()
Definition: Factory.cxx:289
Class that contains all the data information.
Definition: DataSetInfo.h:60
std::map< TString, MVector * > fMethodsMap
Definition: Factory.h:85
virtual void Draw(Option_t *chopt="")
Draw this multigraph with its current attributes.
Bool_t fSilentFile
enable to calculate ROC values
Definition: Factory.h:207
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
Class for boosting a TMVA method.
Definition: MethodBoost.h:56
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
Bool_t DoMulticlass() const
Definition: MethodBase.h:423
static void DestroyInstance()
Definition: Tools.cxx:90
const Int_t MinNoTrainingEvents
Definition: Factory.cxx:99
Class that contains all the data information.
Definition: DataSet.h:69
virtual ~Factory()
Destructor.
Definition: Factory.cxx:297
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:40
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so.
Definition: Factory.cxx:622
TDirectory * RootBaseDir()
Definition: Factory.h:148
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9163
UInt_t GetNTargets() const
Definition: DataSetInfo.h:111
TH1F * GetHistogram() const
Returns a pointer to the histogram used to draw the axis.
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:733
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC), for a given method.
Definition: Factory.cxx:766
DataSetManager * GetDataSetManager()
Definition: DataSetInfo.h:175
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
R__EXTERN TSystem * gSystem
Definition: TSystem.h:539
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Definition: TPrincipal.cxx:410
SVector< double, 2 > v
Definition: Dict.h:5
void TMVAWelcomeMessage()
direct output, eg, when starting ROOT session -> no use of Logger here
Definition: Tools.cxx:1313
const char * GetName() const
Definition: MethodBase.h:318
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:398
ClassInfo * GetClassInfo(Int_t clNum) const
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
Definition: Factory.cxx:507
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition: TAttFill.h:37
const TMatrixD * CorrelationMatrix(const TString &className) const
Bool_t fCorrelations
verbose mode
Definition: Factory.h:205
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods. ...
Definition: Factory.cxx:1255
class TMVA::Config::VariablePlotting fVariablePlotting
void printMatrix(const M &m)
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:563
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8325
void TestAllMethods()
Definition: Factory.cxx:1153
unsigned int UInt_t
Definition: RtypesCore.h:42
TMarker * m
Definition: textangle.C:8
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
const TMatrixD * GetCovarianceMatrix() const
Definition: TPrincipal.h:58
const TString & GetMethodName() const
Definition: MethodBase.h:315
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb"...
Definition: TColor.cxx:1707
TAxis * GetYaxis()
Definition: TH1.h:301
Class that contains all the data information.
void Greetings()
Print welcome message.
Definition: Factory.cxx:273
This is the main MVA steering class.
Definition: Factory.h:81
Tools & gTools()
virtual void MakePrincipals()
Perform the principal components analysis.
Definition: TPrincipal.cxx:862
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:81
void SetVerbose(Bool_t v=kTRUE)
Definition: Factory.cxx:335
TFile * fgTargetFile
Definition: Factory.h:196
virtual Double_t GetSignificance() const
compute significance of mean difference
Definition: graph.py:1
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:406
TString GetWeightFileName() const
retrieve weight file name
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title The size is expressed in per cent of the pad width.
Definition: TAttAxis.cxx:272
TAxis * GetXaxis() const
Get x axis of the graph.
Definition: TGraph.cxx:1592
const Bool_t kFALSE
Definition: RtypesCore.h:92
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:237
Class for categorizing the phase space.
virtual void Print() const
get maximum length of variable names
Definition: Ranking.cxx:111
The Canvas class.
Definition: TCanvas.h:31
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:629
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:438
#define ClassImp(name)
Definition: Rtypes.h:336
double Double_t
Definition: RtypesCore.h:55
virtual void PrintHelpMessage() const =0
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:336
Class which takes the results of a multiclass classification.
void SetFile(TFile *file)
Definition: MethodBase.h:359
Double_t y[n]
Definition: legend1.C:17
static void DestroyInstance()
static function: destroy TMVA instance
Definition: Config.cxx:88
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:572
UInt_t GetEntries(const TString &name) const
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:549
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:96
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition: TAxis.cxx:809
void AddPreDefVal(const T &)
Definition: Configurable.h:168
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition: Tools.cxx:1327
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
void ProcessSetup()
process all options; the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived classes
Definition: MethodBase.cxx:428
void PrintVariableRanking() const
prints ranking of input variables
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
const TString & GetOptions() const
Definition: Configurable.h:84
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TNamed.cxx:65
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:898
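A sketch of FormattedOutput printing a small name/value table through a MsgLogger; the values and column titles here are invented:

   std::vector<Double_t> values  = {0.812, 0.774};
   std::vector<TString>  methods = {"BDT", "MLP"};
   TMVA::MsgLogger logger("Example");
   TMVA::gTools().FormattedOutput(values, methods, "Method", "ROC-integ.", logger, "%1.3f");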
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
void SetUseColor(Bool_t uc)
Definition: Config.h:57
void SetConfigName(const char *n)
Definition: Configurable.h:63
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
#define VIBITS
Definition: Factory.cxx:107
void SetTitleXOffset(Float_t offset=1)
Definition: TStyle.h:382
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Definition: Factory.cxx:1213
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
Definition: ROCCurve.cxx:256
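Sketch of drawing the graph returned by ROCCurve::GetROCCurve; roc is assumed to be a valid TMVA::ROCCurve*, and the axis titles are illustrative:

   TGraph *g = roc->GetROCCurve(100);     // 100 sampling points (the default)
   g->SetTitle("ROC;signal efficiency;background rejection");
   g->Draw("AL");                         // the caller owns the returned TGraph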
TList * GetListOfGraphs() const
Definition: TMultiGraph.h:69
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
Definition: MethodBase.cxx:969
DataSetManager * fDataSetManager
Definition: MethodBoost.h:188
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:435
const TString & GetSplitOptions() const
Definition: DataSetInfo.h:167
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Private method to generate an instance of a ROCCurve regardless of analysis type. ...
Definition: Factory.cxx:670
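GetROC itself is private; user code reaches it through the public wrappers built on top of it. A sketch assuming a trained factory/loader pair, a booked method titled "BDT", and the public Factory::GetROCIntegral wrapper:

   // after TrainAllMethods(), TestAllMethods() and EvaluateAllMethods():
   Double_t rocInt = factory.GetROCIntegral(&loader, "BDT");   // iClass = 0 by default
   std::cout << "ROC integral (BDT): " << rocInt << std::endl;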
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
Definition: Factory.cxx:119
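A hedged construction sketch; the job name, file name, and option string are examples assembled from standard Factory options:

   TFile *outputFile = TFile::Open("TMVA.root", "RECREATE");
   TMVA::Factory factory("TMVAClassification", outputFile,
                         "!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification");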
DataInputHandler & DataInput()
TString GetMethodTypeName() const
Definition: MethodBase.h:316
Class that is the base-class for a vector of results.
Definition: Results.h:57
void SetSilent(Bool_t s)
Definition: Config.h:60
const TCut & GetCut(Int_t i) const
Definition: DataSetInfo.h:149
Int_t IsNaN(Double_t x)
Definition: TMath.h:778
void SetWeightFileDir(TString fileDir)
set directory of weight file
TString fJobName
jobname, used as extension in weight file names
Definition: Factory.h:209
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:344
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
void DeleteAllMethods(void)
Delete methods.
Definition: Factory.cxx:315
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mode.
Definition: TStyle.cxx:1267
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings.
Definition: Tools.cxx:1210
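SplitString usage sketch, cutting a colon-separated option string into its fields (the input string is invented):

   std::vector<TString> fields = TMVA::gTools().SplitString("BDT:MLP:Likelihood", ':');
   for (const TString &f : fields)
      std::cout << f << std::endl;        // BDT, MLP, Likelihood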
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
virtual Double_t GetTrainingEfficiency(const TString &)
Bool_t IsSignal(const Event *ev) const
Bool_t IsSilentFile()
Definition: Factory.cxx:282
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:421
Types::EAnalysisType fAnalysisType
the analysis type of the training (classification, regression, or multiclass)
Definition: Factory.h:211
virtual void SetTitle(const char *title)
Change (i.e. set) the title of the histogram.
Definition: TH1.cxx:6028
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
prints the TMVA citation in the requested format
Definition: Tools.cxx:1452
std::vector< IMethod * > MVector
Definition: Factory.h:84
virtual const char * GetName() const
Returns name of object.
Definition: Factory.h:96
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
static void EnableOutput()
Definition: MsgLogger.cxx:75
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Definition: Factory.cxx:1185
const TString & GetTestvarName() const
Definition: MethodBase.h:319
virtual const char * GetName() const
Returns name of object.
Definition: TObject.cxx:364
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
Definition: Factory.cxx:489
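Sketch of looking up a booked method; the dataset and method names are examples, and the returned IMethod* is downcast to MethodBase to reach the accessors listed on this page:

   TMVA::IMethod *im = factory.GetMethod("dataset", "BDT");
   if (auto *m = dynamic_cast<TMVA::MethodBase *>(im))
      std::cout << m->GetMethodName() << "  cut: "
                << m->GetSignalReferenceCut() << std::endl;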
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
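A sketch for the multiclass confusion matrix; effB = 0.10 and the tree type are example choices, and method is assumed to point at a trained multiclass MethodBase:

   TMatrixD cm = method->GetMulticlassConfusionMatrix(0.10, TMVA::Types::kTesting);
   cm.Print();                            // class-vs-class efficiencies at effB = 10%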
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:155
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:325
const Bool_t kTRUE
Definition: RtypesCore.h:91
DataSet * GetDataSet() const
returns data set
Types::EMVA GetMethodType() const
Definition: MethodBase.h:317
void CheckForUnusedOptions() const
checks for unused options in option string
const Int_t n
Definition: legend1.C:16
virtual void TestClassification()
perform the classification test on the test sample
const Event * GetEvent() const
Definition: DataSet.cxx:202
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:420
double log(double)
TAxis * GetXaxis()
Definition: TH1.h:300
void SetConfigDescription(const char *d)
Definition: Configurable.h:64
Bool_t fVerbose
verbose mode
Definition: Factory.h:204
const char * Data() const
Definition: TString.h:347