Logo ROOT   6.08/07
Reference Guide
Factory.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 // Updated by: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer
4 
5 /**********************************************************************************
6  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
7  * Package: TMVA *
8  * Class : Factory *
9  * Web : http://tmva.sourceforge.net *
10  * *
11  * Description: *
12  * This is the main MVA steering class: it creates (books) all MVA methods, *
13  * and guides them through the training, testing and evaluation phases. *
14  * *
15  * Authors (alphabetical): *
16  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
18  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
19  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
22  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
23  * Omar Zapata <Omar.Zapata@cern.ch> - UdeA/ITM Colombia *
24  * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
25  * Sergei Gleyzer <Sergei.Gleyzer@cern.ch> - U of Florida & CERN *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * UdeA/ITM, Colombia *
33  * U. of Florida, USA *
34  * *
35  * Redistribution and use in source and binary forms, with or without *
36  * modification, are permitted according to the terms listed in LICENSE *
37  * (http://tmva.sourceforge.net/LICENSE) *
38  **********************************************************************************/
39 
40 #ifndef ROOT_TMVA_Factory
41 #define ROOT_TMVA_Factory
42 
43 //////////////////////////////////////////////////////////////////////////
44 // //
45 // Factory //
46 // //
47 // This is the main MVA steering class: it creates all MVA methods, //
48 // and guides them through the training, testing and evaluation //
49 // phases //
50 // //
51 //////////////////////////////////////////////////////////////////////////
52 
53 #include <string>
54 #include <vector>
55 #include <map>
56 #ifndef ROOT_TCut
57 #include "TCut.h"
58 #endif
59 
60 #ifndef ROOT_TMVA_Configurable
61 #include "TMVA/Configurable.h"
62 #endif
63 #ifndef ROOT_TMVA_Types
64 #include "TMVA/Types.h"
65 #endif
66 #ifndef ROOT_TMVA_DataSet
67 #include "TMVA/DataSet.h"
68 #endif
69 
70 class TFile;
71 class TTree;
72 class TDirectory;
73 class TCanvas;
74 class TGraph;
75 class TH1F;
76 namespace TMVA {
77 
78  class IMethod;
79  class MethodBase;
80  class DataInputHandler;
81  class DataSetInfo;
82  class DataSetManager;
83  class DataLoader;
84  class VariableTransformBase;
85 
86 
87  class Factory : public Configurable { // main MVA steering class: books the MVA methods and guides them through training, testing and evaluation
88  public:
89 
90  typedef std::vector<IMethod*> MVector; // vector of (non-typed) method pointers
91  std::map<TString,MVector*> fMethodsMap;// all booked methods, grouped per dataset (presumably keyed by dataset name - confirm in Factory.cxx)
92 
93  // no default constructor
94  Factory( TString theJobName, TFile* theTargetFile, TString theOption = "" );
95 
96  // constructor to work without an output file
97  Factory( TString theJobName, TString theOption = "" );
98 
99  // destructor
100  virtual ~Factory();
101 
102  virtual const char* GetName() const { return "Factory"; }
103 
104 
105  MethodBase* BookMethod( DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption = "" );
106  MethodBase* BookMethod( DataLoader *loader, Types::EMVA theMethod, TString methodTitle, TString theOption = "" );
108  TString /*methodTitle*/, // NOTE(review): listing line 107 (start of this six-argument BookMethod overload) is absent from this rendering
109  TString /*methodOption*/,
110  TMVA::Types::EMVA /*theComposite*/,
111  TString /*compositeOption = ""*/ ) { return 0; } // stub: ignores all arguments and returns a null pointer
112 
113  // optimize all booked methods (well, if desired by the method)
114  std::map<TString,Double_t> OptimizeAllMethods (TString fomType="ROCIntegral", TString fitType="FitGA");
115  void OptimizeAllMethodsForClassification(TString fomType="ROCIntegral", TString fitType="FitGA") { OptimizeAllMethods(fomType,fitType); } // forwards to OptimizeAllMethods, discarding the returned map
116  void OptimizeAllMethodsForRegression (TString fomType="ROCIntegral", TString fitType="FitGA") { OptimizeAllMethods(fomType,fitType); } // forwards to OptimizeAllMethods, discarding the returned map
117 
118  // training for all booked methods
119  void TrainAllMethods ();
122  // NOTE(review): listing lines 120-121 are absent from this rendering (the index places TrainAllMethodsForClassification/Regression there)
123  // testing
124  void TestAllMethods();
125 
126  // performance evaluation
127  void EvaluateAllMethods( void );
128  void EvaluateAllVariables(DataLoader *loader, TString options = "" );
129 
130  TH1F* EvaluateImportance( DataLoader *loader,VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption = "" );
131 
132  // delete all methods and reset the method vector
133  void DeleteAllMethods( void );
134 
135  // accessors
136  IMethod* GetMethod( const TString& datasetname, const TString& title )const;
137 
138  Bool_t Verbose( void ) const { return fVerbose; }
139  void SetVerbose( Bool_t v=kTRUE );
140 
141  // make ROOT-independent C++ class for classifier response
142  // (classifier-specific implementation)
143  // NOTE(review): the original text here ("If no classifier name is given, help messages
144  // for all booked classifiers are printed") duplicates the PrintHelpMessage comment below - confirm what an empty methodTitle does
145  virtual void MakeClass(const TString& datasetname , const TString& methodTitle = "" ) const;
146 
147  // prints classifier-specific help messages, dedicated to
148  // help with the optimisation and configuration options tuning.
149  // If no classifier name is given, help messages for all booked
150  // classifiers are printed
151  void PrintHelpMessage(const TString& datasetname , const TString& methodTitle = "" ) const;
152 
154  // NOTE(review): listing lines 153 and 155-156 are absent from this rendering (the index places RootBaseDir() at line 153)
157 
158  Double_t GetROCIntegral(DataLoader *loader,TString theMethodName);
159  Double_t GetROCIntegral(TString datasetname,TString theMethodName);
160 
161  // get the ROC curve as a TGraph for the indicated method in a dataset;
162  // optional title and axis are added with fLegend=kTRUE
163  TGraph* GetROCCurve(DataLoader *loader,TString theMethodName,Bool_t fLegend=kTRUE);
164  TGraph* GetROCCurve(TString datasetname,TString theMethodName,Bool_t fLegend=kTRUE);
165 
166  // Draw all ROC curves for all methods in the dataset.
167  TCanvas* GetROCCurve(DataLoader *loader);
168  TCanvas* GetROCCurve(TString datasetname);
169 
170  private:
171 
172  // the beautiful greeting message
173  void Greetings();
174 
175  // evaluate the simple case of removing one variable at a time
176  TH1F* EvaluateImportanceShort( DataLoader *loader,Types::EMVA theMethod, TString methodTitle, const char *theOption = "" );
177  // evaluate all variable combinations
178  TH1F* EvaluateImportanceAll( DataLoader *loader,Types::EMVA theMethod, TString methodTitle, const char *theOption = "" );
179  // evaluate randomly, given a number of seeds
180  TH1F* EvaluateImportanceRandom( DataLoader *loader,UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption = "" );
181 
182  TH1F* GetImportance(const int nbits,std::vector<Double_t> importances,std::vector<TString> varNames);
183 
184  void WriteDataInformation(DataSetInfo& fDataSetInfo);
185 
187 
188  private:
189 
190  // data members (NOTE(review): the cross-reference index shows each `//!` description attached to the FOLLOWING member - presumably Doxygen needs `//!<` for trailing docs; confirm before changing)
191 
192  TFile* fgTargetFile; //! ROOT output file
193 
194 
195  std::vector<TMVA::VariableTransformBase*> fDefaultTrfs; //! list of transformations on default DataSet
196 
197  // option strings and configuration flags
198  TString fOptions; //! option string given by construction (presently only "V")
199  TString fTransformations; //! List of transformations to test
200  Bool_t fVerbose; //! verbose mode
201  Bool_t fCorrelations; //! enable calculation of correlations
202  Bool_t fROC; //! enable calculation of ROC values
203  Bool_t fSilentFile; //! used in constructor without file
204 
205  TString fJobName; //! jobname, used as extension in weight file names
206 
207  Types::EAnalysisType fAnalysisType; //! the training type
208  Bool_t fModelPersistence;//! option to save the trained model in xml file or using serialization
209 
210 
211  protected:
212 
213  ClassDef(Factory,0); // The factory creates all MVA methods, and performs their training and testing
214  };
215 
216 } // namespace TMVA
217 
218 #endif
219 
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Definition: Factory.cxx:2009
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName)
Definition: Factory.cxx:639
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Definition: Factory.cxx:337
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
ROOT output file.
Definition: Factory.h:195
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
Definition: Factory.cxx:1044
Bool_t fROC
enable to calculate correlations
Definition: Factory.h:202
void OptimizeAllMethodsForClassification(TString fomType="ROCIntegral", TString fitType="FitGA")
Definition: Factory.h:115
VIType
Definition: Types.h:75
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:1794
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
Bool_t Verbose(void) const
Definition: Factory.h:138
EAnalysisType
Definition: Types.h:129
void OptimizeAllMethodsForRegression(TString fomType="ROCIntegral", TString fitType="FitGA")
Definition: Factory.h:116
TString fTransformations
option string given by construction (presently only "V")
Definition: Factory.h:199
Basic string class.
Definition: TString.h:137
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:575
bool Bool_t
Definition: RtypesCore.h:59
void TrainAllMethods()
iterates through all booked methods and calls training
Definition: Factory.cxx:822
void WriteDataInformation(DataSetInfo &fDataSetInfo)
Definition: Factory.cxx:499
void TrainAllMethodsForClassification(void)
Definition: Factory.h:120
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:1897
void TrainAllMethodsForRegression(void)
Definition: Factory.h:121
#define ClassDef(name, id)
Definition: Rtypes.h:254
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Definition: Factory.cxx:1678
Bool_t fModelPersistence
the training type
Definition: Factory.h:208
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:1699
Bool_t IsModelPersistence()
Definition: Factory.cxx:284
std::map< TString, MVector * > fMethodsMap
Definition: Factory.h:91
Bool_t fSilentFile
enable to calculate ROC values
Definition: Factory.h:203
virtual ~Factory()
destructor delete fATreeEvent;
Definition: Factory.cxx:293
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
iterates through all booked methods and sees if they use parameter tuning and if so.
Definition: Factory.cxx:597
TDirectory * RootBaseDir()
Definition: Factory.h:153
SVector< double, 2 > v
Definition: Dict.h:5
Bool_t fCorrelations
verbose mode
Definition: Factory.h:201
void EvaluateAllMethods(void)
iterates over all MVAs that have been booked, and calls their evaluation methods
Definition: Factory.cxx:1059
void TestAllMethods()
Definition: Factory.cxx:958
unsigned int UInt_t
Definition: RtypesCore.h:42
void Greetings()
Print welcome message; options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg ...
Definition: Factory.cxx:270
void SetVerbose(Bool_t v=kTRUE)
Definition: Factory.cxx:331
TFile * fgTargetFile
Definition: Factory.h:192
void SetInputTreesFromEventAssignTrees()
The Canvas class.
Definition: TCanvas.h:41
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:44
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t fLegend=kTRUE)
Definition: Factory.cxx:687
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier iterate over methods and test.
Definition: Factory.cxx:1017
Abstract ClassifierFactory template that handles arbitrary types.
TString fOptions
list of transformations on default DataSet
Definition: Factory.h:198
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor. jobname: this name will appear in all weight file names produced by the MVAs th...
Definition: Factory.cxx:116
TString fJobName
used in constructor without file
Definition: Factory.h:205
MethodBase * BookMethod(DataLoader *, TMVA::Types::EMVA, TString, TString, TMVA::Types::EMVA, TString)
Definition: Factory.h:107
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:53
void DeleteAllMethods(void)
delete methods
Definition: Factory.cxx:311
Bool_t IsSilentFile()
Definition: Factory.cxx:278
A TTree object has a header with a name and a title.
Definition: TTree.h:98
Types::EAnalysisType fAnalysisType
jobname, used as extension in weight file names
Definition: Factory.h:207
std::vector< IMethod * > MVector
Definition: Factory.h:90
virtual const char * GetName() const
Returns name of object.
Definition: Factory.h:102
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Definition: Factory.cxx:989
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Definition: Factory.cxx:481
const Bool_t kTRUE
Definition: Rtypes.h:91
Bool_t fVerbose
List of transformations to test.
Definition: Factory.h:200