Logo ROOT   6.10/09
Reference Guide
Factory.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 // Updated by: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer
4 
5 /**********************************************************************************
6  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
7  * Package: TMVA *
8  * Class : Factory *
9  * Web : http://tmva.sourceforge.net *
10  * *
11  * Description: *
12  * This is the main MVA steering class: it creates (books) all MVA methods, *
13  * and guides them through the training, testing and evaluation phases. *
14  * *
15  * Authors (alphabetical): *
16  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
18  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
19  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
22  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
23  * Omar Zapata <Omar.Zapata@cern.ch> - UdeA/ITM Colombia *
24  * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
25  * Sergei Gleyzer <Sergei.Gleyzer@cern.ch> - U of Florida & CERN *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * UdeA/ITM, Colombia *
33  * U. of Florida, USA *
34  * *
35  * Redistribution and use in source and binary forms, with or without *
36  * modification, are permitted according to the terms listed in LICENSE *
37  * (http://tmva.sourceforge.net/LICENSE) *
38  **********************************************************************************/
39 
40 #ifndef ROOT_TMVA_Factory
41 #define ROOT_TMVA_Factory
42 
43 //////////////////////////////////////////////////////////////////////////
44 // //
45 // Factory //
46 // //
47 // This is the main MVA steering class: it creates all MVA methods, //
48 // and guides them through the training, testing and evaluation //
49 // phases //
50 // //
51 //////////////////////////////////////////////////////////////////////////
52 
53 #include <string>
54 #include <vector>
55 #include <map>
56 #include "TCut.h"
57 
58 #include "TMVA/Configurable.h"
59 #include "TMVA/Types.h"
60 #include "TMVA/DataSet.h"
61 
62 class TCanvas;
63 class TDirectory;
64 class TFile;
65 class TGraph;
66 class TH1F;
67 class TMultiGraph;
68 class TTree;
69 namespace TMVA {
70 
71  class IMethod;
72  class MethodBase;
73  class DataInputHandler;
74  class DataSetInfo;
75  class DataSetManager;
76  class DataLoader;
77  class ROCCurve;
78  class VariableTransformBase;
79 
80 
81  class Factory : public Configurable { // main MVA steering class: books all MVA methods and guides them through training, testing and evaluation
82  public:
83 
84  typedef std::vector<IMethod*> MVector;
85  std::map<TString,MVector*> fMethodsMap;//all methods for every dataset with the same name
86 
87  // no default constructor
88  Factory( TString theJobName, TFile* theTargetFile, TString theOption = "" );
89 
90  // constructor to work without file
91  Factory( TString theJobName, TString theOption = "" );
92 
93  // default destructor
94  virtual ~Factory();
95 
96  virtual const char* GetName() const { return "Factory"; }
97 
98 
99  MethodBase* BookMethod( DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption = "" ); // book a classifier or regression method, selected by method name
100  MethodBase* BookMethod( DataLoader *loader, Types::EMVA theMethod, TString methodTitle, TString theOption = "" ); // same, selected by Types::EMVA value
102  TString /*methodTitle*/, // NOTE(review): listing line 101 (the head of this BookMethod overload) is missing from this extract
103  TString /*methodOption*/,
104  TMVA::Types::EMVA /*theComposite*/,
105  TString /*compositeOption = ""*/ ) { return 0; } // stub: ignores its arguments and always returns a null pointer
106 
107  // optimize all booked methods (well, if desired by the method)
108  std::map<TString,Double_t> OptimizeAllMethods (TString fomType="ROCIntegral", TString fitType="FitGA");
109  void OptimizeAllMethodsForClassification(TString fomType="ROCIntegral", TString fitType="FitGA") { OptimizeAllMethods(fomType,fitType); } // forwards to OptimizeAllMethods(); the returned map is discarded
110  void OptimizeAllMethodsForRegression (TString fomType="ROCIntegral", TString fitType="FitGA") { OptimizeAllMethods(fomType,fitType); } // forwards to OptimizeAllMethods(); the returned map is discarded
111 
112  // training for all booked methods
113  void TrainAllMethods ();
116 
117  // testing
118  void TestAllMethods();
119 
120  // performance evaluation
121  void EvaluateAllMethods( void );
122  void EvaluateAllVariables(DataLoader *loader, TString options = "" );
123 
124  TH1F* EvaluateImportance( DataLoader *loader,VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption = "" );
125 
126  // delete all methods and reset the method vector
127  void DeleteAllMethods( void );
128 
129  // accessors
130  IMethod* GetMethod( const TString& datasetname, const TString& title ) const;
131  Bool_t HasMethod( const TString& datasetname, const TString& title ) const;
132 
133  Bool_t Verbose( void ) const { return fVerbose; }
134  void SetVerbose( Bool_t v=kTRUE );
135 
136  // make ROOT-independent C++ class for classifier response
137  // (classifier-specific implementation)
138  // If no classifier name is given, help messages for all booked
139  // classifiers are printed
140  virtual void MakeClass(const TString& datasetname , const TString& methodTitle = "" ) const;
141 
142  // prints classifier-specific help messages, dedicated to
143  // help with the optimisation and configuration options tuning.
144  // If no classifier name is given, help messages for all booked
145  // classifiers are printed
146  void PrintHelpMessage(const TString& datasetname , const TString& methodTitle = "" ) const;
147 
149 
152 
153  Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass = 0); // integral of the ROC curve (area under curve) for the given method
154  Double_t GetROCIntegral(TString datasetname, TString theMethodName, UInt_t iClass = 0); // same, with the dataset given by name
155 
156  // Methods to get a TGraph for an indicated method in dataset.
157  // Optional title and axis added with setTitles=kTRUE.
158  // Argument iClass used in multiclass settings, otherwise ignored.
159  TGraph* GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0);
160  TGraph* GetROCCurve(TString datasetname, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0);
161 
162  // Methods to get a TMultiGraph for a given class and all methods in dataset.
164  TMultiGraph* GetROCCurveAsMultiGraph(TString datasetname, UInt_t iClass); // NOTE(review): listing line 163 (the DataLoader* overload) is missing from this extract
165 
166  // Draw all ROC curves of a given class for all methods in the dataset.
167  TCanvas* GetROCCurve(DataLoader *loader, UInt_t iClass=0);
168  TCanvas* GetROCCurve(TString datasetname, UInt_t iClass=0);
169 
170  private:
171 
172  // the beautiful greeting message
173  void Greetings();
174 
175  // evaluate the simple case: removing one variable at a time
176  TH1F* EvaluateImportanceShort( DataLoader *loader,Types::EMVA theMethod, TString methodTitle, const char *theOption = "" );
177  // evaluate all combinations of variables
178  TH1F* EvaluateImportanceAll( DataLoader *loader,Types::EMVA theMethod, TString methodTitle, const char *theOption = "" );
179  // evaluate randomly, given a number of seeds
180  TH1F* EvaluateImportanceRandom( DataLoader *loader,UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption = "" );
181 
182  TH1F* GetImportance(const int nbits,std::vector<Double_t> importances,std::vector<TString> varNames);
183 
184  // Helpers for public facing ROC methods
185  ROCCurve *GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass = 0);
186  ROCCurve *GetROC(TString datasetname, TString theMethodName, UInt_t iClass = 0);
187 
188  void WriteDataInformation(DataSetInfo& fDataSetInfo);
189 
191 
192  private:
193 
194  // data members
195 
196  TFile* fgTargetFile; //! ROOT output file
197 
198 
199  std::vector<TMVA::VariableTransformBase*> fDefaultTrfs; //! list of transformations on default DataSet
200 
201  // cd to local directory
202  TString fOptions; //! option string given by construction (presently only "V")
203  TString fTransformations; //! List of transformations to test
204  Bool_t fVerbose; //! verbose mode
205  Bool_t fCorrelations; //! enable to calculate correlations
206  Bool_t fROC; //! enable to calculate ROC values
207  Bool_t fSilentFile; //! used in constructor without file
208 
209  TString fJobName; //! jobname, used as extension in weight file names
210 
211  Types::EAnalysisType fAnalysisType; //! the training type
212  Bool_t fModelPersistence;//!option to save the trained model in xml file or using serialization
213 
214 
215  protected:
216 
217  ClassDef(Factory,0); // The factory creates all MVA methods, and performs their training and testing
218  };
219 
220 } // namespace TMVA
221 
222 #endif
223 
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Definition: Factory.cxx:2388
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition: Factory.cxx:343
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
list of transformations on default DataSet
Definition: Factory.h:199
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
Definition: Factory.cxx:1240
Bool_t fROC
enable to calculate ROC values
Definition: Factory.h:206
void OptimizeAllMethodsForClassification(TString fomType="ROCIntegral", TString fitType="FitGA")
Definition: Factory.h:109
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2171
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
Bool_t Verbose(void) const
Definition: Factory.h:133
EAnalysisType
Definition: Types.h:125
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:35
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
void OptimizeAllMethodsForRegression(TString fomType="ROCIntegral", TString fitType="FitGA")
Definition: Factory.h:110
TString fTransformations
List of transformations to test.
Definition: Factory.h:203
Basic string class.
Definition: TString.h:129
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:551
bool Bool_t
Definition: RtypesCore.h:59
void TrainAllMethods()
Iterates through all booked methods and calls training.
Definition: Factory.cxx:1017
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
Definition: Factory.cxx:896
void WriteDataInformation(DataSetInfo &fDataSetInfo)
Definition: Factory.cxx:524
void TrainAllMethodsForClassification(void)
Definition: Factory.h:114
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2276
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
Definition: Factory.cxx:827
void TrainAllMethodsForRegression(void)
Definition: Factory.h:115
#define ClassDef(name, id)
Definition: Rtypes.h:297
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Definition: Factory.cxx:2052
Bool_t fModelPersistence
option to save the trained model in xml file or using serialization
Definition: Factory.h:212
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2075
Bool_t IsModelPersistence()
Definition: Factory.cxx:289
Class that contains all the data information.
Definition: DataSetInfo.h:60
std::map< TString, MVector * > fMethodsMap
Definition: Factory.h:85
Bool_t fSilentFile
used in constructor without file
Definition: Factory.h:207
virtual ~Factory()
Destructor.
Definition: Factory.cxx:297
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and, for those that use parameter tuning, optimizes them.
Definition: Factory.cxx:622
TDirectory * RootBaseDir()
Definition: Factory.h:148
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC), for a given method.
Definition: Factory.cxx:766
SVector< double, 2 > v
Definition: Dict.h:5
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
Definition: Factory.cxx:507
Bool_t fCorrelations
enable to calculate correlations
Definition: Factory.h:205
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods. ...
Definition: Factory.cxx:1255
void TestAllMethods()
Definition: Factory.cxx:1153
unsigned int UInt_t
Definition: RtypesCore.h:42
void Greetings()
Print welcome message.
Definition: Factory.cxx:273
This is the main MVA steering class.
Definition: Factory.h:81
void SetVerbose(Bool_t v=kTRUE)
Definition: Factory.cxx:335
TFile * fgTargetFile
ROOT output file.
Definition: Factory.h:196
void SetInputTreesFromEventAssignTrees()
The Canvas class.
Definition: TCanvas.h:31
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:34
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Definition: Factory.cxx:1213
Abstract ClassifierFactory template that handles arbitrary types.
TString fOptions
option string given by construction (presently only "V")
Definition: Factory.h:202
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Private method to generate an instance of a ROCCurve regardless of analysis type. ...
Definition: Factory.cxx:670
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
Definition: Factory.cxx:119
TString fJobName
jobname, used as extension in weight file names
Definition: Factory.h:209
MethodBase * BookMethod(DataLoader *, TMVA::Types::EMVA, TString, TString, TMVA::Types::EMVA, TString)
Definition: Factory.h:101
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
void DeleteAllMethods(void)
Delete methods.
Definition: Factory.cxx:315
Bool_t IsSilentFile()
Definition: Factory.cxx:282
A TTree object has a header with a name and a title.
Definition: TTree.h:78
Types::EAnalysisType fAnalysisType
the training type
Definition: Factory.h:211
std::vector< IMethod * > MVector
Definition: Factory.h:84
virtual const char * GetName() const
Returns name of object.
Definition: Factory.h:96
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Definition: Factory.cxx:1185
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
Definition: Factory.cxx:489
const Bool_t kTRUE
Definition: RtypesCore.h:91
Bool_t fVerbose
verbose mode
Definition: Factory.h:204