Logo ROOT   6.08/07
Reference Guide
MethodBoost.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBoost *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Class for boosting a TMVA method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * U. of Bonn, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  **********************************************************************************/
31 
32 #ifndef ROOT_TMVA_MethodBoost
33 #define ROOT_TMVA_MethodBoost
34 
35 //////////////////////////////////////////////////////////////////////////
36 // //
37 // MethodBoost //
38 // //
39 // Class for boosting a TMVA method //
40 // //
41 //////////////////////////////////////////////////////////////////////////
42 
43 #include <iosfwd>
44 #include <vector>
45 
46 #ifndef ROOT_TMVA_MethodBase
47 #include "TMVA/MethodBase.h"
48 #endif
49 
50 #ifndef ROOT_TMVA_MethodCompositeBase
52 #endif
53 
54 namespace TMVA {
55 
56  class Factory; // DSMTEST
57  class Reader; // DSMTEST
58  class DataSetManager; // DSMTEST
59 
61 
62  public :
63 
64  // constructors: job name, method title, data set info and an option string (empty by default)
65  MethodBoost( const TString& jobName,
66  const TString& methodTitle,
67  DataSetInfo& theData,
68  const TString& theOption = "" );
69 
71  const TString& theWeightFile ); // NOTE(review): the beginning of this declaration is missing from this listing
72 
73  virtual ~MethodBoost( void ); // destructor
74  // boost can handle classification with 2 classes and regression with one regression target
75  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ ); // the third argument (number of targets) is left unnamed
76 
77  // train and boost all the classifiers
78  void Train( void );
79 
80  // ranking of the input variables
81  const Ranking* CreateRanking();
82 
83  // register the name and option string from which the boosted classifier will be created
84  Bool_t BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption );
85  void SetBoostedMethodName ( TString methodName ) { fBoostedMethodName = methodName; } // stores methodName in fBoostedMethodName
86 
87  Int_t GetBoostNum() { return fBoostNum; } // NOTE(review): fBoostNum is declared UInt_t but is returned as Int_t
88 
89  void CleanBoostOptions();
90 
91  Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper = 0 ); // return the boosted MVA response; both error pointers default to 0
92 
93  private :
94  // clean up
95  void ClearAll();
96 
97  // print fit results
98  void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
99 
100  // initialization, mostly of the monitoring tools of the boost process
101  void Init();
102  void InitHistos(); // initialisation routine for the histograms
103  void CheckSetup();
104 
105  void MonitorBoost( Types::EBoostStage stage, UInt_t methodIdx=0); // fill monitoring histograms from information of the individual classifiers
106 
107  // the option handling methods
108  void DeclareOptions();
110  void ProcessOptions(); // process user options
111 
112 
115  // train a single classifier
116  void SingleTrain();
117 
118  // calculate a boosting weight from the classifier and store it in the next one
120  Double_t AdaBoost(MethodBase* method, Bool_t useYesNoLeaf ); // the standard (discrete or real) AdaBoost algorithm
121  Double_t Bagging(); // bagging or bootstrap boosting: gives a new random Poisson weight to every event
122 
123 
124  // calculate weight of single method (NOTE(review): the declaration this comment belongs to is missing from this listing)
126 
127  // return ROC integral on training/testing sample (NOTE(review): the declaration this comment belongs to is missing from this listing)
129 
130  // write the monitoring histograms and tree to a file
131  void WriteMonitoringHistosToFile( void ) const;
132 
133  // write the evaluation histograms into the target file
134  virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
135 
136  // performs the MethodBase testing + testing of each boosted classifier
137  virtual void TestClassification();
138 
139  // find the cut on the individual MVA between signal and background, according to fMVACutPerc and fMVACutType
140  void FindMVACut(MethodBase* method);
141 
142  // reset all the boost weights of the events back to 1
143  void ResetBoostWeights();
144 
145  // create the vectors of histograms for monitoring the MVA response of each classifier
146  void CreateMVAHistorgrams();
147 
148  // calculate MVA values of the currently trained method on the training
149  // sample
150  void CalcMVAValues();
151 
152  UInt_t fBoostNum; // number of times the classifier is boosted
153  TString fBoostType; // string specifying the boost type
154 
155  TString fTransformString; // min and max values for the classifier response
156  Bool_t fDetailedMonitoring; // produce detailed monitoring histograms (boost-wise)
157 
158  Double_t fAdaBoostBeta; // AdaBoost parameter, default is 1
159  UInt_t fRandomSeed; // seed for the random number generator used for bagging
160  Double_t fBaggedSampleFraction;// relative size of the bagged sample
161 
162  TString fBoostedMethodName; // details of the boosted classifier
165 
166  Bool_t fMonitorBoostedMethod; // monitor the MVA response of every classifier
167 
168  // MVA output from each classifier over the training sample, using the original event weights
169  std::vector< TH1* > fTrainSigMVAHist;
170  std::vector< TH1* > fTrainBgdMVAHist;
171  // MVA output from each classifier over the training sample, using the boosted event weights
172  std::vector< TH1* > fBTrainSigMVAHist;
173  std::vector< TH1* > fBTrainBgdMVAHist;
174  // MVA output from each classifier over the testing sample
175  std::vector< TH1* > fTestSigMVAHist;
176  std::vector // NOTE(review): this declaration is truncated in this listing
178 
179  // monitoring tree/ntuple and its variables
180  TTree* fMonitorTree; // tree to monitor values during the boosting
181  Double_t fBoostWeight; // the weight used to boost the next classifier
182  Double_t fMethodError; // estimation of the level error of the classifier
183  // analysing the train dataset
184  Double_t fROC_training; // ROC integral of the last trained method (on the training sample)
185 
186  // overlap integral of the MVA distributions for signal and
187  // background (training sample)
189 
190  std::vector<Float_t> *fMVAvalues; // MVA values for the last trained method
191 
193  friend class Factory; // DSMTEST
194  friend class Reader; // DSMTEST
195 
196  TString fHistoricOption; // historic variable, only needed for "CompatibilityOptions"
197  Bool_t fHistoricBoolOption; // historic variable, only needed for "CompatibilityOptions"
198 
199  protected:
200 
201  // get help message text
202  void GetHelpMessage() const;
203 
205  };
206 }
207 
208 #endif
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:190
Double_t CalcMethodWeight()
Double_t fROC_training
Definition: MethodBoost.h:184
void SingleTrain()
initialization
std::vector< TH1 *> fTestSigMVAHist
Definition: MethodBoost.h:175
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file (dummy implementation here) ...
Int_t GetBoostNum()
Definition: MethodBoost.h:87
Bool_t fDetailedMonitoring
Definition: MethodBoost.h:156
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
EAnalysisType
Definition: Types.h:129
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodBoost.cxx:90
UInt_t CurrentMethodIdx()
Definition: MethodBoost.h:114
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
std::vector< TH1 *> fTrainBgdMVAHist
Definition: MethodBoost.h:170
const Ranking * CreateRanking()
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Double_t fOverlap_integral
Definition: MethodBoost.h:188
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
#define ClassDef(name, id)
Definition: Rtypes.h:254
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
virtual ~MethodBoost(void)
destructor
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
TString fHistoricOption
Definition: MethodBoost.h:196
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Double_t fBaggedSampleFraction
Definition: MethodBoost.h:160
TString fTransformString
Definition: MethodBoost.h:155
unsigned int UInt_t
Definition: RtypesCore.h:42
MethodBase * CurrentMethod()
Definition: MethodBoost.h:113
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:85
void CreateMVAHistorgrams()
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString fBoostedMethodOptions
Definition: MethodBoost.h:164
Bool_t fMonitorBoostedMethod
Definition: MethodBoost.h:166
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
std::vector< TH1 *> fTrainSigMVAHist
Definition: MethodBoost.h:169
TString fBoostedMethodTitle
Definition: MethodBoost.h:163
int type
Definition: TGX11.cxx:120
The TH1 histogram class.
Definition: TH1.h:80
Double_t fMethodError
Definition: MethodBoost.h:182
void GetHelpMessage() const
Get help message text.
virtual void TestClassification()
initialization
TString fBoostedMethodName
Definition: MethodBoost.h:162
Abstract ClassifierFactory template that handles arbitrary types.
Double_t fAdaBoostBeta
Definition: MethodBoost.h:158
DataSetManager * fDataSetManager
Definition: MethodBoost.h:192
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:197
void InitHistos()
initialisation routine
A TTree object has a header with a name and a title.
Definition: TTree.h:98
TTree * fMonitorTree
Definition: MethodBoost.h:180
std::vector< TH1 *> fBTrainSigMVAHist
Definition: MethodBoost.h:172
Double_t fBoostWeight
Definition: MethodBoost.h:181
std::vector< TH1 *> fBTrainBgdMVAHist
Definition: MethodBoost.h:173
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
std::vector< TH1 *> fTestBgdMVAHist
Definition: MethodBoost.h:177