Logo ROOT   6.21/01
Reference Guide
MethodBoost.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBoost *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Class for boosting a TMVA method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * U. of Bonn, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  **********************************************************************************/
31 
32 #ifndef ROOT_TMVA_MethodBoost
33 #define ROOT_TMVA_MethodBoost
34 
35 //////////////////////////////////////////////////////////////////////////
36 // //
37 // MethodBoost //
38 // //
39 // Class for boosting a TMVA method //
40 // //
41 //////////////////////////////////////////////////////////////////////////
42 
43 #include <iosfwd>
44 #include <vector>
45 
46 #include "TMVA/MethodBase.h"
47 
49 
50 namespace TMVA {
51 
52  class Factory; // DSMTEST
53  class Reader; // DSMTEST
54  class DataSetManager; // DSMTEST
55  namespace Experimental {
56  class Classification;
57  }
59  friend class Factory; // DSMTEST
60  friend class Reader; // DSMTEST
62 
63  public :
64 
65  // constructors
66  MethodBoost( const TString& jobName,
67  const TString& methodTitle,
68  DataSetInfo& theData,
69  const TString& theOption = "" );
70 
72  const TString& theWeightFile );
73 
74  virtual ~MethodBoost( void );
75 
76  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );
77 
78  // training and boosting all the classifiers
79  void Train( void );
80 
81  // ranking of input variables
82  const Ranking* CreateRanking();
83 
84  // saves the name and options string of the boosted classifier
85  Bool_t BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption );
86  void SetBoostedMethodName ( TString methodName ) { fBoostedMethodName = methodName; }
87 
88  Int_t GetBoostNum() { return fBoostNum; }
89 
90  void CleanBoostOptions();
91 
92  Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper = 0 );
93 
94  private :
95  // clean up
96  void ClearAll();
97 
98  // print fit results
99  void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
100 
101  // initializing mostly monitoring tools of the boost process
102  void Init();
103  void InitHistos();
104  void CheckSetup();
105 
106  void MonitorBoost( Types::EBoostStage stage, UInt_t methodIdx=0);
107 
108  // the option handling methods
109  void DeclareOptions();
111  void ProcessOptions();
112 
113 
116  // training a single classifier
117  void SingleTrain();
118 
119  // calculating a boosting weight from the classifier, storing it in the next one
121  Double_t AdaBoost(MethodBase* method, Bool_t useYesNoLeaf );
122  Double_t Bagging();
123 
124 
125  // calculate weight of single method
127 
128  // return ROC integral on training/testing sample
130 
131  // writing the monitoring histograms and tree to a file
132  void WriteMonitoringHistosToFile( void ) const;
133 
134  // write evaluation histograms into target file
135  virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
136 
137  // performs the MethodBase testing + testing of each boosted classifier
138  virtual void TestClassification();
139 
140  // finding the MVA to cut between sig and bgd according to fMVACutPerc,fMVACutType
141  void FindMVACut(MethodBase* method);
142 
143  // setting all the boost weights to 1
144  void ResetBoostWeights();
145 
146  // creating the vectors of histogram for monitoring MVA response of each classifier
147  void CreateMVAHistorgrams();
148 
149  // calculate MVA values of current trained method on training
150  // sample
151  void CalcMVAValues();
152 
153  UInt_t fBoostNum; // Number of times the classifier is boosted
154  TString fBoostType; // string specifying the boost type
155 
156  TString fTransformString; // min and max values for the classifier response
157  Bool_t fDetailedMonitoring; // produce detailed monitoring histograms (boost-wise)
158 
159  Double_t fAdaBoostBeta; // ADA boost parameter, default is 1
160  UInt_t fRandomSeed; // seed for random number generator used for bagging
161  Double_t fBaggedSampleFraction;// relative size of bagged sample
162 
163  TString fBoostedMethodName; // details of the boosted classifier
166 
167  Bool_t fMonitorBoostedMethod; // monitor the MVA response of every classifier
168 
169  // MVA output from each classifier over the training hist, using original event weights
170  std::vector< TH1* > fTrainSigMVAHist;
171  std::vector< TH1* > fTrainBgdMVAHist;
172  // MVA output from each classifier over the training hist, using boosted event weights
173  std::vector< TH1* > fBTrainSigMVAHist;
174  std::vector< TH1* > fBTrainBgdMVAHist;
175  // MVA output from each classifier over the testing hist
176  std::vector< TH1* > fTestSigMVAHist;
177  std::vector
179 
180  //monitoring tree/ntuple and its variables
181  TTree* fMonitorTree; // tree to monitor values during the boosting
182  Double_t fBoostWeight; // the weight used to boost the next classifier
183  Double_t fMethodError; // estimation of the level error of the classifier
184  // analysing the train dataset
185  Double_t fROC_training; // roc integral of last trained method (on training sample)
186 
187  // overlap integral of mva distributions for signal and
188  // background (training sample)
190 
191  std::vector<Float_t> *fMVAvalues; // mva values for the last trained method
192 
194  TString fHistoricOption; //historic variable, only needed for "CompatibilityOptions"
195  Bool_t fHistoricBoolOption; //historic variable, only needed for "CompatibilityOptions"
196 
197  protected:
198 
199  // get help message text
200  void GetHelpMessage() const;
201 
203  };
204 }
205 
206 #endif
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:191
Double_t CalcMethodWeight()
Double_t fROC_training
Definition: MethodBoost.h:185
void SingleTrain()
initialization
std::vector< TH1 *> fTestSigMVAHist
Definition: MethodBoost.h:176
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file (dummy implementation here) ...
Int_t GetBoostNum()
Definition: MethodBoost.h:88
Bool_t fDetailedMonitoring
Definition: MethodBoost.h:157
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
EAnalysisType
Definition: Types.h:127
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodBoost.cxx:94
Virtual base class for all MVA methods.
Definition: MethodBase.h:111
UInt_t CurrentMethodIdx()
Definition: MethodBoost.h:115
Basic string class.
Definition: TString.h:131
Ranking for variables in method (implementation)
Definition: Ranking.h:48
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< TH1 *> fTrainBgdMVAHist
Definition: MethodBoost.h:171
const Ranking * CreateRanking()
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Double_t fOverlap_integral
Definition: MethodBoost.h:189
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
#define ClassDef(name, id)
Definition: Rtypes.h:326
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
Virtual base class for combining several TMVA methods.
virtual ~MethodBoost(void)
destructor
Class that contains all the data information.
Definition: DataSetInfo.h:60
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
TString fHistoricOption
Definition: MethodBoost.h:194
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Double_t fBaggedSampleFraction
Definition: MethodBoost.h:161
TString fTransformString
Definition: MethodBoost.h:156
unsigned int UInt_t
Definition: RtypesCore.h:42
MethodBase * CurrentMethod()
Definition: MethodBoost.h:114
This is the main MVA steering class.
Definition: Factory.h:81
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:86
void CreateMVAHistorgrams()
const Bool_t kFALSE
Definition: RtypesCore.h:88
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
TString fBoostedMethodOptions
Definition: MethodBoost.h:165
Bool_t fMonitorBoostedMethod
Definition: MethodBoost.h:167
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
Class that contains all the data information.
std::vector< TH1 *> fTrainSigMVAHist
Definition: MethodBoost.h:170
TString fBoostedMethodTitle
Definition: MethodBoost.h:164
int type
Definition: TGX11.cxx:120
The TH1 histogram class.
Definition: TH1.h:56
Double_t fMethodError
Definition: MethodBoost.h:183
void GetHelpMessage() const
Get help message text.
virtual void TestClassification()
initialization
The Reader class serves to use the MVAs in a specific analysis context.
Definition: Reader.h:63
TString fBoostedMethodName
Definition: MethodBoost.h:163
create variable transformations
Double_t fAdaBoostBeta
Definition: MethodBoost.h:159
DataSetManager * fDataSetManager
Definition: MethodBoost.h:193
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:195
void InitHistos()
initialisation routine
A TTree represents a columnar dataset.
Definition: TTree.h:72
TTree * fMonitorTree
Definition: MethodBoost.h:181
std::vector< TH1 *> fBTrainSigMVAHist
Definition: MethodBoost.h:173
Double_t fBoostWeight
Definition: MethodBoost.h:182
std::vector< TH1 *> fBTrainBgdMVAHist
Definition: MethodBoost.h:174
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
std::vector< TH1 *> fTestBgdMVAHist
Definition: MethodBoost.h:178