Logo ROOT   6.08/07
Reference Guide
MethodCuts.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodCuts *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Multivariate optimisation of signal efficiency for given background *
12  * efficiency, using rectangular minimum and maximum requirements on *
13  * input variables *
14  * *
15  * Authors (alphabetical): *
16  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
18  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * LAPP, Annecy, France *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #ifndef ROOT_TMVA_MethodCuts
34 #define ROOT_TMVA_MethodCuts
35 
36 //////////////////////////////////////////////////////////////////////////
37 // //
38 // MethodCuts //
39 // //
40 // Multivariate optimisation of signal efficiency for given background //
41 // efficiency, using rectangular minimum and maximum requirements on //
42 // input variables //
43 // //
44 //////////////////////////////////////////////////////////////////////////
45 
46 #include <vector>
47 #include <map>
48 
49 #ifndef ROOT_TMVA_MethodBase
50 #include "TMVA/MethodBase.h"
51 #endif
52 #ifndef ROOT_TMVA_BinarySearchTree
53 #include "TMVA/BinarySearchTree.h"
54 #endif
55 #ifndef ROOT_TMVA_PDF
56 #include "TMVA/PDF.h"
57 #endif
58 #ifndef ROOT_TMVA_TMatrixDfwd
59 #ifndef ROOT_TMatrixDfwd
60 #include "TMatrixDfwd.h"
61 #endif
62 #endif
63 #ifndef ROOT_TMVA_IFitterTarget
64 #ifndef ROOT_IFitterTarget
65 #include "IFitterTarget.h"
66 #endif
67 #endif
68 
69 class TRandom;
70 
71 namespace TMVA {
72 
73  class Interval;
74 
75  class MethodCuts : public MethodBase, public IFitterTarget {
76  // rectangular-cut classifier: optimises signal efficiency for given background efficiency
77  public:
78  // standard constructor; the option string defaults to "MC:150:10000:"
79  MethodCuts( const TString& jobName,
80  const TString& methodTitle,
81  DataSetInfo& theData,
82  const TString& theOption = "MC:150:10000:");
83  // constructor taking the dataset info and a weight-file string
84  MethodCuts( DataSetInfo& theData,
85  const TString& theWeightFile);
86 
87  // workaround needed because CINT cannot handle dynamic casts: returns dynamic_cast<MethodCuts*>(method)
88  static MethodCuts* DynamicCast( IMethod* method ) { return dynamic_cast<MethodCuts*>(method); }
89 
90  virtual ~MethodCuts( void );
91 
92  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
93 
94  // training method
95  void Train( void );
96 
98 
99  void AddWeightsXMLTo ( void* parent ) const;
100 
101  void ReadWeightsFromStream( std::istream & i );
102  void ReadWeightsFromXML ( void* wghtnode );
103 
104  // calculate the MVA value (for CUTs this is just a dummy)
105  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
106 
107  // write method specific histos to target file
108  void WriteMonitoringHistosToFile( void ) const;
109 
110  // test the method
111  void TestClassification();
112 
113  // also overridden: these quantities are not computed for cuts, the stubs below always return -1
114  Double_t GetSeparation ( TH1*, TH1* ) const { return -1; }
115  Double_t GetSeparation ( PDF* = 0, PDF* = 0 ) const { return -1; }
116  Double_t GetSignificance( void ) const { return -1; }
117  Double_t GetmuTransform ( TTree *) { return -1; }
120 
121  // rarity distributions (signal or background (default) is uniform in [0,1]); this stub always returns 0
122  Double_t GetRarity( Double_t, Types::ESBType ) const { return 0; }
123 
124  // accessors for Minuit
125  Double_t ComputeEstimator( std::vector<Double_t> & );
126 
127  Double_t EstimatorFunction( std::vector<Double_t> & );
128  Double_t EstimatorFunction( Int_t ievt1, Int_t ievt2 );
129 
131 
132  // retrieve cut values for given signal efficiency
133  void PrintCuts( Double_t effS ) const;
134  Double_t GetCuts ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
135  Double_t GetCuts ( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const;
136 
137  // ranking of input variables (not available for cuts: always returns a null pointer)
138  const Ranking* CreateRanking() { return 0; }
139 
140  void DeclareOptions();
141  void ProcessOptions();
142 
143  // maximum |cut| value
144  static const Double_t fgMaxAbsCutVal;
145 
146  // no check of options at this place (intentionally empty)
147  void CheckSetup() {}
148 
149  protected:
150 
151  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
152  void MakeClassSpecific( std::ostream&, const TString& ) const;
153 
154  // get help message text
155  void GetHelpMessage() const;
156 
157  private:
158 
159  // optimisation method
166 
167  // efficiency calculation method
168  // - kUseEventSelection: computes efficiencies from given data sample
169  // - kUsePDFs : creates smoothed PDFs from data samples, and
170  // uses this to compute efficiencies
173 
174  // improve the Monte Carlo by providing some additional information
179 
180  // general
181  TString fFitMethodS; // chosen fit method (string)
182  EFitMethodType fFitMethod; // chosen fit method
183  TString fEffMethodS; // chosen efficiency calculation method (string)
184  EEffMethod fEffMethod; // chosen efficiency calculation method
185  std::vector<EFitParameters>* fFitParams; // vector for series of fit methods
186  Double_t fTestSignalEff; // used to test optimized signal efficiency
187  Double_t fEffSMin; // used to test optimized signal efficiency (NOTE(review): presumably the lower signal-efficiency bound - confirm)
188  Double_t fEffSMax; // used to test optimized signal efficiency (NOTE(review): presumably the upper signal-efficiency bound - confirm)
189  Double_t* fCutRangeMin; // minimum of allowed cut range
190  Double_t* fCutRangeMax; // maximum of allowed cut range
191  std::vector<Interval*> fCutRange; // allowed ranges for cut optimisation
192 
193  // for the use of the binary tree method
196 
197  // MC method
198  Double_t** fCutMin; // minimum requirement
199  Double_t** fCutMax; // maximum requirement
200  Double_t* fTmpCutMin; // temporary minimum requirement
201  Double_t* fTmpCutMax; // temporary maximum requirement
202  TString* fAllVarsI; // what to do with variables
203 
204  // relevant for all methods
205  Int_t fNpar; // number of parameters in fit (default: 2*Nvar)
206  Double_t fEffRef; // reference efficiency
207  std::vector<Int_t>* fRangeSign; // used to match cuts to fit parameters (and vice versa)
208  TRandom* fRandom; // random generator for MC optimisation method
209 
210  // basic statistics
211  std::vector<Double_t>* fMeanS; // means of variables (signal)
212  std::vector<Double_t>* fMeanB; // means of variables (background)
213  std::vector<Double_t>* fRmsS; // RMSs of variables (signal)
214  std::vector<Double_t>* fRmsB; // RMSs of variables (background)
215 
216  TH1* fEffBvsSLocal; // intermediate eff. background versus eff signal histo
217 
218  // PDF section
219  std::vector<TH1*>* fVarHistS; // reference histograms (signal)
220  std::vector<TH1*>* fVarHistB; // reference histograms (background)
221  std::vector<TH1*>* fVarHistS_smooth; // smoothed reference histograms (signal)
222  std::vector<TH1*>* fVarHistB_smooth; // smoothed reference histograms (background)
223  std::vector<PDF*>* fVarPdfS; // reference PDFs (signal)
224  std::vector<PDF*>* fVarPdfB; // reference PDFs (background)
225 
226  // negative efficiencies
227  Bool_t fNegEffWarning; // flag raised in case of negative efficiency warning
228 
229 
230  // the definition of fit parameters can be different from the actual
231  // cut requirements; these functions provide the matching
232  void MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* );
234 
235  void MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* );
236  void MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin );
237 
238  // creates PDFs in case these are used to compute efficiencies
239  // (corresponds to: EffMethod == kUsePDFs)
240  void CreateVariablePDFs( void );
241 
242  // returns signal and background efficiencies for given cuts - using event counting
243  void GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
244  Double_t& effS, Double_t& effB );
245  // returns signal and background efficiencies for given cuts - using PDFs
246  void GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
247  Double_t& effS, Double_t& effB );
248 
249  // default initialisation method called by all constructors
250  void Init( void );
251 
252  ClassDef(MethodCuts,0); // Multivariate optimisation of signal efficiency
253  };
254 
255 } // namespace TMVA
256 
257 #endif
std::vector< Double_t > * fRmsS
Definition: MethodCuts.h:213
EEffMethod fEffMethod
Definition: MethodCuts.h:184
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
Definition: MethodCuts.cxx:444
void GetHelpMessage() const
get help message text
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
TString fEffMethodS
Definition: MethodCuts.h:183
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA; there are two requirements: 1) the signal efficiency m...
Definition: MethodCuts.cxx:901
void TestClassification()
nothing to test
Definition: MethodCuts.cxx:837
Double_t GetSignificance(void) const
compute significance of mean difference: significance = |<S> - <B>|/Sqrt(RMS_S^2 + RMS_B^2) ...
Definition: MethodCuts.h:116
MethodCuts(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="MC:150:10000:")
standard constructor
Definition: MethodCuts.cxx:143
Double_t fTestSignalEff
Definition: MethodCuts.h:186
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodCuts.h:147
void SetTestSignalEfficiency(Double_t effS)
Definition: MethodCuts.h:130
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
Definition: MethodCuts.cxx:982
Double_t * fTmpCutMin
Definition: MethodCuts.h:200
EAnalysisType
Definition: Types.h:129
Double_t * fCutRangeMax
Definition: MethodCuts.h:190
std::vector< TH1 * > * fVarHistS
Definition: MethodCuts.h:219
Basic string class.
Definition: TString.h:137
const Ranking * CreateRanking()
Definition: MethodCuts.h:138
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
Definition: MethodCuts.cxx:563
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< PDF * > * fVarPdfS
Definition: MethodCuts.h:223
std::vector< TH1 * > * fVarHistS_smooth
Definition: MethodCuts.h:221
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
Definition: MethodCuts.cxx:888
std::vector< Double_t > * fMeanB
Definition: MethodCuts.h:212
#define ClassDef(name, id)
Definition: Rtypes.h:254
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:31
std::vector< EFitParameters > * fFitParams
Definition: MethodCuts.h:185
void Init(void)
default initialisation called by all constructors
Definition: MethodCuts.cxx:234
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
BinarySearchTree * fBinaryTreeS
Definition: MethodCuts.h:194
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample ...
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
std::vector< TH1 * > * fVarHistB_smooth
Definition: MethodCuts.h:222
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
Definition: PDF.h:71
std::vector< PDF * > * fVarPdfB
Definition: MethodCuts.h:224
void Train(void)
training method: here the cuts are optimised for the training sample
Definition: MethodCuts.cxx:590
std::vector< Int_t > * fRangeSign
Definition: MethodCuts.h:207
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fEffRef
Definition: MethodCuts.h:206
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
void DeclareOptions()
define the options (their key words) that can be set in the option string; known options: Method <strin...
Definition: MethodCuts.cxx:332
Double_t ** fCutMin
Definition: MethodCuts.h:198
Double_t * fCutRangeMin
Definition: MethodCuts.h:189
Double_t fEffSMax
Definition: MethodCuts.h:188
TRandom * fRandom
Definition: MethodCuts.h:208
EFitMethodType fFitMethod
Definition: MethodCuts.h:182
double Double_t
Definition: RtypesCore.h:55
std::vector< Double_t > * fMeanS
Definition: MethodCuts.h:211
Bool_t fNegEffWarning
Definition: MethodCuts.h:227
int type
Definition: TGX11.cxx:120
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
The TH1 histogram class.
Definition: TH1.h:80
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
TString fFitMethodS
Definition: MethodCuts.h:181
std::vector< Double_t > * fRmsB
Definition: MethodCuts.h:214
Double_t GetRarity(Double_t, Types::ESBType) const
compute rarity: R(x) = Integrate_[-oo..x] { PDF(x') dx' } where PDF(x) is the PDF of the classifier's...
Definition: MethodCuts.h:122
Abstract ClassifierFactory template that handles arbitrary types.
virtual ~MethodCuts(void)
destructor
Definition: MethodCuts.cxx:284
Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as <s^2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
Definition: MethodCuts.h:114
static const Double_t fgMaxAbsCutVal
Definition: MethodCuts.h:144
void ProcessOptions()
process user options; sanity check: do not allow the input variables to be normalised, because this only creates problems when interpreting the cuts
Definition: MethodCuts.cxx:375
Double_t ** fCutMax
Definition: MethodCuts.h:199
void PrintCuts(Double_t effS) const
print cuts
Definition: MethodCuts.cxx:477
A TTree object has a header with a name and a title.
Definition: TTree.h:98
Double_t GetSeparation(PDF *=0, PDF *=0) const
compute "separation" defined as <s^2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
Definition: MethodCuts.h:115
Double_t GetmuTransform(TTree *)
Definition: MethodCuts.h:117
Double_t * fTmpCutMax
Definition: MethodCuts.h:201
TString * fAllVarsI
Definition: MethodCuts.h:202
Double_t GetTrainingEfficiency(const TString &)
virtual void ReadWeightsFromStream(std::istream &)=0
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
Definition: MethodCuts.cxx:225
std::vector< Interval * > fCutRange
Definition: MethodCuts.h:191
static MethodCuts * DynamicCast(IMethod *method)
Definition: MethodCuts.h:88
BinarySearchTree * fBinaryTreeB
Definition: MethodCuts.h:195
Double_t fEffSMin
Definition: MethodCuts.h:187
std::vector< TH1 * > * fVarHistB
Definition: MethodCuts.h:220