Logo ROOT  
Reference Guide
MethodCuts.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodCuts *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Multivariate optimisation of signal efficiency for given background *
12 * efficiency, using rectangular minimum and maximum requirements on *
13 * input variables *
14 * *
15 * Authors (alphabetical): *
16 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17 * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
18 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
19 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21 * *
22 * Copyright (c) 2005: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * LAPP, Annecy, France *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (http://tmva.sourceforge.net/LICENSE) *
31 **********************************************************************************/
32
33#ifndef ROOT_TMVA_MethodCuts
34#define ROOT_TMVA_MethodCuts
35
36//////////////////////////////////////////////////////////////////////////
37// //
38// MethodCuts //
39// //
40// Multivariate optimisation of signal efficiency for given background //
41// efficiency, using rectangular minimum and maximum requirements on //
42// input variables //
43// //
44//////////////////////////////////////////////////////////////////////////
45
46#include <vector>
47
48
49#include "TMVA/MethodBase.h"
51#include "TMVA/PDF.h"
52#include "TMatrixDfwd.h"
53#include "IFitterTarget.h"
54
55class TRandom;
56
57namespace TMVA {
58
59 class Interval;
60
// MethodCuts: multivariate optimisation of signal efficiency for given background
// efficiency, using rectangular minimum and maximum requirements on input variables.
// Derives from MethodBase and from the IFitterTarget interface.
// NOTE(review): every line of this listing is prefixed with the line number of the
// original header; gaps in that numbering mark declarations missing from the listing.
61 class MethodCuts : public MethodBase, public IFitterTarget {
62
63 public:
64
// constructor taking job name, method title, dataset info and an option string
// (the option string defaults to "MC:150:10000:")
65 MethodCuts( const TString& jobName,
66 const TString& methodTitle,
67 DataSetInfo& theData,
68 const TString& theOption = "MC:150:10000:");
69
// constructor taking the dataset info and a weight file
70 MethodCuts( DataSetInfo& theData,
71 const TString& theWeightFile);
72
73 // workaround, necessary because CINT is not capable of handling dynamic casts:
// returns dynamic_cast<MethodCuts*>(method)
74 static MethodCuts* DynamicCast( IMethod* method ) { return dynamic_cast<MethodCuts*>(method); }
75
76 virtual ~MethodCuts( void );
77
// reports whether the given analysis type / number of classes / number of targets is supported
78 virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
79
80 // training method
81 void Train( void );
82
// NOTE(review): original line 83 is missing from this listing
84
// weight (cut) persistency: XML writer, plain-stream reader and XML reader
85 void AddWeightsXMLTo ( void* parent ) const;
86
87 void ReadWeightsFromStream( std::istream & i );
88 void ReadWeightsFromXML ( void* wghtnode );
89
90 // calculate the MVA value (for CUTs this is just a dummy)
// NOTE(review): the implementation is not visible here; confirm what value is actually returned
91 Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
92
93 // write method-specific histograms to the target file
94 void WriteMonitoringHistosToFile( void ) const;
95
96 // test the method
97 void TestClassification();
98
99 // also overridden --> not computed for cuts: each of these simply returns -1
100 Double_t GetSeparation ( TH1*, TH1* ) const { return -1; }
101 Double_t GetSeparation ( PDF* = 0, PDF* = 0 ) const { return -1; }
102 Double_t GetSignificance( void ) const { return -1; }
103 Double_t GetmuTransform ( TTree *) { return -1; }
106
107 // rarity distributions (signal or background (default) is uniform in [0,1])
// NOTE(review): the declaration this comment belongs to (original line 108) is missing from this listing
109
110 // accessors for Minuit
111 Double_t ComputeEstimator( std::vector<Double_t> & );
112
// estimator overloads: one takes a parameter vector, the other a pair of event indices
113 Double_t EstimatorFunction( std::vector<Double_t> & );
114 Double_t EstimatorFunction( Int_t ievt1, Int_t ievt2 );
115
// NOTE(review): original line 116 is missing from this listing
117
118 // print / retrieve cut values for a given signal efficiency
// (GetCuts fills cutMin and cutMax, either as vectors or as plain arrays)
119 void PrintCuts( Double_t effS ) const;
120 Double_t GetCuts ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
121 Double_t GetCuts ( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const;
122
123 // ranking of input variables (not available for cuts): always returns a null pointer
124 const Ranking* CreateRanking() { return 0; }
125
// declaration and processing of the configuration options
126 void DeclareOptions();
127 void ProcessOptions();
128
129 // maximum |cut| value
// NOTE(review): the declaration this comment belongs to (original line 130) is missing from this listing
131
132 // no check of options at this place (empty body)
133 void CheckSetup() {}
134
135 protected:
136
137 // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
138 void MakeClassSpecific( std::ostream&, const TString& ) const;
139
140 // get help message text
141 void GetHelpMessage() const;
142
143 private:
144
145 // optimisation method
// NOTE(review): the declaration this comment belongs to (original lines 146-151) is missing
// from this listing; presumably the EFitMethodType enum used below -- confirm
152
153 // efficiency calculation method
154 // - kUseEventSelection: computes efficiencies from given data sample
155 // - kUsePDFs : creates smoothed PDFs from data samples, and
156 // uses this to compute efficiencies
// NOTE(review): the declaration this comment belongs to (original lines 157-158) is missing
// from this listing; presumably the EEffMethod enum used below -- confirm
159
160 // improve the Monte Carlo by providing some additional information
// NOTE(review): the declaration this comment belongs to (original lines 161-164) is missing
// from this listing; presumably the EFitParameters enum used below -- confirm
165
166 // general
167 TString fFitMethodS; // chosen fit method (string)
168 EFitMethodType fFitMethod; // chosen fit method
169 TString fEffMethodS; // chosen efficiency calculation method (string)
170 EEffMethod fEffMethod; // chosen efficiency calculation method
171 std::vector<EFitParameters>* fFitParams; // vector for series of fit methods
172 Double_t fTestSignalEff; // used to test optimized signal efficiency
173 Double_t fEffSMin; // NOTE(review): original comment duplicated fTestSignalEff's; presumably a lower signal-efficiency bound -- confirm
174 Double_t fEffSMax; // NOTE(review): original comment duplicated fTestSignalEff's; presumably an upper signal-efficiency bound -- confirm
175 Double_t* fCutRangeMin; // minimum of allowed cut range
176 Double_t* fCutRangeMax; // maximum of allowed cut range
177 std::vector<Interval*> fCutRange; // allowed ranges for cut optimisation
178
179 // for the use of the binary tree method
// NOTE(review): the members this comment belongs to (original lines 180-181) are missing from this listing
182
183 // MC method
184 Double_t** fCutMin; // minimum requirement
185 Double_t** fCutMax; // maximum requirement
186 Double_t* fTmpCutMin; // temporary minimum requirement
187 Double_t* fTmpCutMax; // temporary maximum requirement
188 TString* fAllVarsI; // what to do with variables
189
190 // relevant for all methods
191 Int_t fNpar; // number of parameters in fit (default: 2*Nvar)
192 Double_t fEffRef; // reference efficiency
193 std::vector<Int_t>* fRangeSign; // used to match cuts to fit parameters (and vice versa)
194 TRandom* fRandom; // random generator for MC optimisation method
195
196 // basic statistics
197 std::vector<Double_t>* fMeanS; // means of variables (signal)
198 std::vector<Double_t>* fMeanB; // means of variables (background)
199 std::vector<Double_t>* fRmsS; // RMSs of variables (signal)
200 std::vector<Double_t>* fRmsB; // RMSs of variables (background)
201
202 TH1* fEffBvsSLocal; // intermediate histogram: background efficiency versus signal efficiency
203
204 // PDF section
205 std::vector<TH1*>* fVarHistS; // reference histograms (signal)
206 std::vector<TH1*>* fVarHistB; // reference histograms (background)
207 std::vector<TH1*>* fVarHistS_smooth; // smoothed reference histograms (signal)
208 std::vector<TH1*>* fVarHistB_smooth; // smoothed reference histograms (background)
209 std::vector<PDF*>* fVarPdfS; // reference PDFs (signal)
210 std::vector<PDF*>* fVarPdfB; // reference PDFs (background)
211
212 // negative efficiencies
213 Bool_t fNegEffWarning; // flag raised in case of negative efficiency warning
214
215
216 // the definition of fit parameters can be different from the actual
217 // cut requirements; these functions provide the matching
// (MatchParsToCuts: parameters -> cuts, MatchCutsToPars: cuts -> parameters)
218 void MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* );
// NOTE(review): original line 219 is missing from this listing
220
221 void MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* );
222 void MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin );
223
224 // creates PDFs in case these are used to compute efficiencies
225 // (corresponds to: EffMethod == kUsePDFs)
226 void CreateVariablePDFs( void );
227
228 // returns signal and background efficiencies (via effS, effB) for given cuts - using event counting
229 void GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
230 Double_t& effS, Double_t& effB );
231 // returns signal and background efficiencies (via effS, effB) for given cuts - using PDFs
232 void GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
233 Double_t& effS, Double_t& effB );
234
235 // default initialisation method called by all constructors
236 void Init( void );
237
238 ClassDef(MethodCuts,0); // Multivariate optimisation of signal efficiency
239 };
240
241} // namespace TMVA
242
243#endif
unsigned int UInt_t
Definition: RtypesCore.h:44
bool Bool_t
Definition: RtypesCore.h:61
double Double_t
Definition: RtypesCore.h:57
#define ClassDef(name, id)
Definition: Rtypes.h:322
int type
Definition: TGX11.cxx:120
The TH1 histogram class.
Definition: TH1.h:56
A simple Binary search tree including a volume search method.
Class that contains all the data information.
Definition: DataSetInfo.h:60
Interface for a fitter 'target'.
Definition: IFitterTarget.h:44
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
Virtual base class for all MVA methods.
Definition: MethodBase.h:111
friend class MethodCuts
Definition: MethodBase.h:601
virtual void ReadWeightsFromStream(std::istream &)=0
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular ...
Definition: MethodCuts.h:61
TRandom * fRandom
Definition: MethodCuts.h:194
Double_t fEffRef
Definition: MethodCuts.h:192
TString fFitMethodS
Definition: MethodCuts.h:167
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA.
Definition: MethodCuts.cxx:893
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
BinarySearchTree * fBinaryTreeS
Definition: MethodCuts.h:180
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
Definition: MethodCuts.cxx:878
void SetTestSignalEfficiency(Double_t effS)
Definition: MethodCuts.h:116
std::vector< Int_t > * fRangeSign
Definition: MethodCuts.h:193
void DeclareOptions()
define the options (their key words) that can be set in the option string.
Definition: MethodCuts.cxx:319
TString fEffMethodS
Definition: MethodCuts.h:169
EFitMethodType fFitMethod
Definition: MethodCuts.h:168
const Ranking * CreateRanking()
Definition: MethodCuts.h:124
Bool_t fNegEffWarning
Definition: MethodCuts.h:213
Double_t fEffSMin
Definition: MethodCuts.h:173
Double_t * fCutRangeMax
Definition: MethodCuts.h:176
Double_t GetSignificance(void) const
compute significance of mean difference
Definition: MethodCuts.h:102
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
static MethodCuts * DynamicCast(IMethod *method)
Definition: MethodCuts.h:74
void GetHelpMessage() const
get help message text
void Train(void)
training method: here the cuts are optimised for the training sample
Definition: MethodCuts.cxx:578
Double_t GetRarity(Double_t, Types::ESBType) const
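compute rarity: $R(x) = \int_{-\infty}^{x} \mathrm{PDF}(x')\,dx'$, where PDF(x) is the PDF of the classifier's signal or background distribution (MethodCuts declares its own GetRarity)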
Definition: MethodCuts.h:108
static const Double_t fgMaxAbsCutVal
Definition: MethodCuts.h:130
std::vector< TH1 * > * fVarHistB
Definition: MethodCuts.h:206
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
std::vector< PDF * > * fVarPdfB
Definition: MethodCuts.h:210
EEffMethod fEffMethod
Definition: MethodCuts.h:170
Double_t * fTmpCutMin
Definition: MethodCuts.h:186
Double_t ** fCutMin
Definition: MethodCuts.h:184
std::vector< TH1 * > * fVarHistS
Definition: MethodCuts.h:205
std::vector< Double_t > * fRmsB
Definition: MethodCuts.h:200
Double_t fEffSMax
Definition: MethodCuts.h:174
std::vector< TH1 * > * fVarHistB_smooth
Definition: MethodCuts.h:208
Double_t GetmuTransform(TTree *)
Definition: MethodCuts.h:103
std::vector< PDF * > * fVarPdfS
Definition: MethodCuts.h:209
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
void Init(void)
default initialisation called by all constructors
Definition: MethodCuts.cxx:220
Double_t GetTrainingEfficiency(const TString &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
Definition: MethodCuts.cxx:211
void ProcessOptions()
process user options.
Definition: MethodCuts.cxx:363
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodCuts.h:133
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
Definition: MethodCuts.cxx:974
virtual ~MethodCuts(void)
destructor
Definition: MethodCuts.cxx:270
Double_t GetSeparation(PDF *=0, PDF *=0) const
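compute "separation" defined as $\langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)}\,dx$ (not computed for cuts: MethodCuts returns -1)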
Definition: MethodCuts.h:101
void TestClassification()
nothing to test
Definition: MethodCuts.cxx:827
Double_t * fCutRangeMin
Definition: MethodCuts.h:175
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
Definition: MethodCuts.cxx:432
BinarySearchTree * fBinaryTreeB
Definition: MethodCuts.h:181
std::vector< Double_t > * fRmsS
Definition: MethodCuts.h:199
std::vector< Double_t > * fMeanS
Definition: MethodCuts.h:197
std::vector< Double_t > * fMeanB
Definition: MethodCuts.h:198
TString * fAllVarsI
Definition: MethodCuts.h:188
std::vector< EFitParameters > * fFitParams
Definition: MethodCuts.h:171
Double_t GetSeparation(TH1 *, TH1 *) const
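compute "separation" defined as $\langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)}\,dx$ (not computed for cuts: MethodCuts returns -1)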
Definition: MethodCuts.h:100
Double_t fTestSignalEff
Definition: MethodCuts.h:172
std::vector< Interval * > fCutRange
Definition: MethodCuts.h:177
Double_t * fTmpCutMax
Definition: MethodCuts.h:187
std::vector< TH1 * > * fVarHistS_smooth
Definition: MethodCuts.h:207
void MatchParsToCuts(Double_t *, Double_t *, Double_t *)
Double_t ** fCutMax
Definition: MethodCuts.h:185
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
Definition: MethodCuts.cxx:551
void PrintCuts(Double_t effS) const
print cuts
Definition: MethodCuts.cxx:465
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Ranking for variables in method (implementation)
Definition: Ranking.h:48
EAnalysisType
Definition: Types.h:127
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:27
Basic string class.
Definition: TString.h:131
A TTree represents a columnar dataset.
Definition: TTree.h:78
create variable transformations