Logo ROOT  
Reference Guide
MethodCuts.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodCuts *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Multivariate optimisation of signal efficiency for given background *
12 * efficiency, using rectangular minimum and maximum requirements on *
13 * input variables *
14 * *
15 * Authors (alphabetical): *
16 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17 * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
18 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
19 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21 * *
22 * Copyright (c) 2005: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * LAPP, Annecy, France *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (http://tmva.sourceforge.net/LICENSE) *
31 **********************************************************************************/
32
33#ifndef ROOT_TMVA_MethodCuts
34#define ROOT_TMVA_MethodCuts
35
36//////////////////////////////////////////////////////////////////////////
37// //
38// MethodCuts //
39// //
40// Multivariate optimisation of signal efficiency for given background //
41// efficiency, using rectangular minimum and maximum requirements on //
42// input variables //
43// //
44//////////////////////////////////////////////////////////////////////////
45
46#include <vector>
47
48
49#include "TMVA/MethodBase.h"
51#include "TMVA/PDF.h"
52#include "TMatrixDfwd.h"
53#include "IFitterTarget.h"
54
55class TRandom;
56
57namespace TMVA {
58
59 class Interval;
60
61 class MethodCuts : public MethodBase, public IFitterTarget {
// NOTE(review): this listing skips several numbered lines (e.g. 83, 104-105, 108, 116, 130,
// 146-151, 157-158, 161-164, 180-181, 219). The types EFitMethodType, EEffMethod and
// EFitParameters used by the data members below are not declared in the visible text --
// presumably they sit on the skipped lines; confirm against the original header.
62
63 public:
64
65 MethodCuts( const TString& jobName,
66 const TString& methodTitle,
67 DataSetInfo& theData,
68 const TString& theOption = "MC:150:10000:");
69
70 MethodCuts( DataSetInfo& theData,
71 const TString& theWeightFile);
72
73 // this is a workaround which is necessary since CINT is not capable of handling dynamic casts
74 static MethodCuts* DynamicCast( IMethod* method ) { return dynamic_cast<MethodCuts*>(method); }
75
76 virtual ~MethodCuts( void );
77
// cuts can only handle classification with two classes
78 virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
79
80 // training method: the cuts are optimised for the training sample
81 void Train( void );
82
84
// persistence: write the XML description; read the cuts from a stream or an XML weight file
85 void AddWeightsXMLTo ( void* parent ) const;
86
87 void ReadWeightsFromStream( std::istream & i );
88 void ReadWeightsFromXML ( void* wghtnode );
89
90 // cut evaluation: returns 1.0 if the event passed, 0.0 otherwise
91 Double_t GetMvaValue( Double_t* err = nullptr, Double_t* errUpper = nullptr );
92
93 // write method-specific histograms (and PDFs) to the target file for monitoring
94 void WriteMonitoringHistosToFile( void ) const;
95
96 // test the method (nothing to test for cuts)
97 void TestClassification();
98
99 // not computed for cuts: each of the following simply returns -1
100 Double_t GetSeparation ( TH1*, TH1* ) const { return -1; }
101 Double_t GetSeparation ( PDF* = nullptr, PDF* = nullptr ) const { return -1; }
102 Double_t GetSignificance( void ) const { return -1; }
103 Double_t GetmuTransform ( TTree *) { return -1; }
106
107 // rarity distributions (signal or background (default) is uniform in [0,1])
109
110 // accessors for Minuit; they also provide the "cut fitness" estimator used by the GA
111 Double_t ComputeEstimator( std::vector<Double_t> & );
112
113 Double_t EstimatorFunction( std::vector<Double_t> & );
114 Double_t EstimatorFunction( Int_t ievt1, Int_t ievt2 );
115
117
118 // retrieve (or print) the cut values for a given signal efficiency
119 void PrintCuts( Double_t effS ) const;
120 Double_t GetCuts ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
121 Double_t GetCuts ( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const;
122
123 // ranking of input variables (not available for cuts, hence always nullptr)
124 const Ranking* CreateRanking() { return nullptr; }
125
126 void DeclareOptions();
127 void ProcessOptions();
128
129 // maximum |cut| value
131
132 // no check of options at this place
133 void CheckSetup() {}
134
135 protected:
136
137 // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
138 void MakeClassSpecific( std::ostream&, const TString& ) const;
139
140 // get help message text
141 void GetHelpMessage() const;
142
143 private:
144
145 // optimisation method
152
153 // efficiency calculation method
154 // - kUseEventSelection: computes efficiencies from given data sample
155 // - kUsePDFs : creates smoothed PDFs from data samples, and
156 // uses this to compute efficiencies
159
160 // improve the Monte Carlo by providing some additional information
165
166 // general
167 TString fFitMethodS; ///< chosen fit method (string)
168 EFitMethodType fFitMethod; ///< chosen fit method
169 TString fEffMethodS; ///< chosen efficiency calculation method (string)
170 EEffMethod fEffMethod; ///< chosen efficiency calculation method
171 std::vector<EFitParameters>* fFitParams; ///< vector for series of fit methods
172 Double_t fTestSignalEff; ///< used to test optimized signal efficiency
173 Double_t fEffSMin; ///< used to test optimized signal efficiency
174 Double_t fEffSMax; ///< used to test optimized signal efficiency
175 Double_t* fCutRangeMin; ///< minimum of allowed cut range
176 Double_t* fCutRangeMax; ///< maximum of allowed cut range
177 std::vector<Interval*> fCutRange; ///< allowed ranges for cut optimisation
178
179 // for the use of the binary tree method
182
183 // MC method
184 Double_t** fCutMin; ///< minimum requirement
185 Double_t** fCutMax; ///< maximum requirement
186 Double_t* fTmpCutMin; ///< temporary minimum requirement
187 Double_t* fTmpCutMax; ///< temporary maximum requirement
188 TString* fAllVarsI; ///< what to do with variables
189
190 // relevant for all methods
191 Int_t fNpar; ///< number of parameters in fit (default: 2*Nvar)
192 Double_t fEffRef; ///< reference efficiency
193 std::vector<Int_t>* fRangeSign; ///< used to match cuts to fit parameters (and vice versa)
194 TRandom* fRandom; ///< random generator for MC optimisation method
195
196 // basic statistics
197 std::vector<Double_t>* fMeanS; ///< means of variables (signal)
198 std::vector<Double_t>* fMeanB; ///< means of variables (background)
199 std::vector<Double_t>* fRmsS; ///< RMSs of variables (signal)
200 std::vector<Double_t>* fRmsB; ///< RMSs of variables (background)
201
202 TH1* fEffBvsSLocal; ///< intermediate eff. background versus eff signal histo
203
204 // PDF section
205 std::vector<TH1*>* fVarHistS; ///< reference histograms (signal)
206 std::vector<TH1*>* fVarHistB; ///< reference histograms (background)
207 std::vector<TH1*>* fVarHistS_smooth; ///< smoothed reference histograms (signal)
208 std::vector<TH1*>* fVarHistB_smooth; ///< smoothed reference histograms (background)
209 std::vector<PDF*>* fVarPdfS; ///< reference PDFs (signal)
210 std::vector<PDF*>* fVarPdfB; ///< reference PDFs (background)
211
212 // negative efficiencies
213 Bool_t fNegEffWarning; ///< flag raised in case of negative efficiency warning
214
215
216 // the definition of fit parameters can be different from the actual
217 // cut requirements; these functions provide the matching
218 void MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* );
220
221 void MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* );
222 void MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin );
223
224 // creates PDFs in case these are used to compute efficiencies
225 // (corresponds to: EffMethod == kUsePDFs)
226 void CreateVariablePDFs( void );
227
228 // returns signal and background efficiencies for given cuts - using event counting
229 void GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
230 Double_t& effS, Double_t& effB );
231 // returns signal and background efficiencies for given cuts - using PDFs
232 void GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
233 Double_t& effS, Double_t& effB );
234
235 // default initialisation method called by all constructors
236 void Init( void );
237
238 ClassDef(MethodCuts,0); // Multivariate optimisation of signal efficiency
239 };
240
241} // namespace TMVA
242
243#endif
bool Bool_t
Definition: RtypesCore.h:63
unsigned int UInt_t
Definition: RtypesCore.h:46
double Double_t
Definition: RtypesCore.h:59
#define ClassDef(name, id)
Definition: Rtypes.h:335
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
TH1 is the base class of all histogram classes in ROOT.
Definition: TH1.h:58
A simple Binary search tree including a volume search method.
Class that contains all the data information.
Definition: DataSetInfo.h:62
Interface for a fitter 'target'.
Definition: IFitterTarget.h:44
Interface for all concrete MVA method implementations.
Definition: IMethod.h:53
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
friend class MethodCuts
Definition: MethodBase.h:603
virtual void ReadWeightsFromStream(std::istream &)=0
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular ...
Definition: MethodCuts.h:61
TRandom * fRandom
random generator for MC optimisation method
Definition: MethodCuts.h:194
Double_t fEffRef
reference efficiency
Definition: MethodCuts.h:192
TString fFitMethodS
chosen fit method (string)
Definition: MethodCuts.h:167
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA.
Definition: MethodCuts.cxx:893
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
BinarySearchTree * fBinaryTreeS
Definition: MethodCuts.h:180
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
Definition: MethodCuts.cxx:878
void SetTestSignalEfficiency(Double_t effS)
Definition: MethodCuts.h:116
std::vector< Int_t > * fRangeSign
used to match cuts to fit parameters (and vice versa)
Definition: MethodCuts.h:193
Int_t fNpar
number of parameters in fit (default: 2*Nvar)
Definition: MethodCuts.h:191
void DeclareOptions()
define the options (their key words) that can be set in the option string.
Definition: MethodCuts.cxx:319
TString fEffMethodS
chosen efficiency calculation method (string)
Definition: MethodCuts.h:169
EFitMethodType fFitMethod
chosen fit method
Definition: MethodCuts.h:168
const Ranking * CreateRanking()
Definition: MethodCuts.h:124
Bool_t fNegEffWarning
flag raised in case of negative efficiency warning
Definition: MethodCuts.h:213
Double_t fEffSMin
used to test optimized signal efficiency
Definition: MethodCuts.h:173
Double_t * fCutRangeMax
maximum of allowed cut range
Definition: MethodCuts.h:176
Double_t GetSignificance(void) const
compute significance of mean difference
Definition: MethodCuts.h:102
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
static MethodCuts * DynamicCast(IMethod *method)
Definition: MethodCuts.h:74
void GetHelpMessage() const
get help message text
void Train(void)
training method: here the cuts are optimised for the training sample
Definition: MethodCuts.cxx:578
Double_t GetRarity(Double_t, Types::ESBType) const
compute rarity:
Definition: MethodCuts.h:108
static const Double_t fgMaxAbsCutVal
Definition: MethodCuts.h:130
std::vector< TH1 * > * fVarHistB
reference histograms (background)
Definition: MethodCuts.h:206
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
std::vector< PDF * > * fVarPdfB
reference PDFs (background)
Definition: MethodCuts.h:210
EEffMethod fEffMethod
chosen efficiency calculation method
Definition: MethodCuts.h:170
Double_t * fTmpCutMin
temporary minimum requirement
Definition: MethodCuts.h:186
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
Definition: MethodCuts.cxx:432
Double_t GetSeparation(PDF *=nullptr, PDF *=nullptr) const
compute "separation" defined as
Definition: MethodCuts.h:101
Double_t ** fCutMin
minimum requirement
Definition: MethodCuts.h:184
std::vector< TH1 * > * fVarHistS
reference histograms (signal)
Definition: MethodCuts.h:205
std::vector< Double_t > * fRmsB
RMSs of variables (background)
Definition: MethodCuts.h:200
Double_t fEffSMax
used to test optimized signal efficiency
Definition: MethodCuts.h:174
std::vector< TH1 * > * fVarHistB_smooth
smoothed reference histograms (background)
Definition: MethodCuts.h:208
Double_t GetmuTransform(TTree *)
Definition: MethodCuts.h:103
std::vector< PDF * > * fVarPdfS
reference PDFs (signal)
Definition: MethodCuts.h:209
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample
void AddWeightsXMLTo(void *parent) const
create XML description of the method's weights under the given parent node (the original brief mentions "LD classification and regression", which appears to have been copied from another method)
void Init(void)
default initialisation called by all constructors
Definition: MethodCuts.cxx:220
Double_t GetTrainingEfficiency(const TString &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
Definition: MethodCuts.cxx:211
void ProcessOptions()
process user options.
Definition: MethodCuts.cxx:363
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodCuts.h:133
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
Definition: MethodCuts.cxx:974
virtual ~MethodCuts(void)
destructor
Definition: MethodCuts.cxx:270
void TestClassification()
nothing to test
Definition: MethodCuts.cxx:827
Double_t * fCutRangeMin
minimum of allowed cut range
Definition: MethodCuts.h:175
BinarySearchTree * fBinaryTreeB
Definition: MethodCuts.h:181
std::vector< Double_t > * fRmsS
RMSs of variables (signal)
Definition: MethodCuts.h:199
std::vector< Double_t > * fMeanS
means of variables (signal)
Definition: MethodCuts.h:197
std::vector< Double_t > * fMeanB
means of variables (background)
Definition: MethodCuts.h:198
TString * fAllVarsI
what to do with variables
Definition: MethodCuts.h:188
std::vector< EFitParameters > * fFitParams
vector for series of fit methods
Definition: MethodCuts.h:171
Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
Definition: MethodCuts.h:100
Double_t fTestSignalEff
used to test optimized signal efficiency
Definition: MethodCuts.h:172
std::vector< Interval * > fCutRange
allowed ranges for cut optimisation
Definition: MethodCuts.h:177
Double_t * fTmpCutMax
temporary maximum requirement
Definition: MethodCuts.h:187
std::vector< TH1 * > * fVarHistS_smooth
smoothed reference histograms (signal)
Definition: MethodCuts.h:207
void MatchParsToCuts(Double_t *, Double_t *, Double_t *)
Double_t ** fCutMax
maximum requirement
Definition: MethodCuts.h:185
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
TH1 * fEffBvsSLocal
intermediate eff. background versus eff signal histo
Definition: MethodCuts.h:202
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
Definition: MethodCuts.cxx:551
void PrintCuts(Double_t effS) const
print cuts
Definition: MethodCuts.cxx:465
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Ranking for variables in method (implementation)
Definition: Ranking.h:48
EAnalysisType
Definition: Types.h:126
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:27
Basic string class.
Definition: TString.h:136
A TTree represents a columnar dataset.
Definition: TTree.h:79
create variable transformations