Logo ROOT  
Reference Guide
TBinomialEfficiencyFitter.cxx
Go to the documentation of this file.
1// @(#)root/hist:$Id$
2// Author: Frank Filthaut, Rene Brun 30/05/2007
3
4/*************************************************************************
5 * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TBinomialEfficiencyFitter
13 \ingroup Hist
14 \brief Binomial fitter for the division of two histograms.
15
16Use when you need to calculate a selection's efficiency from two histograms,
17one containing all entries, and one containing the subset of these entries
18that pass the selection, and when you have a parametrization available for
19the efficiency as a function of the variable(s) under consideration.
20
21A very common problem when estimating efficiencies is that of error estimation:
22when no other information is available than the total number of events N and
23the selected number n, the best estimate for the selection efficiency p is n/N.
24Standard binomial statistics dictates that the uncertainty (this presupposes
25sufficiently high statistics that an approximation by a normal distribution is
26reasonable) on p, given N, is
27\f[
28 \sqrt{\frac{p(1-p)}{N}}
29\f]
30However, when p is estimated as n/N, fluctuations from the true p to its
31estimate become important, especially for low numbers of events, and giving
32rise to biased results.
33
34When fitting a parametrized efficiency, these problems can largely be overcome,
35as a hypothesized true efficiency is available by construction. Even so, simply
36using the corresponding uncertainty still presupposes that Gaussian errors
37yields a reasonable approximation. When using, instead of binned efficiency
38histograms, the original numerator and denominator histograms, a binned maximum
39likelihood can be constructed as the product of bin-by-bin binomial probabilities
40to select n out of N events. Assuming that a correct parametrization of the
41efficiency is provided, this construction in general yields less biased results
42(and is much less sensitive to binning details).
43
44A generic use of this method is given below (note that the method works for 2D
45and 3D histograms as well):
46
47~~~ {.cpp}
48 {
49 TH1* denominator; // denominator histogram
50 TH1* numerator; // corresponding numerator histogram
51 TF1* eff; // efficiency parametrization
52 .... // set step sizes and initial parameter
53 .... // values for the fit function
54 .... // possibly also set ranges, see TF1::SetRange()
55 TBinomialEfficiencyFitter* f = new TBinomialEfficiencyFitter(
56 numerator, denominator);
57 Int_t status = f->Fit(eff, "I");
58 if (status == 0) {
59 // if the fit was successful, display bin-by-bin efficiencies
60 // as well as the result of the fit
61 numerator->Sumw2();
62 TH1* hEff = dynamic_cast<TH1*>(numerator->Clone("heff"));
63 hEff->Divide(hEff, denominator, 1.0, 1.0, "B");
64 hEff->Draw("E");
65 eff->Draw("same");
66 }
67 }
68~~~
69
70Note that this method cannot be expected to yield reliable results when using
71weighted histograms (because the likelihood computation will be incorrect).
72
73*/
74
76
77#include "TMath.h"
78#include "TPluginManager.h"
79#include "TROOT.h"
80#include "TH1.h"
81#include "TF1.h"
82#include "TF2.h"
83#include "TF3.h"
84#include "Fit/FitConfig.h"
85#include "Fit/Fitter.h"
86#include "TFitResult.h"
87#include "Math/Functor.h"
89
90#include <limits>
91
92
94
96
97
98////////////////////////////////////////////////////////////////////////////////
99/// default constructor
100
102 fNumerator = 0;
103 fDenominator = 0;
104 fFunction = 0;
107 fRange = kFALSE;
109 fFitter = 0;
110}
111
112////////////////////////////////////////////////////////////////////////////////
113/// Constructor.
114///
115/// Note that no objects are copied, so it is up to the user to ensure that the
116/// histogram pointers remain valid.
117///
118/// Both histograms need to be "consistent". This is not checked here, but in
119/// TBinomialEfficiencyFitter::Fit().
120
121TBinomialEfficiencyFitter::TBinomialEfficiencyFitter(const TH1 *numerator, const TH1 *denominator) {
123 fFunction = 0;
124 fFitter = 0;
125 Set(numerator,denominator);
126}
127
128////////////////////////////////////////////////////////////////////////////////
129/// destructor
130
132 if (fFitter) delete fFitter;
133 fFitter = 0;
134}
135
136////////////////////////////////////////////////////////////////////////////////
137/// Initialize with a new set of inputs.
138
139void TBinomialEfficiencyFitter::Set(const TH1 *numerator, const TH1 *denominator)
140{
141 fNumerator = (TH1*)numerator;
142 fDenominator = (TH1*)denominator;
143
146 fRange = kFALSE;
147}
148
149////////////////////////////////////////////////////////////////////////////////
150/// Set the required integration precision, see TF1::Integral()
151
153{
155}
156
157////////////////////////////////////////////////////////////////////////////////
158/// Provide access to the underlying fitter object.
159/// This may be useful e.g. for the retrieval of additional information (such
160/// as the output covariance matrix of the fit).
161
163{
164 if (!fFitter) fFitter = new ROOT::Fit::Fitter();
165 return fFitter;
166
167}
168
169////////////////////////////////////////////////////////////////////////////////
170/// Carry out the fit of the given function to the given histograms.
171///
172/// If option "I" is used, the fit function will be averaged over the
173/// bin (the default is to evaluate it simply at the bin center).
174///
175/// If option "R" is used, the fit range will be taken from the fit
176/// function (the default is to use the entire histogram).
177///
178/// If option "S" a TFitResult object is returned and it can be used to obtain
179/// additional fit information, like covariance or correlation matrix.
180///
181/// Note that all parameter values, limits, and step sizes are copied
182/// from the input fit function f1 (so they should be set before calling
183/// this method. This is particularly relevant for the step sizes, taken
184/// to be the "error" set on input, as a null step size usually fixes the
185/// corresponding parameter. That is protected against, but in such cases
186/// an arbitrary starting step size will be used, and the reliability of
187/// the fit should be questioned). If parameters are to be fixed, this
188/// should be done by specifying non-null parameter limits, with lower
189/// limits larger than upper limits.
190///
191/// On output, f1 contains the fitted parameters and errors, as well as
192/// the number of degrees of freedom, and the goodness-of-fit estimator
193/// as given by S. Baker and R. Cousins, Nucl. Instr. Meth. A221 (1984) 437.
194
196{
197 TString opt = option;
198 opt.ToUpper();
199 fAverage = opt.Contains("I");
200 fRange = opt.Contains("R");
201 Bool_t verbose = opt.Contains("V");
202 Bool_t quiet = opt.Contains("Q");
203 Bool_t saveResult = opt.Contains("S");
204 if (!f1) return -1;
205 fFunction = (TF1*)f1;
206 Int_t i, npar;
207 npar = f1->GetNpar();
208 if (npar <= 0) {
209 Error("Fit", "function %s has illegal number of parameters = %d",
210 f1->GetName(), npar);
211 return -3;
212 }
213
214 // Check that function has same dimension as histogram
215 if (!fNumerator || !fDenominator) {
216 Error("Fit","No numerator or denominator histograms set");
217 return -5;
218 }
219 if (f1->GetNdim() != fNumerator->GetDimension()) {
220 Error("Fit","function %s dimension, %d, does not match histogram dimension, %d",
222 return -4;
223 }
224 // Check that the numbers of bins for the histograms match
226 (f1->GetNdim() > 1 && fNumerator->GetNbinsY() != fDenominator->GetNbinsY()) ||
227 (f1->GetNdim() > 2 && fNumerator->GetNbinsZ() != fDenominator->GetNbinsZ())) {
228 Error("Fit", "numerator and denominator histograms do not have identical numbers of bins");
229 return -6;
230 }
231
232 // initialize the fitter
233
234 if (!fFitter) {
236 }
237
238
239 std::vector<ROOT::Fit::ParameterSettings> & parameters = fFitter->Config().ParamsSettings();
240 parameters.reserve(npar);
241 for (i = 0; i < npar; i++) {
242
243 // assign an ARBITRARY starting error to ensure the parameter won't be fixed!
244 Double_t we = f1->GetParError(i);
245 if (we <= 0) we = 0.3*TMath::Abs(f1->GetParameter(i));
246 if (we == 0) we = 0.01;
247
248 parameters.push_back(ROOT::Fit::ParameterSettings(f1->GetParName(i), f1->GetParameter(i), we) );
249
250 Double_t plow, pup;
251 f1->GetParLimits(i,plow,pup);
252 // when a parameter is fixed is having plow and pup equal to the value (if this is not zero)
253 // we handle special case when fixed parameter has zero value (in that case plow=1 and pup =1 )
254 if (plow >= pup && (plow==f1->GetParameter(i) || pup==f1->GetParameter(i) ||
255 ( f1->GetParameter(i) == 0 && plow==1. && pup == 1.) ) ) {
256 parameters.back().Fix();
257 Info("Fit", "Fixing parameter %s to value %f", f1->GetParName(i), f1->GetParameter(i));
258 } else if (plow < pup) {
259 parameters.back().SetLimits(plow,pup);
260 Info("Fit", "Setting limits for parameter %s to [%f,%f]", f1->GetParName(i), plow,pup);
261 }
262 }
263
264 // fcn must be set after setting the parameters
266
267 // set also model function in fitter to have it in FitResult
268 // in this way one can compute for example the confidence intervals
270
271 // in case default value of 1.0 is used
272 if (fFitter->Config().MinimizerOptions().ErrorDef() == 1.0 ) {
274 }
275
276 if (verbose) {
278 }
279 else if (quiet) {
281 }
282
283 // perform the actual fit
284
285 fFitDone = kTRUE;
286 Bool_t status = fFitter->FitFCN();
287 if ( !status && !quiet)
288 Warning("Fit","Abnormal termination of minimization.");
289
290
291 //Store fit results in fitFunction
292 const ROOT::Fit::FitResult & fitResult = fFitter->Result();
293 if (!fitResult.IsEmpty() ) {
294 // set in f1 the result of the fit
295 f1->SetNDF(fitResult.Ndf() );
296
297 //f1->SetNumberFitPoints(...); // this is set in ComputeFCN
298
299 f1->SetParameters( &(fitResult.Parameters().front()) );
300 if ( int( fitResult.Errors().size()) >= f1->GetNpar() )
301 f1->SetParErrors( &(fitResult.Errors().front()) );
302
303 f1->SetChisquare(2.*fitResult.MinFcnValue()); // store goodness of fit (Baker&Cousins)
305 Info("result"," chi2 %f ndf %d ",2.*fitResult.MinFcnValue(), fitResult.Ndf() );
306
307 }
308 // create a new result class if needed
309 if (saveResult) {
310 TFitResult* fr = new TFitResult(fitResult);
311 TString name = TString::Format("TBinomialEfficiencyFitter_result_of_%s",f1->GetName() );
312 fr->SetName(name); fr->SetTitle(name);
313 return TFitResultPtr(fr);
314 }
315 else {
316 return TFitResultPtr(fitResult.Status() );
317 }
318
319}
320
321////////////////////////////////////////////////////////////////////////////////
322/// Compute the likelihood.
323
325{
326 int nDim = fDenominator->GetDimension();
327
328 int xlowbin = fDenominator->GetXaxis()->GetFirst();
329 int xhighbin = fDenominator->GetXaxis()->GetLast();
330 int ylowbin = 0, yhighbin = 0, zlowbin = 0, zhighbin = 0;
331 if (nDim > 1) {
332 ylowbin = fDenominator->GetYaxis()->GetFirst();
333 yhighbin = fDenominator->GetYaxis()->GetLast();
334 if (nDim > 2) {
335 zlowbin = fDenominator->GetZaxis()->GetFirst();
336 zhighbin = fDenominator->GetZaxis()->GetLast();
337 }
338 }
339
341
342 if (fRange) {
343 double xmin, xmax, ymin, ymax, zmin, zmax;
344
345 // This way to ensure that a minimum range chosen exactly at a
346 // bin boundary is far from elegant, but is hopefully adequate.
347
348 if (nDim == 1) {
350 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
351 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
352 } else if (nDim == 2) {
354 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
355 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
356 ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
357 yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
358 } else if (nDim == 3) {
359 fFunction->GetRange(xmin, ymin, zmin, xmax, ymax, zmax);
360 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
361 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
362 ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
363 yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
364 zlowbin = fDenominator->GetZaxis()->FindBin(zmin);
365 zhighbin = fDenominator->GetZaxis()->FindBin(zmax);
366 }
367 }
368
369 // The coding below is perhaps somewhat awkward -- but it is done
370 // so that 1D, 2D, and 3D cases can be covered in the same loops.
371
372 f = 0.;
373
374 Int_t npoints = 0;
375 Double_t nmax = 0;
376 for (int xbin = xlowbin; xbin <= xhighbin; ++xbin) {
377
378 // compute the bin edges
381
382 for (int ybin = ylowbin; ybin <= yhighbin; ++ybin) {
383
384 // compute the bin edges (if applicable)
385 Double_t ylow = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin) : 0;
386 Double_t yup = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin+1) : 0;
387
388 for (int zbin = zlowbin; zbin <= zhighbin; ++zbin) {
389
390 // compute the bin edges (if applicable)
391 Double_t zlow = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin) : 0;
392 Double_t zup = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin+1) : 0;
393
394 int bin = fDenominator->GetBin(xbin, ybin, zbin);
396 Double_t nNum = fNumerator->GetBinContent(bin);
397
398 // count maximum value to use in the likelihood for inf
399 // i.e. a number much larger than the other terms
400 if (nDen> nmax) nmax = nDen;
401 if (nDen <= 0.) continue;
402 npoints++;
403
404 // mu is the average of the function over the bin OR
405 // the function evaluated at the bin centre
406 // As yet, there is nothing to prevent mu from being
407 // outside the range <0,1> !!
408
409 Double_t mu = 0;
410 switch (nDim) {
411 case 1:
412 mu = (fAverage) ?
413 fFunction->Integral(xlow, xup, fEpsilon)
414 / (xup-xlow) :
416 break;
417 case 2:
418 {
419 mu = (fAverage) ?
420 ((TF2*)fFunction)->Integral(xlow, xup, ylow, yup, fEpsilon)
421 / ((xup-xlow)*(yup-ylow)) :
424 }
425 break;
426 case 3:
427 {
428 mu = (fAverage) ?
429 ((TF3*)fFunction)->Integral(xlow, xup, ylow, yup, zlow, zup, fEpsilon)
430 / ((xup-xlow)*(yup-ylow)*(zup-zlow)) :
434 }
435 }
436
437 // binomial formula (forgetting about the factorials)
438 if (nNum != 0.) {
439 if (mu > 0.)
440 f -= nNum * TMath::Log(mu*nDen/nNum);
441 else
442 f -= nmax * -1E30; // crossing our fingers
443 }
444 if (nDen - nNum != 0.) {
445 if (1. - mu > 0.)
446 f -= (nDen - nNum) * TMath::Log((1. - mu)*nDen/(nDen-nNum));
447 else
448 f -= nmax * -1E30; // crossing our fingers
449 }
450 }
451 }
452 }
453
455}
#define f(i)
Definition: RSha256.hxx:104
const Bool_t kFALSE
Definition: RtypesCore.h:90
double Double_t
Definition: RtypesCore.h:57
const Bool_t kTRUE
Definition: RtypesCore.h:89
const char Option_t
Definition: RtypesCore.h:64
#define ClassImp(name)
Definition: Rtypes.h:361
const Double_t kDefaultEpsilon
char name[80]
Definition: TGX11.cxx:109
float xmin
Definition: THbookFile.cxx:93
float ymin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
float ymax
Definition: THbookFile.cxx:93
const std::vector< ROOT::Fit::ParameterSettings > & ParamsSettings() const
get the vector of parameter settings (const method)
Definition: FitConfig.h:85
ROOT::Math::MinimizerOptions & MinimizerOptions()
access to the minimizer control parameter (non const method)
Definition: FitConfig.h:166
class containg the result of the fit and all the related information (fitted parameter values,...
Definition: FitResult.h:47
bool IsEmpty() const
True if a fit result does not exist (even invalid) with parameter values.
Definition: FitResult.h:117
const std::vector< double > & Errors() const
parameter errors (return st::vector)
Definition: FitResult.h:169
const std::vector< double > & Parameters() const
parameter values (return std::vector)
Definition: FitResult.h:174
unsigned int Ndf() const
Number of degree of freedom.
Definition: FitResult.h:163
double MinFcnValue() const
Return value of the objective function (chi2 or likelihood) used in the fit.
Definition: FitResult.h:120
unsigned int NFreeParameters() const
get total number of free parameters
Definition: FitResult.h:134
int Status() const
minimizer status code
Definition: FitResult.h:137
Fitter class, entry point for performing all type of fits.
Definition: Fitter.h:77
bool FitFCN(unsigned int npar, Function &fcn, const double *params=0, unsigned int dataSize=0, bool chi2fit=false)
Fit using the a generic FCN function as a C++ callable object implementing double () (const double *)...
Definition: Fitter.h:610
bool SetFCN(unsigned int npar, Function &fcn, const double *params=0, unsigned int dataSize=0, bool chi2fit=false)
Set a generic FCN function as a C++ callable object implementing double () (const double *) Note that...
Definition: Fitter.h:615
const FitResult & Result() const
get fit result
Definition: Fitter.h:384
const FitConfig & Config() const
access to the fit configuration (const method)
Definition: Fitter.h:412
Class, describing value, limits and step size of the parameters Provides functionality also to set/re...
Documentation for class Functor class.
Definition: Functor.h:398
Documentation for the abstract class IBaseFunctionMultiDim.
Definition: IFunction.h:62
double ErrorDef() const
error definition
void SetErrorDef(double err)
set error def
void SetPrintLevel(int level)
set print level
Class to Wrap a ROOT Function class (like TF1) in a IParamMultiFunction interface of multi-dimensions...
virtual Double_t GetBinCenter(Int_t bin) const
Return center of bin.
Definition: TAxis.cxx:475
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
Definition: TAxis.cxx:290
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition: TAxis.cxx:515
Int_t GetLast() const
Return last bin on the axis i.e.
Definition: TAxis.cxx:466
Int_t GetFirst() const
Return first bin on the axis i.e.
Definition: TAxis.cxx:455
Binomial fitter for the division of two histograms.
void Set(const TH1 *numerator, const TH1 *denominator)
Initialize with a new set of inputs.
virtual ~TBinomialEfficiencyFitter()
destructor
TBinomialEfficiencyFitter()
default constructor
ROOT::Fit::Fitter * GetFitter()
Provide access to the underlying fitter object.
Double_t EvaluateFCN(const Double_t *par)
void SetPrecision(Double_t epsilon)
Set the required integration precision, see TF1::Integral()
void ComputeFCN(Double_t &f, const Double_t *par)
Compute the likelihood.
TFitResultPtr Fit(TF1 *f1, Option_t *option="")
Carry out the fit of the given function to the given histograms.
1-Dim function class
Definition: TF1.h:210
virtual void GetParLimits(Int_t ipar, Double_t &parmin, Double_t &parmax) const
Return limits for parameter ipar.
Definition: TF1.cxx:1923
virtual void SetNDF(Int_t ndf)
Set the number of degrees of freedom ndf should be the number of points used in a fit - the number of...
Definition: TF1.cxx:3421
virtual Double_t GetParError(Int_t ipar) const
Return value of parameter number ipar.
Definition: TF1.cxx:1913
virtual void SetChisquare(Double_t chi2)
Definition: TF1.h:606
virtual Int_t GetNpar() const
Definition: TF1.h:475
virtual void SetParErrors(const Double_t *errors)
Set errors for all active parameters when calling this function, the array errors must have at least ...
Definition: TF1.cxx:3486
virtual Double_t Integral(Double_t a, Double_t b, Double_t epsrel=1.e-12)
IntegralOneDim or analytical integral.
Definition: TF1.cxx:2505
virtual Int_t GetNumberFitPoints() const
Definition: TF1.h:497
virtual void SetNumberFitPoints(Int_t npfits)
Definition: TF1.h:618
virtual void GetRange(Double_t *xmin, Double_t *xmax) const
Return range of a generic N-D function.
Definition: TF1.cxx:2266
virtual const char * GetParName(Int_t ipar) const
Definition: TF1.h:523
virtual void SetParameters(const Double_t *params)
Definition: TF1.h:638
virtual Double_t Eval(Double_t x, Double_t y=0, Double_t z=0, Double_t t=0) const
Evaluate this function.
Definition: TF1.cxx:1432
virtual Int_t GetNdim() const
Definition: TF1.h:479
virtual Double_t GetParameter(Int_t ipar) const
Definition: TF1.h:506
A 2-Dim function with parameters.
Definition: TF2.h:29
A 3-Dim function with parameters.
Definition: TF3.h:28
Provides an indirection to the TFitResult class and with a semantics identical to a TFitResult pointe...
Definition: TFitResultPtr.h:31
Extends the ROOT::Fit::Result class with a TNamed inheritance providing easy possibility for I/O.
Definition: TFitResult.h:32
The TH1 histogram class.
Definition: TH1.h:56
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8597
TAxis * GetZaxis()
Definition: TH1.h:318
virtual Int_t GetNbinsY() const
Definition: TH1.h:293
virtual Int_t GetNbinsZ() const
Definition: TH1.h:294
virtual Int_t GetDimension() const
Definition: TH1.h:278
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:316
virtual Int_t GetBin(Int_t binx, Int_t biny=0, Int_t binz=0) const
Return Global bin number corresponding to binx,y,z.
Definition: TH1.cxx:4809
virtual Int_t GetNbinsX() const
Definition: TH1.h:292
TAxis * GetYaxis()
Definition: TH1.h:317
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4907
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:877
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:891
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:865
Basic string class.
Definition: TString.h:131
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1138
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
TF1 * f1
Definition: legend1.C:11
constexpr Double_t E()
Base of natural log:
Definition: TMath.h:97
Double_t Log(Double_t x)
Definition: TMath.h:750
Short_t Abs(Short_t d)
Definition: TMathBase.h:120
REAL epsilon
Definition: triangle.c:617