Logo ROOT   6.18/05
Reference Guide
TBinomialEfficiencyFitter.cxx
Go to the documentation of this file.
1// @(#)root/hist:$Id$
2// Author: Frank Filthaut, Rene Brun 30/05/2007
3
4/*************************************************************************
5 * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TBinomialEfficiencyFitter
13 \ingroup Hist
14 \brief Binomial fitter for the division of two histograms.
15
16Use when you need to calculate a selection's efficiency from two histograms,
17one containing all entries, and one containing the subset of these entries
18that pass the selection, and when you have a parametrization available for
19the efficiency as a function of the variable(s) under consideration.
20
21A very common problem when estimating efficiencies is that of error estimation:
22when no other information is available than the total number of events N and
23the selected number n, the best estimate for the selection efficiency p is n/N.
24Standard binomial statistics dictates that the uncertainty (this presupposes
25sufficiently high statistics that an approximation by a normal distribution is
26reasonable) on p, given N, is
27\f[
28 \sqrt{\frac{p(1-p)}{N}}
29\f]
30However, when p is estimated as n/N, fluctuations from the true p to its
31estimate become important, especially for low numbers of events, and giving
32rise to biased results.
33
34When fitting a parametrized efficiency, these problems can largely be overcome,
35as a hypothesized true efficiency is available by construction. Even so, simply
36using the corresponding uncertainty still presupposes that Gaussian errors
37yields a reasonable approximation. When using, instead of binned efficiency
38histograms, the original numerator and denominator histograms, a binned maximum
39likelihood can be constructed as the product of bin-by-bin binomial probabilities
40to select n out of N events. Assuming that a correct parametrization of the
41efficiency is provided, this construction in general yields less biased results
42(and is much less sensitive to binning details).
43
44A generic use of this method is given below (note that the method works for 2D
45and 3D histograms as well):
46
47~~~ {.cpp}
48 {
49 TH1* denominator; // denominator histogram
50 TH1* numerator; // corresponding numerator histogram
51 TF1* eff; // efficiency parametrization
52 .... // set step sizes and initial parameter
53 .... // values for the fit function
54 .... // possibly also set ranges, see TF1::SetRange()
55 TBinomialEfficiencyFitter* f = new TBinomialEfficiencyFitter(
56 numerator, denominator);
57 Int_t status = f->Fit(eff, "I");
58 if (status == 0) {
59 // if the fit was successful, display bin-by-bin efficiencies
60 // as well as the result of the fit
61 numerator->Sumw2();
62 TH1* hEff = dynamic_cast<TH1*>(numerator->Clone("heff"));
63 hEff->Divide(hEff, denominator, 1.0, 1.0, "B");
64 hEff->Draw("E");
65 eff->Draw("same");
66 }
67 }
68~~~
69
70Note that this method cannot be expected to yield reliable results when using
71weighted histograms (because the likelihood computation will be incorrect).
72
73*/
74
76
77#include "TMath.h"
78#include "TPluginManager.h"
79#include "TROOT.h"
80#include "TH1.h"
81#include "TF1.h"
82#include "TF2.h"
83#include "TF3.h"
84#include "Fit/FitConfig.h"
85#include "Fit/Fitter.h"
86#include "TFitResult.h"
87#include "Math/Functor.h"
88#include "Math/Functor.h"
89#include "Math/WrappedTF1.h"
90
91#include <limits>
92
93
95
97
98
99////////////////////////////////////////////////////////////////////////////////
100/// default constructor
101
103 fNumerator = 0;
104 fDenominator = 0;
105 fFunction = 0;
108 fRange = kFALSE;
110 fFitter = 0;
111}
112
113////////////////////////////////////////////////////////////////////////////////
114/// Constructor.
115///
116/// Note that no objects are copied, so it is up to the user to ensure that the
117/// histogram pointers remain valid.
118///
119/// Both histograms need to be "consistent". This is not checked here, but in
120/// TBinomialEfficiencyFitter::Fit().
121
122TBinomialEfficiencyFitter::TBinomialEfficiencyFitter(const TH1 *numerator, const TH1 *denominator) {
124 fFunction = 0;
125 fFitter = 0;
126 Set(numerator,denominator);
127}
128
129////////////////////////////////////////////////////////////////////////////////
130/// destructor
131
133 if (fFitter) delete fFitter;
134 fFitter = 0;
135}
136
137////////////////////////////////////////////////////////////////////////////////
138/// Initialize with a new set of inputs.
139
140void TBinomialEfficiencyFitter::Set(const TH1 *numerator, const TH1 *denominator)
141{
142 fNumerator = (TH1*)numerator;
143 fDenominator = (TH1*)denominator;
144
147 fRange = kFALSE;
148}
149
150////////////////////////////////////////////////////////////////////////////////
151/// Set the required integration precision, see TF1::Integral()
152
154{
156}
157
158////////////////////////////////////////////////////////////////////////////////
159/// Provide access to the underlying fitter object.
160/// This may be useful e.g. for the retrieval of additional information (such
161/// as the output covariance matrix of the fit).
162
164{
165 if (!fFitter) fFitter = new ROOT::Fit::Fitter();
166 return fFitter;
167
168}
169
170////////////////////////////////////////////////////////////////////////////////
171/// Carry out the fit of the given function to the given histograms.
172///
173/// If option "I" is used, the fit function will be averaged over the
174/// bin (the default is to evaluate it simply at the bin center).
175///
176/// If option "R" is used, the fit range will be taken from the fit
177/// function (the default is to use the entire histogram).
178///
179/// If option "S" a TFitResult object is returned and it can be used to obtain
180/// additional fit information, like covariance or correlation matrix.
181///
182/// Note that all parameter values, limits, and step sizes are copied
183/// from the input fit function f1 (so they should be set before calling
184/// this method. This is particularly relevant for the step sizes, taken
185/// to be the "error" set on input, as a null step size usually fixes the
186/// corresponding parameter. That is protected against, but in such cases
187/// an arbitrary starting step size will be used, and the reliability of
188/// the fit should be questioned). If parameters are to be fixed, this
189/// should be done by specifying non-null parameter limits, with lower
190/// limits larger than upper limits.
191///
192/// On output, f1 contains the fitted parameters and errors, as well as
193/// the number of degrees of freedom, and the goodness-of-fit estimator
194/// as given by S. Baker and R. Cousins, Nucl. Instr. Meth. A221 (1984) 437.
195
197{
198 TString opt = option;
199 opt.ToUpper();
200 fAverage = opt.Contains("I");
201 fRange = opt.Contains("R");
202 Bool_t verbose = opt.Contains("V");
203 Bool_t quiet = opt.Contains("Q");
204 Bool_t saveResult = opt.Contains("S");
205 if (!f1) return -1;
206 fFunction = (TF1*)f1;
207 Int_t i, npar;
208 npar = f1->GetNpar();
209 if (npar <= 0) {
210 Error("Fit", "function %s has illegal number of parameters = %d",
211 f1->GetName(), npar);
212 return -3;
213 }
214
215 // Check that function has same dimension as histogram
216 if (!fNumerator || !fDenominator) {
217 Error("Fit","No numerator or denominator histograms set");
218 return -5;
219 }
220 if (f1->GetNdim() != fNumerator->GetDimension()) {
221 Error("Fit","function %s dimension, %d, does not match histogram dimension, %d",
223 return -4;
224 }
225 // Check that the numbers of bins for the histograms match
227 (f1->GetNdim() > 1 && fNumerator->GetNbinsY() != fDenominator->GetNbinsY()) ||
228 (f1->GetNdim() > 2 && fNumerator->GetNbinsZ() != fDenominator->GetNbinsZ())) {
229 Error("Fit", "numerator and denominator histograms do not have identical numbers of bins");
230 return -6;
231 }
232
233 // initialize the fitter
234
235 if (!fFitter) {
237 }
238
239
240 std::vector<ROOT::Fit::ParameterSettings> & parameters = fFitter->Config().ParamsSettings();
241 parameters.reserve(npar);
242 for (i = 0; i < npar; i++) {
243
244 // assign an ARBITRARY starting error to ensure the parameter won't be fixed!
245 Double_t we = f1->GetParError(i);
246 if (we <= 0) we = 0.3*TMath::Abs(f1->GetParameter(i));
247 if (we == 0) we = 0.01;
248
249 parameters.push_back(ROOT::Fit::ParameterSettings(f1->GetParName(i), f1->GetParameter(i), we) );
250
251 Double_t plow, pup;
252 f1->GetParLimits(i,plow,pup);
253 if (plow*pup != 0 && plow >= pup) { // this is a limitation - cannot fix a parameter to zero value
254 parameters.back().Fix();
255 }
256 else if (plow < pup ) {
257 parameters.back().SetLimits(plow,pup);
258 }
259 }
260
261 // fcn must be set after setting the parameters
263 fFitter->SetFCN(static_cast<ROOT::Math::IMultiGenFunction&>(fcnFunction));
264
265
266 // in case default value of 1.0 is used
267 if (fFitter->Config().MinimizerOptions().ErrorDef() == 1.0 ) {
269 }
270
271 if (verbose) {
273 }
274 else if (quiet) {
276 }
277
278 // set model function in fitter to have it in FitResult
279 // in this way one can compute for example the confidence intervals
281
282 // perform the actual fit
283
284 fFitDone = kTRUE;
285 Bool_t status = fFitter->FitFCN();
286 if ( !status && !quiet)
287 Warning("Fit","Abnormal termination of minimization.");
288
289
290 //Store fit results in fitFunction
291 const ROOT::Fit::FitResult & fitResult = fFitter->Result();
292 if (!fitResult.IsEmpty() ) {
293 // set in f1 the result of the fit
294 f1->SetNDF(fitResult.Ndf() );
295
296 //f1->SetNumberFitPoints(...); // this is set in ComputeFCN
297
298 f1->SetParameters( &(fitResult.Parameters().front()) );
299 if ( int( fitResult.Errors().size()) >= f1->GetNpar() )
300 f1->SetParErrors( &(fitResult.Errors().front()) );
301
302 f1->SetChisquare(2.*fitResult.MinFcnValue()); // store goodness of fit (Baker&Cousins)
304 Info("result"," chi2 %f ndf %d ",2.*fitResult.MinFcnValue(), fitResult.Ndf() );
305
306 }
307 // create a new result class if needed
308 if (saveResult) {
309 TFitResult* fr = new TFitResult(fitResult);
310 TString name = TString::Format("TBinomialEfficiencyFitter_result_of_%s",f1->GetName() );
311 fr->SetName(name); fr->SetTitle(name);
312 return TFitResultPtr(fr);
313 }
314 else {
315 return TFitResultPtr(fitResult.Status() );
316 }
317
318}
319
320////////////////////////////////////////////////////////////////////////////////
321/// Compute the likelihood.
322
324{
325 int nDim = fDenominator->GetDimension();
326
327 int xlowbin = fDenominator->GetXaxis()->GetFirst();
328 int xhighbin = fDenominator->GetXaxis()->GetLast();
329 int ylowbin = 0, yhighbin = 0, zlowbin = 0, zhighbin = 0;
330 if (nDim > 1) {
331 ylowbin = fDenominator->GetYaxis()->GetFirst();
332 yhighbin = fDenominator->GetYaxis()->GetLast();
333 if (nDim > 2) {
334 zlowbin = fDenominator->GetZaxis()->GetFirst();
335 zhighbin = fDenominator->GetZaxis()->GetLast();
336 }
337 }
338
340
341 if (fRange) {
342 double xmin, xmax, ymin, ymax, zmin, zmax;
343
344 // This way to ensure that a minimum range chosen exactly at a
345 // bin boundary is far from elegant, but is hopefully adequate.
346
347 if (nDim == 1) {
349 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
350 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
351 } else if (nDim == 2) {
353 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
354 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
355 ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
356 yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
357 } else if (nDim == 3) {
358 fFunction->GetRange(xmin, ymin, zmin, xmax, ymax, zmax);
359 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
360 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
361 ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
362 yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
363 zlowbin = fDenominator->GetZaxis()->FindBin(zmin);
364 zhighbin = fDenominator->GetZaxis()->FindBin(zmax);
365 }
366 }
367
368 // The coding below is perhaps somewhat awkward -- but it is done
369 // so that 1D, 2D, and 3D cases can be covered in the same loops.
370
371 f = 0.;
372
373 Int_t npoints = 0;
374 Double_t nmax = 0;
375 for (int xbin = xlowbin; xbin <= xhighbin; ++xbin) {
376
377 // compute the bin edges
380
381 for (int ybin = ylowbin; ybin <= yhighbin; ++ybin) {
382
383 // compute the bin edges (if applicable)
384 Double_t ylow = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin) : 0;
385 Double_t yup = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin+1) : 0;
386
387 for (int zbin = zlowbin; zbin <= zhighbin; ++zbin) {
388
389 // compute the bin edges (if applicable)
390 Double_t zlow = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin) : 0;
391 Double_t zup = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin+1) : 0;
392
393 int bin = fDenominator->GetBin(xbin, ybin, zbin);
395 Double_t nNum = fNumerator->GetBinContent(bin);
396
397 // count maximum value to use in the likelihood for inf
398 // i.e. a number much larger than the other terms
399 if (nDen> nmax) nmax = nDen;
400 if (nDen <= 0.) continue;
401 npoints++;
402
403 // mu is the average of the function over the bin OR
404 // the function evaluated at the bin centre
405 // As yet, there is nothing to prevent mu from being
406 // outside the range <0,1> !!
407
408 Double_t mu = 0;
409 switch (nDim) {
410 case 1:
411 mu = (fAverage) ?
412 fFunction->Integral(xlow, xup, fEpsilon)
413 / (xup-xlow) :
415 break;
416 case 2:
417 {
418 mu = (fAverage) ?
419 ((TF2*)fFunction)->Integral(xlow, xup, ylow, yup, fEpsilon)
420 / ((xup-xlow)*(yup-ylow)) :
423 }
424 break;
425 case 3:
426 {
427 mu = (fAverage) ?
428 ((TF3*)fFunction)->Integral(xlow, xup, ylow, yup, zlow, zup, fEpsilon)
429 / ((xup-xlow)*(yup-ylow)*(zup-zlow)) :
433 }
434 }
435
436 // binomial formula (forgetting about the factorials)
437 if (nNum != 0.) {
438 if (mu > 0.)
439 f -= nNum * TMath::Log(mu*nDen/nNum);
440 else
441 f -= nmax * -1E30; // crossing our fingers
442 }
443 if (nDen - nNum != 0.) {
444 if (1. - mu > 0.)
445 f -= (nDen - nNum) * TMath::Log((1. - mu)*nDen/(nDen-nNum));
446 else
447 f -= nmax * -1E30; // crossing our fingers
448 }
449 }
450 }
451 }
452
454}
455
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:87
const char Option_t
Definition: RtypesCore.h:62
#define ClassImp(name)
Definition: Rtypes.h:365
const Double_t kDefaultEpsilon
char name[80]
Definition: TGX11.cxx:109
float xmin
Definition: THbookFile.cxx:93
float ymin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
float ymax
Definition: THbookFile.cxx:93
const std::vector< ROOT::Fit::ParameterSettings > & ParamsSettings() const
get the vector of parameter settings (const method)
Definition: FitConfig.h:85
ROOT::Math::MinimizerOptions & MinimizerOptions()
access to the minimizer control parameter (non const method)
Definition: FitConfig.h:166
class containg the result of the fit and all the related information (fitted parameter values,...
Definition: FitResult.h:48
bool IsEmpty() const
True if a fit result does not exist (even invalid) with parameter values.
Definition: FitResult.h:118
const std::vector< double > & Errors() const
parameter errors (return st::vector)
Definition: FitResult.h:170
const std::vector< double > & Parameters() const
parameter values (return std::vector)
Definition: FitResult.h:175
unsigned int Ndf() const
Number of degree of freedom.
Definition: FitResult.h:164
double MinFcnValue() const
Return value of the objective function (chi2 or likelihood) used in the fit.
Definition: FitResult.h:121
unsigned int NFreeParameters() const
get total number of free parameters
Definition: FitResult.h:135
int Status() const
minimizer status code
Definition: FitResult.h:138
Fitter class, entry point for performing all type of fits.
Definition: Fitter.h:77
bool FitFCN(unsigned int npar, Function &fcn, const double *params=0, unsigned int dataSize=0, bool chi2fit=false)
Fit using the a generic FCN function as a C++ callable object implementing double () (const double *)...
Definition: Fitter.h:590
bool SetFCN(unsigned int npar, Function &fcn, const double *params=0, unsigned int dataSize=0, bool chi2fit=false)
Set a generic FCN function as a C++ callable object implementing double () (const double *) Note that...
Definition: Fitter.h:595
const FitResult & Result() const
get fit result
Definition: Fitter.h:365
const FitConfig & Config() const
access to the fit configuration (const method)
Definition: Fitter.h:393
void SetFunction(const IModelFunction &func, bool useGradient=false)
Set the fitted function (model function) from a parametric function interface.
Definition: Fitter.cxx:103
Class, describing value, limits and step size of the parameters Provides functionality also to set/re...
Documentation for class Functor class.
Definition: Functor.h:392
Documentation for the abstract class IBaseFunctionMultiDim.
Definition: IFunction.h:62
double ErrorDef() const
error definition
void SetErrorDef(double err)
set error def
void SetPrintLevel(int level)
set print level
Class to Wrap a ROOT Function class (like TF1) in a IParamFunction interface of one dimensions to be ...
Definition: WrappedTF1.h:37
virtual Double_t GetBinCenter(Int_t bin) const
Return center of bin.
Definition: TAxis.cxx:464
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
Definition: TAxis.cxx:279
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition: TAxis.cxx:504
Int_t GetLast() const
Return last bin on the axis i.e.
Definition: TAxis.cxx:455
Int_t GetFirst() const
Return first bin on the axis i.e.
Definition: TAxis.cxx:444
Binomial fitter for the division of two histograms.
void Set(const TH1 *numerator, const TH1 *denominator)
Initialize with a new set of inputs.
virtual ~TBinomialEfficiencyFitter()
destructor
TBinomialEfficiencyFitter()
default constructor
ROOT::Fit::Fitter * GetFitter()
Provide access to the underlying fitter object.
Double_t EvaluateFCN(const Double_t *par)
void SetPrecision(Double_t epsilon)
Set the required integration precision, see TF1::Integral()
void ComputeFCN(Double_t &f, const Double_t *par)
Compute the likelihood.
TFitResultPtr Fit(TF1 *f1, Option_t *option="")
Carry out the fit of the given function to the given histograms.
1-Dim function class
Definition: TF1.h:211
virtual void GetParLimits(Int_t ipar, Double_t &parmin, Double_t &parmax) const
Return limits for parameter ipar.
Definition: TF1.cxx:1920
virtual void SetNDF(Int_t ndf)
Set the number of degrees of freedom ndf should be the number of points used in a fit - the number of...
Definition: TF1.cxx:3418
virtual Double_t GetParError(Int_t ipar) const
Return value of parameter number ipar.
Definition: TF1.cxx:1910
virtual void SetChisquare(Double_t chi2)
Definition: TF1.h:606
virtual Int_t GetNpar() const
Definition: TF1.h:475
virtual void SetParErrors(const Double_t *errors)
Set errors for all active parameters when calling this function, the array errors must have at least ...
Definition: TF1.cxx:3483
virtual Double_t Integral(Double_t a, Double_t b, Double_t epsrel=1.e-12)
IntegralOneDim or analytical integral.
Definition: TF1.cxx:2502
virtual Int_t GetNumberFitPoints() const
Definition: TF1.h:497
virtual void SetNumberFitPoints(Int_t npfits)
Definition: TF1.h:618
virtual void GetRange(Double_t *xmin, Double_t *xmax) const
Return range of a generic N-D function.
Definition: TF1.cxx:2263
virtual const char * GetParName(Int_t ipar) const
Definition: TF1.h:523
virtual void SetParameters(const Double_t *params)
Definition: TF1.h:638
virtual Double_t Eval(Double_t x, Double_t y=0, Double_t z=0, Double_t t=0) const
Evaluate this function.
Definition: TF1.cxx:1429
virtual Int_t GetNdim() const
Definition: TF1.h:479
virtual Double_t GetParameter(Int_t ipar) const
Definition: TF1.h:506
A 2-Dim function with parameters.
Definition: TF2.h:29
A 3-Dim function with parameters.
Definition: TF3.h:28
Provides an indirection to the TFitResult class and with a semantics identical to a TFitResult pointe...
Definition: TFitResultPtr.h:31
Extends the ROOT::Fit::Result class with a TNamed inheritance providing easy possibility for I/O.
Definition: TFitResult.h:32
The TH1 histogram class.
Definition: TH1.h:56
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8554
TAxis * GetZaxis()
Definition: TH1.h:318
virtual Int_t GetNbinsY() const
Definition: TH1.h:293
virtual Int_t GetNbinsZ() const
Definition: TH1.h:294
virtual Int_t GetDimension() const
Definition: TH1.h:278
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:316
virtual Int_t GetBin(Int_t binx, Int_t biny=0, Int_t binz=0) const
Return Global bin number corresponding to binx,y,z.
Definition: TH1.cxx:4784
virtual Int_t GetNbinsX() const
Definition: TH1.h:292
TAxis * GetYaxis()
Definition: TH1.h:317
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4882
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:866
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:854
Basic string class.
Definition: TString.h:131
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1138
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
TF1 * f1
Definition: legend1.C:11
constexpr Double_t E()
Base of natural log:
Definition: TMath.h:97
Double_t Log(Double_t x)
Definition: TMath.h:748
Short_t Abs(Short_t d)
Definition: TMathBase.h:120
REAL epsilon
Definition: triangle.c:617