Logo ROOT  
Reference Guide
rf611_weightedfits.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_roofit
3/// \notebook -js
4/// Likelihood and minimization: Parameter uncertainties for weighted unbinned ML fits
5///
6/// ## Parameter uncertainties for weighted unbinned ML fits
7///
8/// Based on example from https://arxiv.org/abs/1911.01303
9///
10/// This example compares different approaches to determining parameter uncertainties in weighted unbinned maximum likelihood fits.
11/// Performing a weighted unbinned maximum likelihood fits can be useful to account for acceptance effects and to statistically subtract background events using the sPlot formalism.
12/// It is however well known that the inverse Hessian matrix does not yield parameter uncertainties with correct coverage in the presence of event weights.
13/// Three approaches to the determination of parameter uncertainties are compared in this example:
14///
15/// 1. Using the inverse weighted Hessian matrix [`SumW2Error(false)`]
16///
17/// 2. Using the expression [`SumW2Error(true)`]
18/// \f[
19/// V_{ij} = H_{ik}^{-1} C_{kl} H_{lj}^{-1}
20/// \f]
21/// where H is the weighted Hessian matrix and C is the Hessian matrix with squared weights
22///
23/// 3. The asymptotically correct approach (for details please see https://arxiv.org/abs/1911.01303) [`Asymptotic(true)`]
24/// \f[
25/// V_{ij} = H_{ik}^{-1} D_{kl} H_{lj}^{-1}
26/// \f]
27/// where H is the weighted Hessian matrix and D is given by
28/// \f[
29/// D_{kl} = \sum_{e=1}^{N} w_e^2 \frac{\partial \log(P)}{\partial \lambda_k}\frac{\partial \log(P)}{\partial \lambda_l}
30/// \f]
31/// with the event weight \f$w_e\f$.
32///
33/// The example performs the fit of a second order polynomial in the angle cos(theta) [-1,1] to a weighted data set.
34/// The polynomial is given by
35/// \f[
36/// P = \frac{ 1 + c_0 \cdot \cos(\theta) + c_1 \cdot \cos(\theta) \cdot \cos(\theta) }{\mathrm{Norm}}
37/// \f]
38/// The two coefficients \f$ c_0 \f$ and \f$ c_1 \f$ and their uncertainties are to be determined in the fit.
39///
40/// The per-event weight is used to correct for an acceptance effect, two different acceptance models can be studied:
41/// - `acceptancemodel==1`: eff = \f$ 0.3 + 0.7 \cdot \cos(\theta) \cdot \cos(\theta) \f$
42/// - `acceptancemodel==2`: eff = \f$ 1.0 - 0.7 \cdot \cos(\theta) \cdot \cos(\theta) \f$
43/// The data is generated to be flat before the acceptance effect.
44///
45/// The performance of the different approaches to determine parameter uncertainties is compared using the pull distributions from a large number of pseudoexperiments.
46/// The pull is defined as \f$ (\lambda_i - \lambda_{gen})/\sigma(\lambda_i) \f$, where \f$ \lambda_i \f$ is the fitted parameter and \f$ \sigma(\lambda_i) \f$ its
47/// uncertainty for pseudoexperiment number i.
48/// If the fit is unbiased and the parameter uncertainties are estimated correctly, the pull distribution should be a Gaussian centered around zero with a width of one.
49///
50/// \macro_image
51/// \macro_output
52/// \macro_code
53///
54/// \date November 2019
55/// \author Christoph Langenbruch
56
57#include "TH1D.h"
58#include "TCanvas.h"
59#include "TROOT.h"
60#include "TStyle.h"
61#include "TRandom3.h"
62#include "TLegend.h"
63#include "RooRealVar.h"
64#include "RooFitResult.h"
65#include "RooDataSet.h"
66#include "RooPolynomial.h"
67
68using namespace RooFit;
69
70
71int rf611_weightedfits(int acceptancemodel=2) {
72 // I n i t i a l i s a t i o n a n d S e t u p
73 //------------------------------------------------
74
75 //plotting options
78 gStyle->SetTitleSize(0.05, "XY");
79 gStyle->SetLabelSize(0.05, "XY");
80 gStyle->SetTitleOffset(0.9, "XY");
81 gStyle->SetTextSize(0.05);
84 gStyle->SetPadTopMargin(0.075);
90
91 //initialise TRandom3
92 TRandom3* rnd = new TRandom3();
93 rnd->SetSeed(191101303);
94
95 //accepted events and events weighted to account for the acceptance
96 TH1D* haccepted = new TH1D("haccepted", "Generated events;cos(#theta);#events", 40, -1.0, 1.0);
97 TH1D* hweighted = new TH1D("hweighted", "Generated events;cos(#theta);#events", 40, -1.0, 1.0);
98 //histograms holding pull distributions
99 //using the inverse Hessian matrix
100 TH1D* hc0pull1 = new TH1D("hc0pull1", "Inverse weighted Hessian matrix [SumW2Error(false)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);
101 TH1D* hc1pull1 = new TH1D("hc1pull1", "Inverse weighted Hessian matrix [SumW2Error(false)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);
102 //using the correction with the Hessian matrix with squared weights
103 TH1D* hc0pull2 = new TH1D("hc0pull2", "Hessian matrix with squared weights [SumW2Error(true)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);
104 TH1D* hc1pull2 = new TH1D("hc1pull2", "Hessian matrix with squared weights [SumW2Error(true)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);
105 //asymptotically correct approach
106 TH1D* hc0pull3 = new TH1D("hc0pull3", "Asymptotically correct approach [Asymptotic(true)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);
107 TH1D* hc1pull3 = new TH1D("hc1pull3", "Asymptotically correct approach [Asymptotic(true)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);
108
109 //number of pseudoexperiments (toys) and number of events per pseudoexperiment
110 constexpr unsigned int ntoys = 500;
111 constexpr unsigned int nstats = 5000;
112 //parameters used in the generation
113 constexpr double c0gen = 0.0;
114 constexpr double c1gen = 0.0;
115
116 // Silence fitting and minimisation messages
117 auto& msgSv = RooMsgService::instance();
118 msgSv.getStream(1).removeTopic(RooFit::Minimization);
119 msgSv.getStream(1).removeTopic(RooFit::Fitting);
120
121 std::cout << "Running " << ntoys*3 << " toy fits ..." << std::endl;
122
123 // M a i n l o o p : r u n p s e u d o e x p e r i m e n t s
124 //----------------------------------------------------------------
125 for (unsigned int i=0; i<ntoys; i++) {
126 //S e t u p p a r a m e t e r s a n d P D F
127 //-----------------------------------------------
128 //angle theta and the weight to account for the acceptance effect
129 RooRealVar costheta("costheta","costheta", -1.0, 1.0);
130 RooRealVar weight("weight","weight", 0.0, 1000.0);
131
132 //initialise parameters to fit
133 RooRealVar c0("c0","0th-order coefficient", c0gen, -1.0, 1.0);
134 RooRealVar c1("c1","1st-order coefficient", c1gen, -1.0, 1.0);
135 c0.setError(0.01);
136 c1.setError(0.01);
137 //create simple second-order polynomial as probability density function
138 RooPolynomial pol("pol", "pol", costheta, RooArgList(c0, c1), 1);
139
140 //G e n e r a t e d a t a s e t f o r p s e u d o e x p e r i m e n t i
141 //-------------------------------------------------------------------------------
142 RooDataSet data("data","data",RooArgSet(costheta, weight), WeightVar("weight"));
143 //generate nstats events
144 for (unsigned int j=0; j<nstats; j++) {
145 bool finished = false;
146 //use simple accept/reject for generation
147 while (!finished) {
148 costheta = 2.0*rnd->Rndm()-1.0;
149 //efficiency for the specific value of cos(theta)
150 double eff = 1.0;
151 if (acceptancemodel == 1)
152 eff = 1.0 - 0.7 * costheta.getVal()*costheta.getVal();
153 else
154 eff = 0.3 + 0.7 * costheta.getVal()*costheta.getVal();
155 //use 1/eff as weight to account for acceptance
156 weight = 1.0/eff;
157 //accept/reject
158 if (10.0*rnd->Rndm() < eff*pol.getVal())
159 finished = true;
160 }
161 haccepted->Fill(costheta.getVal());
162 hweighted->Fill(costheta.getVal(), weight.getVal());
163 data.add(RooArgSet(costheta, weight), weight.getVal());
164 }
165
166 //F i t t o y u s i n g t h e t h r e e d i f f e r e n t a p p r o a c h e s t o u n c e r t a i n t y d e t e r m i n a t i o n
167 //-------------------------------------------------------------------------------------------------------------------------------------------------
168 //this uses the inverse weighted Hessian matrix
169 RooFitResult* result = pol.fitTo(data, Save(true), SumW2Error(false), PrintLevel(-1), BatchMode(true));
170 hc0pull1->Fill((c0.getVal()-c0gen)/c0.getError());
171 hc1pull1->Fill((c1.getVal()-c1gen)/c1.getError());
172
173 //this uses the correction with the Hesse matrix with squared weights
174 result = pol.fitTo(data, Save(true), SumW2Error(true), PrintLevel(-1), BatchMode(true));
175 hc0pull2->Fill((c0.getVal()-c0gen)/c0.getError());
176 hc1pull2->Fill((c1.getVal()-c1gen)/c1.getError());
177
178 //this uses the asymptotically correct approach
179 result = pol.fitTo(data, Save(true), AsymptoticError(true), PrintLevel(-1), BatchMode(true));
180 hc0pull3->Fill((c0.getVal()-c0gen)/c0.getError());
181 hc1pull3->Fill((c1.getVal()-c1gen)/c1.getError());
182 }
183
184 std::cout << "... done." << std::endl;
185
186 // P l o t o u t p u t d i s t r i b u t i o n s
187 //--------------------------------------------------
188
189 //plot accepted (weighted) events
190 gStyle->SetOptStat(0);
191 gStyle->SetOptFit(0);
192 TCanvas* cevents = new TCanvas("cevents", "cevents", 800, 600);
193 cevents->cd(1);
194 hweighted->SetMinimum(0.0);
195 hweighted->SetLineColor(2);
196 hweighted->Draw("hist");
197 haccepted->Draw("same hist");
198 TLegend* leg = new TLegend(0.6, 0.8, 0.9, 0.9);
199 leg->AddEntry(haccepted, "Accepted");
200 leg->AddEntry(hweighted, "Weighted");
201 leg->Draw();
202 cevents->Update();
203
204 //plot pull distributions
205 TCanvas* cpull = new TCanvas("cpull", "cpull", 1200, 800);
206 cpull->Divide(3,2);
207 cpull->cd(1);
208 gStyle->SetOptStat(1100);
209 gStyle->SetOptFit(11);
210 hc0pull1->Fit("gaus");
211 hc0pull1->Draw("ep");
212 cpull->cd(2);
213 hc0pull2->Fit("gaus");
214 hc0pull2->Draw("ep");
215 cpull->cd(3);
216 hc0pull3->Fit("gaus");
217 hc0pull3->Draw("ep");
218 cpull->cd(4);
219 hc1pull1->Fit("gaus");
220 hc1pull1->Draw("ep");
221 cpull->cd(5);
222 hc1pull2->Fit("gaus");
223 hc1pull2->Draw("ep");
224 cpull->cd(6);
225 hc1pull3->Fit("gaus");
226 hc1pull3->Draw("ep");
227 cpull->Update();
228
229 return 0;
230}
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
R__EXTERN TStyle * gStyle
Definition: TStyle.h:414
RooArgList is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgList.h:22
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:56
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:55
RooFitResult is a container class to hold the input and output of a PDF fit to a dataset.
Definition: RooFitResult.h:40
static RooMsgService & instance()
Return reference to singleton instance.
RooPolynomial implements a polynomial p.d.f of the form.
Definition: RooPolynomial.h:28
RooRealVar represents a variable that can be changed from the outside.
Definition: RooRealVar.h:40
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:40
virtual void SetMarkerStyle(Style_t mstyle=1)
Set the marker style.
Definition: TAttMarker.h:40
virtual void SetMarkerSize(Size_t msize=1)
Set the marker size.
Definition: TAttMarker.h:45
virtual void SetTextSize(Float_t tsize=1)
Set the text size.
Definition: TAttText.h:47
The Canvas class.
Definition: TCanvas.h:23
TVirtualPad * cd(Int_t subpadnumber=0) override
Set current canvas & pad.
Definition: TCanvas.cxx:711
void Update() override
Update canvas pad buffers.
Definition: TCanvas.cxx:2478
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:620
virtual TFitResultPtr Fit(const char *formula, Option_t *option="", Option_t *goption="", Double_t xmin=0, Double_t xmax=0)
Fit histogram with function fname.
Definition: TH1.cxx:3894
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3338
void Draw(Option_t *option="") override
Draw this histogram with options.
Definition: TH1.cxx:3060
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:401
This class displays a legend box (TPaveText) containing several legend entries.
Definition: TLegend.h:23
void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0) override
Automatic pad generation by division.
Definition: TPad.cxx:1153
Random number generator class based on M.
Definition: TRandom3.h:27
Double_t Rndm() override
Machine independent random number generator.
Definition: TRandom3.cxx:99
void SetSeed(ULong_t seed=0) override
Set the random generator sequence if seed is 0 (default value) a TUUID is generated and used to fill ...
Definition: TRandom3.cxx:206
void SetPadTopMargin(Float_t margin=0.1)
Definition: TStyle.h:343
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1589
void SetPadBottomMargin(Float_t margin=0.1)
Definition: TStyle.h:342
void SetPaintTextFormat(const char *format="g")
Definition: TStyle.h:370
void SetEndErrorSize(Float_t np=2)
Set the size (in pixels) of the small lines drawn at the end of the error bars (TH1 or TGraphErrors).
Definition: TStyle.cxx:1289
void SetPadRightMargin(Float_t margin=0.1)
Definition: TStyle.h:345
void SetTitleOffset(Float_t offset=1, Option_t *axis="X")
Specify a parameter offset to control the distance between the axis and the axis title.
Definition: TStyle.cxx:1747
void SetPadLeftMargin(Float_t margin=0.1)
Definition: TStyle.h:344
void SetHistLineColor(Color_t color=1)
Definition: TStyle.h:364
void SetTitleSize(Float_t size=0.02, Option_t *axis="X")
Definition: TStyle.cxx:1766
void SetHistLineWidth(Width_t width=1)
Definition: TStyle.h:367
void SetLabelSize(Float_t size=0.04, Option_t *axis="X")
Set size of axis labels.
Definition: TStyle.cxx:1393
void SetOptFit(Int_t fit=1)
The type of information about fit parameters printed in the histogram statistics box can be selected ...
Definition: TStyle.cxx:1542
RooCmdArg WeightVar(const char *name, bool reinterpretAsWeight=false)
RooCmdArg AsymptoticError(bool flag)
RooCmdArg Save(bool flag=true)
RooCmdArg SumW2Error(bool flag)
RooCmdArg PrintLevel(Int_t code)
RooCmdArg BatchMode(std::string const &batchMode="cpu")
return c1
Definition: legend1.C:41
leg
Definition: legend1.C:34
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition: Common.h:18
@ Minimization
Definition: RooGlobalFunc.h:61