Logo ROOT  
Reference Guide
rf611_weightedfits.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_roofit
3 /// \notebook -js
4 ///
5 /// Likelihood and minimization: Parameter uncertainties for weighted unbinned ML fits
6 ///
7 /// ## Parameter uncertainties for weighted unbinned ML fits
8 ///
9 /// Based on example from https://arxiv.org/abs/1911.01303
10 ///
11 /// This example compares different approaches to determining parameter uncertainties in weighted unbinned maximum likelihood fits.
12 /// Performing a weighted unbinned maximum likelihood fits can be useful to account for acceptance effects and to statistically subtract background events using the sPlot formalism.
13 /// It is however well known that the inverse Hessian matrix does not yield parameter uncertainties with correct coverage in the presence of event weights.
14 /// Three approaches to the determination of parameter uncertainties are compared in this example:
15 ///
16 /// 1. Using the inverse weighted Hessian matrix [`SumW2Error(false)`]
17 ///
18 /// 2. Using the expression [`SumW2Error(true)`]
19 /// \f[
20 /// V_{ij} = H_{ik}^{-1} C_{kl} H_{lj}^{-1}
21 /// \f]
22 /// where H is the weighted Hessian matrix and C is the Hessian matrix with squared weights
23 ///
24 /// 3. The asymptotically correct approach (for details please see https://arxiv.org/abs/1911.01303) [`Asymptotic(true)`]
25 /// \f[
26 /// V_{ij} = H_{ik}^{-1} D_{kl} H_{lj}^{-1}
27 /// \f]
28 /// where H is the weighted Hessian matrix and D is given by
29 /// \f[
30 /// D_{kl} = \sum_{e=1}^{N} w_e^2 \frac{\partial \log(P)}{\partial \lambda_k}\frac{\partial \log(P)}{\partial \lambda_l}
31 /// \f]
32 /// with the event weight \f$w_e\f$.
33 ///
34 /// The example performs the fit of a second order polynomial in the angle cos(theta) [-1,1] to a weighted data set.
35 /// The polynomial is given by
36 /// \f[
37 /// P = \frac{ 1 + c_0 \cdot \cos(\theta) + c_1 \cdot \cos(\theta) \cdot \cos(\theta) }{\mathrm{Norm}}
38 /// \f]
39 /// The two coefficients \f$ c_0 \f$ and \f$ c_1 \f$ and their uncertainties are to be determined in the fit.
40 ///
41 /// The per-event weight is used to correct for an acceptance effect, two different acceptance models can be studied:
42 /// - `acceptancemodel==1`: eff = \f$ 0.3 + 0.7 \cdot \cos(\theta) \cdot \cos(\theta) \f$
43 /// - `acceptancemodel==2`: eff = \f$ 1.0 - 0.7 \cdot \cos(\theta) \cdot \cos(\theta) \f$
44 /// The data is generated to be flat before the acceptance effect.
45 ///
46 /// The performance of the different approaches to determine parameter uncertainties is compared using the pull distributions from a large number of pseudoexperiments.
47 /// The pull is defined as \f$ (\lambda_i - \lambda_{gen})/\sigma(\lambda_i) \f$, where \f$ \lambda_i \f$ is the fitted parameter and \f$ \sigma(\lambda_i) \f$ its
48 /// uncertainty for pseudoexperiment number i.
49 /// If the fit is unbiased and the parameter uncertainties are estimated correctly, the pull distribution should be a Gaussian centered around zero with a width of one.
50 ///
51 /// \macro_image
52 /// \macro_output
53 /// \macro_code
54 ///
55 /// \date 11/2019
56 /// \author Christoph Langenbruch
57 
58 #include "TH1D.h"
59 #include "TCanvas.h"
60 #include "TROOT.h"
61 #include "TStyle.h"
62 #include "TRandom3.h"
63 #include "TLegend.h"
64 #include "RooRealVar.h"
65 #include "RooFitResult.h"
66 #include "RooDataSet.h"
67 #include "RooPolynomial.h"
68 
69 using namespace RooFit;
70 
71 
72 int rf611_weightedfits(int acceptancemodel=2) {
73  // I n i t i a l i s a t i o n a n d S e t u p
74  //------------------------------------------------
75 
76  //plotting options
77  gStyle->SetPaintTextFormat(".1f");
78  gStyle->SetEndErrorSize(6.0);
79  gStyle->SetTitleSize(0.05, "XY");
80  gStyle->SetLabelSize(0.05, "XY");
81  gStyle->SetTitleOffset(0.9, "XY");
82  gStyle->SetTextSize(0.05);
83  gStyle->SetPadLeftMargin(0.125);
84  gStyle->SetPadBottomMargin(0.125);
85  gStyle->SetPadTopMargin(0.075);
86  gStyle->SetPadRightMargin(0.075);
87  gStyle->SetMarkerStyle(20);
88  gStyle->SetMarkerSize(1.0);
91 
92  //initialise TRandom3
93  TRandom3* rnd = new TRandom3();
94  rnd->SetSeed(191101303);
95 
96  //accepted events and events weighted to account for the acceptance
97  TH1D* haccepted = new TH1D("haccepted", "Generated events;cos(#theta);#events", 40, -1.0, 1.0);
98  TH1D* hweighted = new TH1D("hweighted", "Generated events;cos(#theta);#events", 40, -1.0, 1.0);
99  //histograms holding pull distributions
100  //using the inverse Hessian matrix
101  TH1D* hc0pull1 = new TH1D("hc0pull1", "Inverse weighted Hessian matrix [SumW2Error(false)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);
102  TH1D* hc1pull1 = new TH1D("hc1pull1", "Inverse weighted Hessian matrix [SumW2Error(false)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);
103  //using the correction with the Hessian matrix with squared weights
104  TH1D* hc0pull2 = new TH1D("hc0pull2", "Hessian matrix with squared weights [SumW2Error(true)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);
105  TH1D* hc1pull2 = new TH1D("hc1pull2", "Hessian matrix with squared weights [SumW2Error(true)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);
106  //asymptotically correct approach
107  TH1D* hc0pull3 = new TH1D("hc0pull3", "Asymptotically correct approach [Asymptotic(true)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);
108  TH1D* hc1pull3 = new TH1D("hc1pull3", "Asymptotically correct approach [Asymptotic(true)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);
109 
110  //number of pseudoexperiments (toys) and number of events per pseudoexperiment
111  constexpr unsigned int ntoys = 500;
112  constexpr unsigned int nstats = 5000;
113  //parameters used in the generation
114  constexpr double c0gen = 0.0;
115  constexpr double c1gen = 0.0;
116 
117  // Silence fitting and minimisation messages
118  auto& msgSv = RooMsgService::instance();
119  msgSv.getStream(1).removeTopic(RooFit::Minimization);
120  msgSv.getStream(1).removeTopic(RooFit::Fitting);
121 
122  std::cout << "Running " << ntoys*3 << " toy fits ..." << std::endl;
123 
124  // M a i n l o o p : r u n p s e u d o e x p e r i m e n t s
125  //----------------------------------------------------------------
126  for (unsigned int i=0; i<ntoys; i++) {
127  //S e t u p p a r a m e t e r s a n d P D F
128  //-----------------------------------------------
129  //angle theta and the weight to account for the acceptance effect
130  RooRealVar costheta("costheta","costheta", -1.0, 1.0);
131  RooRealVar weight("weight","weight", 0.0, 1000.0);
132 
133  //initialise parameters to fit
134  RooRealVar c0("c0","0th-order coefficient", c0gen, -1.0, 1.0);
135  RooRealVar c1("c1","1st-order coefficient", c1gen, -1.0, 1.0);
136  c0.setError(0.01);
137  c1.setError(0.01);
138  //create simple second-order polynomial as probability density function
139  RooPolynomial pol("pol", "pol", costheta, RooArgList(c0, c1), 1);
140 
141  //G e n e r a t e d a t a s e t f o r p s e u d o e x p e r i m e n t i
142  //-------------------------------------------------------------------------------
143  RooDataSet data("data","data",RooArgSet(costheta, weight), WeightVar("weight"));
144  //generate nstats events
145  for (unsigned int j=0; j<nstats; j++) {
146  bool finished = false;
147  //use simple accept/reject for generation
148  while (!finished) {
149  costheta = 2.0*rnd->Rndm()-1.0;
150  //efficiency for the specific value of cos(theta)
151  double eff = 1.0;
152  if (acceptancemodel == 1)
153  eff = 1.0 - 0.7 * costheta.getVal()*costheta.getVal();
154  else
155  eff = 0.3 + 0.7 * costheta.getVal()*costheta.getVal();
156  //use 1/eff as weight to account for acceptance
157  weight = 1.0/eff;
158  //accept/reject
159  if (10.0*rnd->Rndm() < eff*pol.getVal())
160  finished = true;
161  }
162  haccepted->Fill(costheta.getVal());
163  hweighted->Fill(costheta.getVal(), weight.getVal());
164  data.add(RooArgSet(costheta, weight), weight.getVal());
165  }
166 
167  //F i t t o y u s i n g t h e t h r e e d i f f e r e n t a p p r o a c h e s t o u n c e r t a i n t y d e t e r m i n a t i o n
168  //-------------------------------------------------------------------------------------------------------------------------------------------------
169  //this uses the inverse weighted Hessian matrix
170  RooFitResult* result = pol.fitTo(data, Save(true), SumW2Error(false), PrintLevel(-1), BatchMode(true));
171  hc0pull1->Fill((c0.getVal()-c0gen)/c0.getError());
172  hc1pull1->Fill((c1.getVal()-c1gen)/c1.getError());
173 
174  //this uses the correction with the Hesse matrix with squared weights
175  result = pol.fitTo(data, Save(true), SumW2Error(true), PrintLevel(-1), BatchMode(true));
176  hc0pull2->Fill((c0.getVal()-c0gen)/c0.getError());
177  hc1pull2->Fill((c1.getVal()-c1gen)/c1.getError());
178 
179  //this uses the asymptotically correct approach
180  result = pol.fitTo(data, Save(true), AsymptoticError(true), PrintLevel(-1), BatchMode(true));
181  hc0pull3->Fill((c0.getVal()-c0gen)/c0.getError());
182  hc1pull3->Fill((c1.getVal()-c1gen)/c1.getError());
183  }
184 
185  std::cout << "... done." << std::endl;
186 
187  // P l o t o u t p u t d i s t r i b u t i o n s
188  //--------------------------------------------------
189 
190  //plot accepted (weighted) events
191  gStyle->SetOptStat(0);
192  gStyle->SetOptFit(0);
193  TCanvas* cevents = new TCanvas("cevents", "cevents", 800, 600);
194  cevents->cd(1);
195  hweighted->SetMinimum(0.0);
196  hweighted->SetLineColor(2);
197  hweighted->Draw("hist");
198  haccepted->Draw("same hist");
199  TLegend* leg = new TLegend(0.6, 0.8, 0.9, 0.9);
200  leg->AddEntry(haccepted, "Accepted");
201  leg->AddEntry(hweighted, "Weighted");
202  leg->Draw();
203  cevents->Update();
204 
205  //plot pull distributions
206  TCanvas* cpull = new TCanvas("cpull", "cpull", 1200, 800);
207  cpull->Divide(3,2);
208  cpull->cd(1);
209  gStyle->SetOptStat(1100);
210  gStyle->SetOptFit(11);
211  hc0pull1->Fit("gaus");
212  hc0pull1->Draw("ep");
213  cpull->cd(2);
214  hc0pull2->Fit("gaus");
215  hc0pull2->Draw("ep");
216  cpull->cd(3);
217  hc0pull3->Fit("gaus");
218  hc0pull3->Draw("ep");
219  cpull->cd(4);
220  hc1pull1->Fit("gaus");
221  hc1pull1->Draw("ep");
222  cpull->cd(5);
223  hc1pull2->Fit("gaus");
224  hc1pull2->Draw("ep");
225  cpull->cd(6);
226  hc1pull3->Fit("gaus");
227  hc1pull3->Draw("ep");
228  cpull->Update();
229 
230  return 0;
231 }
RooFit::BatchMode
RooCmdArg BatchMode(bool flag=true)
Definition: RooGlobalFunc.cxx:158
TStyle::SetLabelSize
void SetLabelSize(Float_t size=0.04, Option_t *axis="X")
Set size of axis labels.
Definition: TStyle.cxx:1395
RooFit::Minimization
@ Minimization
Definition: RooGlobalFunc.h:67
TStyle::SetPadBottomMargin
void SetPadBottomMargin(Float_t margin=0.1)
Definition: TStyle.h:341
TStyle::SetPaintTextFormat
void SetPaintTextFormat(const char *format="g")
Definition: TStyle.h:369
TH1::SetMinimum
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:396
RooFit::PrintLevel
RooCmdArg PrintLevel(Int_t code)
Definition: RooGlobalFunc.cxx:189
TStyle::SetTitleSize
void SetTitleSize(Float_t size=0.02, Option_t *axis="X")
Definition: TStyle.cxx:1770
RooFit::SumW2Error
RooCmdArg SumW2Error(Bool_t flag)
Definition: RooGlobalFunc.cxx:207
RooArgList
Definition: RooArgList.h:21
TLegend.h
RooFit::WeightVar
RooCmdArg WeightVar(const char *name, Bool_t reinterpretAsWeight=kFALSE)
Definition: RooGlobalFunc.cxx:123
TH1D
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:615
TStyle.h
TStyle::SetEndErrorSize
void SetEndErrorSize(Float_t np=2)
Set the size (in pixels) of the small lines drawn at the end of the error bars (TH1 or TGraphErrors).
Definition: TStyle.cxx:1291
TPad::Divide
virtual void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0)
Automatic pad generation by division.
Definition: TPad.cxx:1166
TCanvas.h
RooDataSet.h
RooFitResult
Definition: RooFitResult.h:40
TStyle::SetOptFit
void SetOptFit(Int_t fit=1)
The type of information about fit parameters printed in the histogram statistics box can be selected ...
Definition: TStyle.cxx:1544
RooPolynomial.h
TROOT.h
gStyle
R__EXTERN TStyle * gStyle
Definition: TStyle.h:412
TRandom3
Definition: TRandom3.h:27
RooFit
Definition: RooCFunction1Binding.h:29
TRandom3::SetSeed
virtual void SetSeed(ULong_t seed=0)
Set the random generator sequence if seed is 0 (default value) a TUUID is generated and used to fill ...
Definition: TRandom3.cxx:206
TCanvas::cd
TVirtualPad * cd(Int_t subpadnumber=0)
Set current canvas & pad.
Definition: TCanvas.cxx:704
RooPolynomial
Definition: RooPolynomial.h:28
RooFit::Fitting
@ Fitting
Definition: RooGlobalFunc.h:67
TH1::Fill
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3274
TRandom3.h
TStyle::SetHistLineWidth
void SetHistLineWidth(Width_t width=1)
Definition: TStyle.h:366
TStyle::SetPadTopMargin
void SetPadTopMargin(Float_t margin=0.1)
Definition: TStyle.h:342
TStyle::SetPadLeftMargin
void SetPadLeftMargin(Float_t margin=0.1)
Definition: TStyle.h:343
RooRealVar.h
TStyle::SetOptStat
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1592
RooFitResult.h
TStyle::SetPadRightMargin
void SetPadRightMargin(Float_t margin=0.1)
Definition: TStyle.h:344
TStyle::SetHistLineColor
void SetHistLineColor(Color_t color=1)
Definition: TStyle.h:363
TCanvas
Definition: TCanvas.h:23
TRandom3::Rndm
virtual Double_t Rndm()
Machine independent random number generator.
Definition: TRandom3.cxx:99
TCanvas::Update
virtual void Update()
Update canvas pad buffers.
Definition: TCanvas.cxx:2500
leg
leg
Definition: legend1.C:34
RooDataSet
Definition: RooDataSet.h:33
TH1::Fit
virtual TFitResultPtr Fit(const char *formula, Option_t *option="", Option_t *goption="", Double_t xmin=0, Double_t xmax=0)
Fit histogram with function fname.
Definition: TH1.cxx:3807
TLegend
Definition: TLegend.h:23
RooRealVar
Definition: RooRealVar.h:35
TH1D.h
RooFit::Save
RooCmdArg Save(Bool_t flag=kTRUE)
Definition: RooGlobalFunc.cxx:187
TStyle::SetTitleOffset
void SetTitleOffset(Float_t offset=1, Option_t *axis="X")
Specify a parameter offset to control the distance between the axis and the axis title.
Definition: TStyle.cxx:1751
RooMsgService::instance
static RooMsgService & instance()
Return reference to singleton instance.
Definition: RooMsgService.cxx:363
RooFit::AsymptoticError
RooCmdArg AsymptoticError(Bool_t flag)
Definition: RooGlobalFunc.cxx:208
RooArgSet
Definition: RooArgSet.h:28
TH1::Draw
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2997
c1
return c1
Definition: legend1.C:41