Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
rf403_weightedevts.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_roofit
3## \notebook
4##
5## 'DATA AND CATEGORIES' RooFit tutorial macro #403
6##
7## Using weights in unbinned datasets
8##
9## \macro_image
10## \macro_code
11## \macro_output
12##
13## \date February 2018
14## \authors Clemens Lange, Wouter Verkerke (C version)
15
16from __future__ import print_function
17import ROOT
18
19
20# Create observable and unweighted dataset
21# -------------------------------------------
22
23# Declare observable
24x = ROOT.RooRealVar("x", "x", -10, 10)
25x.setBins(40)
26
27# Construction a uniform pdf
28p0 = ROOT.RooPolynomial("px", "px", x)
29
30# Sample 1000 events from pdf
31data = p0.generate({x}, 1000)
32
33# Calculate weight and make dataset weighted
34# --------------------------------------------------
35
36# Construct formula to calculate (fake) weight for events
37wFunc = ROOT.RooFormulaVar("w", "event weight", "(x*x+10)", [x])
38
39# Add column with variable w to previously generated dataset
40w = data.addColumn(wFunc)
41
42# Dataset d is now a dataset with two observable (x,w) with 1000 entries
43data.Print()
44
45# Instruct dataset wdata in interpret w as event weight rather than as
46# observable
47wdata = ROOT.RooDataSet(data.GetName(), data.GetTitle(), data, data.get(), "", w.GetName())
48
49# Dataset d is now a dataset with one observable (x) with 1000 entries and
50# a sum of weights of ~430K
51wdata.Print()
52
53# Unbinned ML fit to weighted data
54# ---------------------------------------------------------------
55
56# Construction quadratic polynomial pdf for fitting
57a0 = ROOT.RooRealVar("a0", "a0", 1)
58a1 = ROOT.RooRealVar("a1", "a1", 0, -1, 1)
59a2 = ROOT.RooRealVar("a2", "a2", 1, 0, 10)
60p2 = ROOT.RooPolynomial("p2", "p2", x, [a0, a1, a2], 0)
61
62# Fit quadratic polynomial to weighted data
63
64# NOTE: A plain Maximum likelihood fit to weighted data does in general
65# NOT result in correct error estimates, individual
66# event weights represent Poisson statistics themselves.
67#
68# Fit with 'wrong' errors
69r_ml_wgt = p2.fitTo(wdata, Save=True, PrintLevel=-1)
70
71# A first order correction to estimated parameter errors in an
72# (unbinned) ML fit can be obtained by calculating the
73# covariance matrix as
74#
75# V' = V C-1 V
76#
77# where V is the covariance matrix calculated from a fit
78# to -logL = - sum [ w_i log f(x_i) ] and C is the covariance
79# matrix calculated from -logL' = -sum [ w_i^2 log f(x_i) ]
80# (i.e. the weights are applied squared)
81#
82# A fit in self mode can be performed as follows:
83
84r_ml_wgt_corr = p2.fitTo(wdata, Save=True, SumW2Error=True, PrintLevel=-1)
85
86# Plot weighted data and fit result
87# ---------------------------------------------------------------
88
89# Construct plot frame
90frame = x.frame(Title="Unbinned ML fit, chi^2 fit to weighted data")
91
92# Plot data using sum-of-weights-squared error rather than Poisson errors
93wdata.plotOn(frame, DataError="SumW2")
94
95# Overlay result of 2nd order polynomial fit to weighted data
96p2.plotOn(frame)
97
98# ML fit of pdf to equivalent unweighted dataset
99# ---------------------------------------------------------------------
100
101# Construct a pdf with the same shape as p0 after weighting
102genPdf = ROOT.RooGenericPdf("genPdf", "x*x+10", [x])
103
104# Sample a dataset with the same number of events as data
105data2 = genPdf.generate({x}, 1000)
106
107# Sample a dataset with the same number of weights as data
108data3 = genPdf.generate({x}, 43000)
109
110# Fit the 2nd order polynomial to both unweighted datasets and save the
111# results for comparison
112r_ml_unw10 = p2.fitTo(data2, Save=True, PrintLevel=-1)
113r_ml_unw43 = p2.fitTo(data3, Save=True, PrintLevel=-1)
114
115# Chis2 fit of pdf to binned weighted dataset
116# ---------------------------------------------------------------------------
117
118# Construct binned clone of unbinned weighted dataset
119binnedData = wdata.binnedClone()
120binnedData.Print("v")
121
122# Perform chi2 fit to binned weighted dataset using sum-of-weights errors
123#
124# NB: Within the usual approximations of a chi2 fit, chi2 fit to weighted
125# data using sum-of-weights-squared errors does give correct error
126# estimates
127chi2 = p2.createChi2(binnedData, ROOT.RooFit.DataError("SumW2"))
128m = ROOT.RooMinimizer(chi2)
129m.migrad()
130m.hesse()
131
132# Plot chi^2 fit result on frame as well
133r_chi2_wgt = m.save()
134p2.plotOn(frame, LineStyle="--", LineColor="r")
135
136# Compare fit results of chi2, L fits to (un)weighted data
137# ------------------------------------------------------------
138
139# Note that ML fit on 1Kevt of weighted data is closer to result of ML fit on 43Kevt of unweighted data
140# than to 1Kevt of unweighted data, the reference chi^2 fit with SumW2 error gives a result closer to
141# that of an unbinned ML fit to 1Kevt of unweighted data.
142
143print("==> ML Fit results on 1K unweighted events")
144r_ml_unw10.Print()
145print("==> ML Fit results on 43K unweighted events")
146r_ml_unw43.Print()
147print("==> ML Fit results on 1K weighted events with a summed weight of 43K")
148r_ml_wgt.Print()
149print("==> Corrected ML Fit results on 1K weighted events with a summed weight of 43K")
150r_ml_wgt_corr.Print()
151print("==> Chi2 Fit results on 1K weighted events with a summed weight of 43K")
152r_chi2_wgt.Print()
153
154c = ROOT.TCanvas("rf403_weightedevts", "rf403_weightedevts", 600, 600)
155ROOT.gPad.SetLeftMargin(0.15)
156frame.GetYaxis().SetTitleOffset(1.8)
157frame.Draw()
158
159c.SaveAs("rf403_weightedevts.png")