 ROOT   Reference Guide rf403_weightedevts.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_roofit
3## \notebook
4##
5## 'DATA AND CATEGORIES' RooFit tutorial macro #403
6##
7## Using weights in unbinned datasets
8##
9## \macro_code
10##
11## \date February 2018
12## \author Clemens Lange
13## \author Wouter Verkerke (C version)
14
15from __future__ import print_function
16import ROOT
17
18
19# Create observable and unweighted dataset
20# -------------------------------------------
21
22# Declare observable
23x = ROOT.RooRealVar("x", "x", -10, 10)
24x.setBins(40)
25
26# Construction a uniform pdf
27p0 = ROOT.RooPolynomial("px", "px", x)
28
29# Sample 1000 events from pdf
30data = p0.generate({x}, 1000)
31
32# Calculate weight and make dataset weighted
33# --------------------------------------------------
34
35# Construct formula to calculate (fake) weight for events
36wFunc = ROOT.RooFormulaVar("w", "event weight", "(x*x+10)", [x])
37
38# Add column with variable w to previously generated dataset
40
41# Dataset d is now a dataset with two observable (x,w) with 1000 entries
42data.Print()
43
44# Instruct dataset wdata in interpret w as event weight rather than as
45# observable
46wdata = ROOT.RooDataSet(data.GetName(), data.GetTitle(), data, data.get(), "", w.GetName())
47
48# Dataset d is now a dataset with one observable (x) with 1000 entries and
49# a sum of weights of ~430K
50wdata.Print()
51
52# Unbinned ML fit to weighted data
53# ---------------------------------------------------------------
54
55# Construction quadratic polynomial pdf for fitting
56a0 = ROOT.RooRealVar("a0", "a0", 1)
57a1 = ROOT.RooRealVar("a1", "a1", 0, -1, 1)
58a2 = ROOT.RooRealVar("a2", "a2", 1, 0, 10)
59p2 = ROOT.RooPolynomial("p2", "p2", x, [a0, a1, a2], 0)
60
61# Fit quadratic polynomial to weighted data
62
63# NOTE: A plain Maximum likelihood fit to weighted data does in general
64# NOT result in correct error estimates, individual
65# event weights represent Poisson statistics themselves.
66#
67# Fit with 'wrong' errors
68r_ml_wgt = p2.fitTo(wdata, Save=True)
69
70# A first order correction to estimated parameter errors in an
71# (unbinned) ML fit can be obtained by calculating the
72# covariance matrix as
73#
74# V' = V C-1 V
75#
76# where V is the covariance matrix calculated from a fit
77# to -logL = - sum [ w_i log f(x_i) ] and C is the covariance
78# matrix calculated from -logL' = -sum [ w_i^2 log f(x_i) ]
79# (i.e. the weights are applied squared)
80#
81# A fit in self mode can be performed as follows:
82
83r_ml_wgt_corr = p2.fitTo(wdata, Save=True, SumW2Error=True)
84
85# Plot weighted data and fit result
86# ---------------------------------------------------------------
87
88# Construct plot frame
89frame = x.frame(ROOT.RooFit.Title("Unbinned ML fit, chi^2 fit to weighted data"))
90
91# Plot data using sum-of-weights-squared error rather than Poisson errors
92wdata.plotOn(frame, ROOT.RooFit.DataError(ROOT.RooAbsData.SumW2))
93
94# Overlay result of 2nd order polynomial fit to weighted data
95p2.plotOn(frame)
96
97# ML fit of pdf to equivalent unweighted dataset
98# ---------------------------------------------------------------------
99
100# Construct a pdf with the same shape as p0 after weighting
101genPdf = ROOT.RooGenericPdf("genPdf", "x*x+10", [x])
102
103# Sample a dataset with the same number of events as data
104data2 = genPdf.generate({x}, 1000)
105
106# Sample a dataset with the same number of weights as data
107data3 = genPdf.generate({x}, 43000)
108
109# Fit the 2nd order polynomial to both unweighted datasets and save the
110# results for comparison
111r_ml_unw10 = p2.fitTo(data2, Save=True)
112r_ml_unw43 = p2.fitTo(data3, Save=True)
113
114# Chis2 fit of pdf to binned weighted dataset
115# ---------------------------------------------------------------------------
116
117# Construct binned clone of unbinned weighted dataset
118binnedData = wdata.binnedClone()
119binnedData.Print("v")
120
121# Perform chi2 fit to binned weighted dataset using sum-of-weights errors
122#
123# NB: Within the usual approximations of a chi2 fit, chi2 fit to weighted
124# data using sum-of-weights-squared errors does give correct error
125# estimates
126chi2 = ROOT.RooChi2Var("chi2", "chi2", p2, binnedData, ROOT.RooFit.DataError(ROOT.RooAbsData.SumW2))
127m = ROOT.RooMinimizer(chi2)
129m.hesse()
130
131# Plot chi^2 fit result on frame as well
132r_chi2_wgt = m.save()
133p2.plotOn(frame, LineStyle="--", LineColor="r")
134
135# Compare fit results of chi2, L fits to (un)weighted data
136# ------------------------------------------------------------
137
138# Note that ML fit on 1Kevt of weighted data is closer to result of ML fit on 43Kevt of unweighted data
139# than to 1Kevt of unweighted data, the reference chi^2 fit with SumW2 error gives a result closer to
140# that of an unbinned ML fit to 1Kevt of unweighted data.
141
142print("==> ML Fit results on 1K unweighted events")
143r_ml_unw10.Print()
144print("==> ML Fit results on 43K unweighted events")
145r_ml_unw43.Print()
146print("==> ML Fit results on 1K weighted events with a summed weight of 43K")
147r_ml_wgt.Print()
148print("==> Corrected ML Fit results on 1K weighted events with a summed weight of 43K")
149r_ml_wgt_corr.Print()
150print("==> Chi2 Fit results on 1K weighted events with a summed weight of 43K")
151r_chi2_wgt.Print()
152
153c = ROOT.TCanvas("rf403_weightedevts", "rf403_weightedevts", 600, 600)