ROOT   Reference Guide
rs_numberCountingCombination.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_roostats
3 /// \notebook -js
4 /// 'Number Counting Example' RooStats tutorial macro #100
5 ///
6 /// This tutorial shows an example of a combination of
7 /// two searches using number counting with background uncertainty.
8 ///
9 /// The macro uses a RooStats "factory" to construct a PDF
10 /// that represents the two number counting analyses with background
11 /// uncertainties. The uncertainties are taken into account by
12 /// considering a sideband measurement of a size that corresponds to the
13 /// background uncertainty. The problem has been studied in these references:
14 /// - http://arxiv.org/abs/physics/0511028
15 /// - http://arxiv.org/abs/physics/0702156
16 /// - http://cdsweb.cern.ch/record/1099969?ln=en
17 ///
18 /// After using the factory to make the model, we use a RooStats
19 /// ProfileLikelihoodCalculator for a Hypothesis test and a confidence interval.
20 /// The calculator takes into account systematics by eliminating nuisance parameters
21 /// with the profile likelihood. This is equivalent to the method of MINOS.
22 ///
23 ///
24 /// \macro_image
25 /// \macro_output
26 /// \macro_code
27 ///
28 /// \author Kyle Cranmer
29
32 #include "RooStats/ConfInterval.h"
35 #include "RooRealVar.h"
36
37 #include <cassert>
38
40 using namespace RooFit;
41 using namespace RooStats;
42
43 // declare three variations on the same tutorial
44 void rs_numberCountingCombination_expected();
45 void rs_numberCountingCombination_observed();
46 void rs_numberCountingCombination_observedWithTau();
47
48 // -------------------------------
49 // main driver to choose one
50 void rs_numberCountingCombination(int flag = 1)
51 {
52  if (flag == 1)
53  rs_numberCountingCombination_expected();
54  if (flag == 2)
55  rs_numberCountingCombination_observed();
56  if (flag == 3)
57  rs_numberCountingCombination_observedWithTau();
58 }
59
60 // -------------------------------
61 void rs_numberCountingCombination_expected()
62 {
63
64  /////////////////////////////////////////
65  // An example of a number counting combination with two channels.
66  // We consider both hypothesis testing and the equivalent confidence interval.
67  /////////////////////////////////////////
68
69  /////////////////////////////////////////
70  // The Model building stage
71  /////////////////////////////////////////
72
73  // Step 1, define arrays with signal & bkg expectations and background uncertainties
74  Double_t s[2] = {20., 10.}; // expected signal
75  Double_t b[2] = {100., 100.}; // expected background
76  Double_t db[2] = {.0100, .0100}; // fractional background uncertainty
77
78  // Step 2, use a RooStats factory to build a PDF for a
79  // number counting combination and add it to the workspace.
80  // We need to give the signal expectation to relate the masterSignal
81  // to the signal contribution in the individual channels.
82  // The model neglects correlations in background uncertainty,
83  // but they could be added without much change to the example.
85  RooWorkspace *wspace = new RooWorkspace();
86  f.AddModel(s, 2, wspace, "TopLevelPdf", "masterSignal");
87
88  // Step 3, use a RooStats factory to add datasets to the workspace.
89  // Step 3a.
90  // Add the expected data to the workspace
91  f.AddExpData(s, b, db, 2, wspace, "ExpectedNumberCountingData");
92
93  // see below for a printout of the workspace
94  // wspace->Print(); //uncomment to see structure of workspace
95
96  /////////////////////////////////////////
97  // The Hypothesis testing stage:
98  /////////////////////////////////////////
99  // Step 4, Define the null hypothesis for the calculator
100  // Here you need to know the name of the variables corresponding to hypothesis.
101  RooRealVar *mu = wspace->var("masterSignal");
102  RooArgSet *poi = new RooArgSet(*mu);
103  RooArgSet *nullParams = new RooArgSet("nullParams");
105  // here we explicitly set the value of the parameters for the null
106  nullParams->setRealValue("masterSignal", 0);
107
108  // Step 5, Create a calculator for doing the hypothesis test.
109  // because this is a
110  ProfileLikelihoodCalculator plc(*wspace->data("ExpectedNumberCountingData"), *wspace->pdf("TopLevelPdf"), *poi, 0.05,
111  nullParams);
112
113  // Step 6, Use the Calculator to get a HypoTestResult
114  HypoTestResult *htr = plc.GetHypoTest();
115  assert(htr != 0);
116  cout << "-------------------------------------------------" << endl;
117  cout << "The p-value for the null is " << htr->NullPValue() << endl;
118  cout << "Corresponding to a significance of " << htr->Significance() << endl;
119  cout << "-------------------------------------------------\n\n" << endl;
120
121  /* expected case should return:
122  -------------------------------------------------
123  The p-value for the null is 0.015294
124  Corresponding to a significance of 2.16239
125  -------------------------------------------------
126  */
127
128  //////////////////////////////////////////
129  // Confidence Interval Stage
130
131  // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval.
132  // We need to specify what are our parameters of interest
133  RooArgSet *paramsOfInterest = nullParams; // they are the same as before in this case
134  plc.SetParameters(*paramsOfInterest);
135  LikelihoodInterval *lrint = (LikelihoodInterval *)plc.GetInterval(); // that was easy.
136  lrint->SetConfidenceLevel(0.95);
137
138  // Step 9, make a plot of the likelihood ratio and the interval obtained
139  // paramsOfInterest->setRealValue("masterSignal",1.);
140  // find limits
141  double lower = lrint->LowerLimit(*mu);
142  double upper = lrint->UpperLimit(*mu);
143
144  LikelihoodIntervalPlot lrPlot(lrint);
145  lrPlot.SetMaximum(3.);
146  lrPlot.Draw();
147
148  // Step 10a. Get upper and lower limits
149  cout << "lower limit on master signal = " << lower << endl;
150  cout << "upper limit on master signal = " << upper << endl;
151
152  // Step 10b, Ask if masterSignal=0 is in the interval.
153  // Note, this is equivalent to the question of a 2-sigma hypothesis test:
154  // "is the parameter point masterSignal=0 inside the 95% confidence interval?"
155  // Since the significance of the Hypothesis test was > 2-sigma it should not be:
156  // eg. we exclude masterSignal=0 at 95% confidence.
157  paramsOfInterest->setRealValue("masterSignal", 0.);
158  cout << "-------------------------------------------------" << endl;
159  std::cout << "Consider this parameter point:" << std::endl;
160  paramsOfInterest->first()->Print();
161  if (lrint->IsInInterval(*paramsOfInterest))
162  std::cout << "It IS in the interval." << std::endl;
163  else
164  std::cout << "It is NOT in the interval." << std::endl;
165  cout << "-------------------------------------------------\n\n" << endl;
166
167  // Step 10c, We also ask about the parameter point masterSignal=2, which is inside the interval.
168  paramsOfInterest->setRealValue("masterSignal", 2.);
169  cout << "-------------------------------------------------" << endl;
170  std::cout << "Consider this parameter point:" << std::endl;
171  paramsOfInterest->first()->Print();
172  if (lrint->IsInInterval(*paramsOfInterest))
173  std::cout << "It IS in the interval." << std::endl;
174  else
175  std::cout << "It is NOT in the interval." << std::endl;
176  cout << "-------------------------------------------------\n\n" << endl;
177
178  delete lrint;
179  delete htr;
180  delete wspace;
181  delete poi;
182  delete nullParams;
183
184  /*
185  // Here's an example of what is in the workspace
186  // wspace->Print();
187  RooWorkspace(NumberCountingWS) Number Counting WS contents
188
189  variables
190  ---------
191  (x_0,masterSignal,expected_s_0,b_0,y_0,tau_0,x_1,expected_s_1,b_1,y_1,tau_1)
192
193  p.d.f.s
194  -------
195  RooProdPdf::joint[ pdfs=(sigRegion_0,sideband_0,sigRegion_1,sideband_1) ] = 2.20148e-08
196  RooPoisson::sigRegion_0[ x=x_0 mean=splusb_0 ] = 0.036393
197  RooPoisson::sideband_0[ x=y_0 mean=bTau_0 ] = 0.00398939
198  RooPoisson::sigRegion_1[ x=x_1 mean=splusb_1 ] = 0.0380088
199  RooPoisson::sideband_1[ x=y_1 mean=bTau_1 ] = 0.00398939
200
201  functions
202  --------
203  RooAddition::splusb_0[ set1=(s_0,b_0) set2=() ] = 120
204  RooProduct::s_0[ compRSet=(masterSignal,expected_s_0) compCSet=() ] = 20
205  RooProduct::bTau_0[ compRSet=(b_0,tau_0) compCSet=() ] = 10000
206  RooAddition::splusb_1[ set1=(s_1,b_1) set2=() ] = 110
207  RooProduct::s_1[ compRSet=(masterSignal,expected_s_1) compCSet=() ] = 10
208  RooProduct::bTau_1[ compRSet=(b_1,tau_1) compCSet=() ] = 10000
209
210  datasets
211  --------
212  RooDataSet::ExpectedNumberCountingData(x_0,y_0,x_1,y_1)
213
214  embedded pre-calculated expensive components
215  -------------------------------------------
216  */
217 }
218
219 void rs_numberCountingCombination_observed()
220 {
221
222  /////////////////////////////////////////
223  // The same example with observed data in a main
224  // measurement and an background-only auxiliary
225  // measurement with a factor tau more background
226  // than in the main measurement.
227
228  /////////////////////////////////////////
229  // The Model building stage
230  /////////////////////////////////////////
231
232  // Step 1, define arrays with signal & bkg expectations and background uncertainties
233  // We still need the expectation to relate signal in different channels with the master signal
234  Double_t s[2] = {20., 10.}; // expected signal
235
236  // Step 2, use a RooStats factory to build a PDF for a
237  // number counting combination and add it to the workspace.
238  // We need to give the signal expectation to relate the masterSignal
239  // to the signal contribution in the individual channels.
240  // The model neglects correlations in background uncertainty,
241  // but they could be added without much change to the example.
243  RooWorkspace *wspace = new RooWorkspace();
244  f.AddModel(s, 2, wspace, "TopLevelPdf", "masterSignal");
245
246  // Step 3, use a RooStats factory to add datasets to the workspace.
247  // Add the observed data to the workspace
248  Double_t mainMeas[2] = {123., 117.}; // observed main measurement
249  Double_t bkgMeas[2] = {111.23, 98.76}; // observed background
250  Double_t dbMeas[2] = {.011, .0095}; // observed fractional background uncertainty
251  f.AddData(mainMeas, bkgMeas, dbMeas, 2, wspace, "ObservedNumberCountingData");
252
253  // see below for a printout of the workspace
254  // wspace->Print(); //uncomment to see structure of workspace
255
256  /////////////////////////////////////////
257  // The Hypothesis testing stage:
258  /////////////////////////////////////////
259  // Step 4, Define the null hypothesis for the calculator
260  // Here you need to know the name of the variables corresponding to hypothesis.
261  RooRealVar *mu = wspace->var("masterSignal");
262  RooArgSet *poi = new RooArgSet(*mu);
263  RooArgSet *nullParams = new RooArgSet("nullParams");
265  // here we explicitly set the value of the parameters for the null
266  nullParams->setRealValue("masterSignal", 0);
267
268  // Step 5, Create a calculator for doing the hypothesis test.
269  // because this is a
270  ProfileLikelihoodCalculator plc(*wspace->data("ObservedNumberCountingData"), *wspace->pdf("TopLevelPdf"), *poi, 0.05,
271  nullParams);
272
273  wspace->var("tau_0")->Print();
274  wspace->var("tau_1")->Print();
275
276  // Step 7, Use the Calculator to get a HypoTestResult
277  HypoTestResult *htr = plc.GetHypoTest();
278  cout << "-------------------------------------------------" << endl;
279  cout << "The p-value for the null is " << htr->NullPValue() << endl;
280  cout << "Corresponding to a significance of " << htr->Significance() << endl;
281  cout << "-------------------------------------------------\n\n" << endl;
282
283  /* observed case should return:
284  -------------------------------------------------
285  The p-value for the null is 0.0351669
286  Corresponding to a significance of 1.80975
287  -------------------------------------------------
288  */
289
290  //////////////////////////////////////////
291  // Confidence Interval Stage
292
293  // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval.
294  // We need to specify what are our parameters of interest
295  RooArgSet *paramsOfInterest = nullParams; // they are the same as before in this case
296  plc.SetParameters(*paramsOfInterest);
297  LikelihoodInterval *lrint = (LikelihoodInterval *)plc.GetInterval(); // that was easy.
298  lrint->SetConfidenceLevel(0.95);
299
300  // Step 9c. Get upper and lower limits
301  cout << "lower limit on master signal = " << lrint->LowerLimit(*mu) << endl;
302  cout << "upper limit on master signal = " << lrint->UpperLimit(*mu) << endl;
303
304  delete lrint;
305  delete htr;
306  delete wspace;
307  delete nullParams;
308  delete poi;
309 }
310
311 void rs_numberCountingCombination_observedWithTau()
312 {
313
314  /////////////////////////////////////////
315  // The same example with observed data in a main
316  // measurement and an background-only auxiliary
317  // measurement with a factor tau more background
318  // than in the main measurement.
319
320  /////////////////////////////////////////
321  // The Model building stage
322  /////////////////////////////////////////
323
324  // Step 1, define arrays with signal & bkg expectations and background uncertainties
325  // We still need the expectation to relate signal in different channels with the master signal
326  Double_t s[2] = {20., 10.}; // expected signal
327
328  // Step 2, use a RooStats factory to build a PDF for a
329  // number counting combination and add it to the workspace.
330  // We need to give the signal expectation to relate the masterSignal
331  // to the signal contribution in the individual channels.
332  // The model neglects correlations in background uncertainty,
333  // but they could be added without much change to the example.
335  RooWorkspace *wspace = new RooWorkspace();
336  f.AddModel(s, 2, wspace, "TopLevelPdf", "masterSignal");
337
338  // Step 3, use a RooStats factory to add datasets to the workspace.
339  // Add the observed data to the workspace in the on-off problem.
340  Double_t mainMeas[2] = {123., 117.}; // observed main measurement
341  Double_t sideband[2] = {11123., 9876.}; // observed sideband
342  Double_t tau[2] = {100., 100.}; // ratio of bkg in sideband to bkg in main measurement, from experimental design.
343  f.AddDataWithSideband(mainMeas, sideband, tau, 2, wspace, "ObservedNumberCountingDataWithSideband");
344
345  // see below for a printout of the workspace
346  // wspace->Print(); //uncomment to see structure of workspace
347
348  /////////////////////////////////////////
349  // The Hypothesis testing stage:
350  /////////////////////////////////////////
351  // Step 4, Define the null hypothesis for the calculator
352  // Here you need to know the name of the variables corresponding to hypothesis.
353  RooRealVar *mu = wspace->var("masterSignal");
354  RooArgSet *poi = new RooArgSet(*mu);
355  RooArgSet *nullParams = new RooArgSet("nullParams");
357  // here we explicitly set the value of the parameters for the null
358  nullParams->setRealValue("masterSignal", 0);
359
360  // Step 5, Create a calculator for doing the hypothesis test.
361  // because this is a
362  ProfileLikelihoodCalculator plc(*wspace->data("ObservedNumberCountingDataWithSideband"), *wspace->pdf("TopLevelPdf"),
363  *poi, 0.05, nullParams);
364
365  // Step 7, Use the Calculator to get a HypoTestResult
366  HypoTestResult *htr = plc.GetHypoTest();
367  cout << "-------------------------------------------------" << endl;
368  cout << "The p-value for the null is " << htr->NullPValue() << endl;
369  cout << "Corresponding to a significance of " << htr->Significance() << endl;
370  cout << "-------------------------------------------------\n\n" << endl;
371
372  /* observed case should return:
373  -------------------------------------------------
374  The p-value for the null is 0.0352035
375  Corresponding to a significance of 1.80928
376  -------------------------------------------------
377  */
378
379  //////////////////////////////////////////
380  // Confidence Interval Stage
381
382  // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval.
383  // We need to specify what are our parameters of interest
384  RooArgSet *paramsOfInterest = nullParams; // they are the same as before in this case
385  plc.SetParameters(*paramsOfInterest);
386  LikelihoodInterval *lrint = (LikelihoodInterval *)plc.GetInterval(); // that was easy.
387  lrint->SetConfidenceLevel(0.95);
388
389  // Step 9c. Get upper and lower limits
390  cout << "lower limit on master signal = " << lrint->LowerLimit(*mu) << endl;
391  cout << "upper limit on master signal = " << lrint->UpperLimit(*mu) << endl;
392
393  delete lrint;
394  delete htr;
395  delete wspace;
396  delete nullParams;
397  delete poi;
398 }
RooWorkspace::data
RooAbsData * data(const char *name) const
Retrieve dataset (binned or unbinned) with given name. A null pointer is returned if not found.
Definition: RooWorkspace.cxx:1368
RooStats::HypoTestResult::Significance
virtual Double_t Significance() const
familiar name for the Null p-value in terms of 1-sided Gaussian significance
Definition: HypoTestResult.h:68
RooArgSet::setRealValue
Bool_t setRealValue(const char *name, Double_t newVal=0, Bool_t verbose=kFALSE)
Set value of a RooAbsRealLValye stored in set with given name to newVal No error messages are printed...
Definition: RooArgSet.cxx:495
f
#define f(i)
Definition: RSha256.hxx:104
RooAbsCollection::first
RooAbsArg * first() const
Definition: RooAbsCollection.h:176
RooStats::ProfileLikelihoodCalculator
The ProfileLikelihoodCalculator is a concrete implementation of CombinedCalculator (the interface cla...
Definition: ProfileLikelihoodCalculator.h:22
RooStats::HypoTestResult::NullPValue
virtual Double_t NullPValue() const
Return p-value for null hypothesis.
Definition: HypoTestResult.h:45
RooAbsArg::Print
virtual void Print(Option_t *options=0) const
Print the object to the defaultPrintStream().
Definition: RooAbsArg.h:320
TGeant4Unit::s
static constexpr double s
Definition: TGeant4SystemOfUnits.h:162
ConfInterval.h
NumberCountingPdfFactory.h
HypoTestResult.h
b
#define b(i)
Definition: RSha256.hxx:100
RooStats::NumberCountingPdfFactory
A factory for building PDFs and data for a number counting combination.
Definition: NumberCountingPdfFactory.h:22
RooStats::LikelihoodIntervalPlot
This class provides simple and straightforward utilities to plot a LikelihoodInterval object.
Definition: LikelihoodIntervalPlot.h:30
RooStats::LikelihoodInterval::LowerLimit
Double_t LowerLimit(const RooRealVar &param)
return the lower bound of the interval on a given parameter
Definition: LikelihoodInterval.h:65
RooFit
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition: RooCFunction1Binding.h:29
RooWorkspace::pdf
RooAbsPdf * pdf(const char *name) const
Retrieve p.d.f (RooAbsPdf) with given name. A null pointer is returned if not found.
Definition: RooWorkspace.cxx:1277
RooRealVar.h
RooStats::LikelihoodInterval::IsInInterval
virtual Bool_t IsInInterval(const RooArgSet &) const
check if given point is in the interval
Definition: LikelihoodInterval.cxx:117
RooWorkspace
The RooWorkspace is a persistable container for RooFit projects.
Definition: RooWorkspace.h:43
Double_t
double Double_t
Definition: RtypesCore.h:59
RooStats
Namespace for the RooStats classes.
Definition: Asimov.h:19
ProfileLikelihoodCalculator.h
RooStats::HypoTestResult
HypoTestResult is a base class for results from hypothesis tests.
Definition: HypoTestResult.h:22
RooWorkspace::var
RooRealVar * var(const char *name) const
Retrieve real-valued variable (RooRealVar) with given name. A null pointer is returned if not found.
Definition: RooWorkspace.cxx:1295
virtual void addClone(const RooAbsCollection &col, Bool_t silent=kFALSE)
Add a collection of arguments to this collection by calling addOwned() for each element in the source...
Definition: RooArgSet.h:96
LikelihoodIntervalPlot.h
RooRealVar
RooRealVar represents a variable that can be changed from the outside.
Definition: RooRealVar.h:36
RooStats::LikelihoodInterval
LikelihoodInterval is a concrete implementation of the RooStats::ConfInterval interface.
Definition: LikelihoodInterval.h:34
RooArgSet
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:28
RooStats::LikelihoodInterval::UpperLimit
Double_t UpperLimit(const RooRealVar &param)
return the upper bound of the interval on a given parameter
Definition: LikelihoodInterval.h:69
RooStats::LikelihoodInterval::SetConfidenceLevel
virtual void SetConfidenceLevel(Double_t cl)
set the confidence level for the interval (e.g 0.682 for a 1-sigma interval)
Definition: LikelihoodInterval.h:52