Logo ROOT  
Reference Guide
NumberCountingPdfFactory.cxx
Go to the documentation of this file.
1// @(#)root/roostats:$Id$
2// Author: Kyle Cranmer 28/07/2008
3
4/*************************************************************************
5 * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12////////////////////////////////////////////////////////////////////////////////
13
14/** \class RooStats::NumberCountingPdfFactory
15 \ingroup Roostats
16
17A factory for building PDFs and data for a number counting combination.
18The factory produces a PDF for N channels with uncorrelated background
19uncertainty. Correlations can be added by extending this PDF with additional terms.
20The factory relates the signal in each channel to a master signal strength times the
21expected signal in each channel. Thus, the final test is performed on the master signal strength.
22This yields a more powerful test than letting signal in each channel be independent.
23
24The problem has been studied in these references:
25
26 - http://arxiv.org/abs/physics/0511028
27 - http://arxiv.org/abs/physics/0702156
28 - http://cdsweb.cern.ch/record/1099969?ln=en
29
30One can incorporate uncertainty on the expected signal by adding additional terms.
31For the future, perhaps this factory should be extended to include the efficiency terms automatically.
32
33*/
34
36
38
39#include "RooRealVar.h"
40#include "RooAddition.h"
41#include "RooProduct.h"
42#include "RooDataSet.h"
43#include "RooProdPdf.h"
44#include "RooFitResult.h"
45#include "RooPoisson.h"
46#include "RooGlobalFunc.h"
47#include "RooCmdArg.h"
48#include "RooWorkspace.h"
49#include "RooMsgService.h"
50#include "TTree.h"
51#include <sstream>
52
53
54
56
57
58using namespace RooStats;
59using namespace RooFit;
60using namespace std;
61
62
63////////////////////////////////////////////////////////////////////////////////
64/// constructor
65
66NumberCountingPdfFactory::NumberCountingPdfFactory() {
67}
68
69////////////////////////////////////////////////////////////////////////////////
70/// destructor
71
73}
74
75////////////////////////////////////////////////////////////////////////////////
76/// This method produces a PDF for N channels with uncorrelated background
77/// uncertainty. It relates the signal in each channel to a master signal strength times the
78/// expected signal in each channel.
79///
80/// For the future, perhaps this method should be extended to include the efficiency terms automatically.
81
83 Int_t nbins,
85 const char* pdfName, const char* muName) {
86
87
88
89 using namespace RooFit;
90 using std::vector;
91
92 TList likelihoodFactors;
93
94 // Double_t MaxSigma = 8; // Needed to set ranges for variables.
95
96 RooRealVar* masterSignal =
97 new RooRealVar(muName,"masterSignal",1., 0., 3.);
98
99
100 // loop over individual channels
101 for(Int_t i=0; i<nbins; ++i){
102 // need to name the variables dynamically, so please forgive the string manipulation and focus on values & ranges.
103 std::stringstream str;
104 str<<"_"<<i;
105 RooRealVar* expectedSignal =
106 new RooRealVar(("expected_s"+str.str()).c_str(),("expected_s"+str.str()).c_str(),sig[i], 0., 2*sig[i]);
107 expectedSignal->setConstant(kTRUE);
108
109 RooProduct* s =
110 new RooProduct(("s"+str.str()).c_str(),("s"+str.str()).c_str(), RooArgSet(*masterSignal, *expectedSignal));
111
112 RooRealVar* b =
113 new RooRealVar(("b"+str.str()).c_str(),("b"+str.str()).c_str(), .5, 0.,1.);
114 RooRealVar* tau =
115 new RooRealVar(("tau"+str.str()).c_str(),("tau"+str.str()).c_str(), .5, 0., 1.);
116 tau->setConstant(kTRUE);
117
118 RooAddition* splusb =
119 new RooAddition(("splusb"+str.str()).c_str(),("s"+str.str()+"+"+"b"+str.str()).c_str(),
120 RooArgSet(*s,*b));
121 RooProduct* bTau =
122 new RooProduct(("bTau"+str.str()).c_str(),("b*tau"+str.str()).c_str(), RooArgSet(*b, *tau));
123 RooRealVar* x =
124 new RooRealVar(("x"+str.str()).c_str(),("x"+str.str()).c_str(), 0.5 , 0., 1.);
125 RooRealVar* y =
126 new RooRealVar(("y"+str.str()).c_str(),("y"+str.str()).c_str(), 0.5, 0., 1.);
127
128
129 RooPoisson* sigRegion =
130 new RooPoisson(("sigRegion"+str.str()).c_str(),("sigRegion"+str.str()).c_str(), *x,*splusb);
131
132 //LM: need to set noRounding since y can take non integer values
133 RooPoisson* sideband =
134 new RooPoisson(("sideband"+str.str()).c_str(),("sideband"+str.str()).c_str(), *y,*bTau,true);
135
136 likelihoodFactors.Add(sigRegion);
137 likelihoodFactors.Add(sideband);
138
139 }
140
141 RooArgSet likelihoodFactorSet(likelihoodFactors);
142 RooProdPdf joint(pdfName,"joint", likelihoodFactorSet );
143 // joint.printCompactTree();
144
145 // add this PDF to workspace.
146 // Need to do import into workspace now to get all the structure imported as well.
147 // Just returning the WS will loose the rest of the structure b/c it will go out of scope
149 ws->import(joint);
151}
152
153////////////////////////////////////////////////////////////////////////////////
154/// Arguments are an array of expected signal, expected background, and relative
155/// background uncertainty (eg. 0.1 for 10% uncertainty), and the number of channels.
156
158 Double_t* back,
159 Double_t* back_syst,
160 Int_t nbins,
161 RooWorkspace* ws, const char* dsName) {
162
163 std::vector<Double_t> mainMeas(nbins);
164
165 // loop over channels
166 for(Int_t i=0; i<nbins; ++i){
167 mainMeas[i] = sig[i] + back[i];
168 }
169 return AddData(&mainMeas[0], back, back_syst, nbins, ws, dsName);
170}
171
172////////////////////////////////////////////////////////////////////////////////
173/// Arguments are an array of expected signal, expected background, and relative
174/// ratio of background expected in the sideband to that expected in signal region,
175/// and the number of channels.
176
178 Double_t* backExp,
179 Double_t* tau,
180 Int_t nbins,
181 RooWorkspace* ws, const char* dsName) {
182
183 std::vector<Double_t> mainMeas(nbins);
184 std::vector<Double_t> sideband(nbins);
185 for(Int_t i=0; i<nbins; ++i){
186 mainMeas[i] = sigExp[i] + backExp[i];
187 sideband[i] = backExp[i]*tau[i];
188 }
189 return AddDataWithSideband(&mainMeas[0], &sideband[0], tau, nbins, ws, dsName);
190
191}
192
193////////////////////////////////////////////////////////////////////////////////
194/// need to be careful here that the range of observable in the dataset is consistent with the one in the workspace
195/// don't rescale unless necessary. If it is necessary, then rescale by x10 or a defined maximum.
196
198 return SafeObservableCreation(ws, varName, value, 10.*value);
199
200}
201
202////////////////////////////////////////////////////////////////////////////////
203/// need to be careful here that the range of observable in the dataset is consistent with the one in the workspace
204/// don't rescale unless necessary. If it is necessary, then rescale by x10 or a defined maximum.
205
207 Double_t value, Double_t maximum) {
208 RooRealVar* x = ws->var( varName );
209 if( !x )
210 x = new RooRealVar(varName, varName, value, 0, maximum );
211 if( x->getMax() < value )
212 x->setMax( max(x->getMax(), 10*value ) );
213 x->setVal( value );
214
215 return x;
216}
217
218
219////////////////////////////////////////////////////////////////////////////////
220/// Arguments are an array of results from a main measurement, a measured background,
221/// and relative background uncertainty (eg. 0.1 for 10% uncertainty), and the number of channels.
222
224 Double_t* back,
225 Double_t* back_syst,
226 Int_t nbins,
227 RooWorkspace* ws, const char* dsName) {
228
229 using namespace RooFit;
230 using std::vector;
231
232 Double_t MaxSigma = 8; // Needed to set ranges for variables.
233
234 TList observablesCollection;
235
236 TTree* tree = new TTree();
237 std::vector<Double_t> xForTree(nbins);
238 std::vector<Double_t> yForTree(nbins);
239
240 // loop over channels
241 for(Int_t i=0; i<nbins; ++i){
242 std::stringstream str;
243 str<<"_"<<i;
244
245 //Double_t _tau = 1./back[i]/back_syst[i]/back_syst[i];
246 // LM: compute tau correctly for the Gamma distribution : mode = tau*b and variance is (tau*b+1)
247 Double_t err = back_syst[i];
248 Double_t _tau = (1.0 + sqrt(1 + 4 * err * err))/ (2. * err * err)/ back[i];
249
250 RooRealVar* tau = SafeObservableCreation(ws, ("tau"+str.str()).c_str(), _tau );
251
252 oocoutW(ws,ObjectHandling) << "NumberCountingPdfFactory: changed value of " << tau->GetName() << " to " << tau->getVal() <<
253 " to be consistent with background and its uncertainty. " <<
254 " Also stored these values of tau into workspace with name . " << (string(tau->GetName())+string(dsName)).c_str() <<
255 " if you test with a different dataset, you should adjust tau appropriately.\n"<< endl;
257 ws->import(*((RooRealVar*) tau->clone( (string(tau->GetName())+string(dsName)).c_str() ) ) );
259
260 // need to be careful
261 RooRealVar* x = SafeObservableCreation(ws, ("x"+str.str()).c_str(), mainMeas[i]);
262
263 // need to be careful
264 RooRealVar* y = SafeObservableCreation(ws, ("y"+str.str()).c_str(), back[i]*_tau );
265
266 observablesCollection.Add(x);
267 observablesCollection.Add(y);
268
269 xForTree[i] = mainMeas[i];
270 yForTree[i] = back[i]*_tau;
271
272 tree->Branch(("x"+str.str()).c_str(), &xForTree[i] ,("x"+str.str()+"/D").c_str());
273 tree->Branch(("y"+str.str()).c_str(), &yForTree[i] ,("y"+str.str()+"/D").c_str());
274
275 ws->var(("b"+str.str()).c_str())->setMax( 1.2*back[i]+MaxSigma*(sqrt(back[i])+back[i]*back_syst[i]) );
276 ws->var(("b"+str.str()).c_str())->setVal( back[i] );
277
278 }
279 tree->Fill();
280 // tree->Print();
281 // tree->Scan();
282
283 RooArgList* observableList = new RooArgList(observablesCollection);
284
285 // observableSet->Print();
286 // observableList->Print();
287
288 RooDataSet* data = new RooDataSet(dsName,"Number Counting Data", tree, *observableList); // one experiment
289 // data->Scan();
290
291
292 // import hypothetical data
294 ws->import(*data);
296
297}
298
299////////////////////////////////////////////////////////////////////////////////
300/// Arguments are an array of expected signal, expected background, and relative
301/// background uncertainty (eg. 0.1 for 10% uncertainty), and the number of channels.
302
304 Double_t* sideband,
305 Double_t* tauForTree,
306 Int_t nbins,
307 RooWorkspace* ws, const char* dsName) {
308
309 using namespace RooFit;
310 using std::vector;
311
312 Double_t MaxSigma = 8; // Needed to set ranges for variables.
313
314 TList observablesCollection;
315
316 TTree* tree = new TTree();
317
318 std::vector<Double_t> xForTree(nbins);
319 std::vector<Double_t> yForTree(nbins);
320
321
322 // loop over channels
323 for(Int_t i=0; i<nbins; ++i){
324 std::stringstream str;
325 str<<"_"<<i;
326
327 Double_t _tau = tauForTree[i];
328 Double_t back_syst = 1./sqrt(sideband[i]);
329 Double_t back = (sideband[i]/_tau);
330
331
332 RooRealVar* tau = SafeObservableCreation(ws, ("tau"+str.str()).c_str(), _tau );
333
334 oocoutW(ws,ObjectHandling) << "NumberCountingPdfFactory: changed value of " << tau->GetName() << " to " << tau->getVal() <<
335 " to be consistent with background and its uncertainty. " <<
336 " Also stored these values of tau into workspace with name . " << (string(tau->GetName())+string(dsName)).c_str() <<
337 " if you test with a different dataset, you should adjust tau appropriately.\n"<< endl;
339 ws->import(*((RooRealVar*) tau->clone( (string(tau->GetName())+string(dsName)).c_str() ) ) );
341
342 // need to be careful
343 RooRealVar* x = SafeObservableCreation(ws, ("x"+str.str()).c_str(), mainMeas[i]);
344
345 // need to be careful
346 RooRealVar* y = SafeObservableCreation(ws, ("y"+str.str()).c_str(), sideband[i] );
347
348
349 observablesCollection.Add(x);
350 observablesCollection.Add(y);
351
352 xForTree[i] = mainMeas[i];
353 yForTree[i] = sideband[i];
354
355 tree->Branch(("x"+str.str()).c_str(), &xForTree[i] ,("x"+str.str()+"/D").c_str());
356 tree->Branch(("y"+str.str()).c_str(), &yForTree[i] ,("y"+str.str()+"/D").c_str());
357
358 ws->var(("b"+str.str()).c_str())->setMax( 1.2*back+MaxSigma*(sqrt(back)+back*back_syst) );
359 ws->var(("b"+str.str()).c_str())->setVal( back );
360
361 }
362 tree->Fill();
363 // tree->Print();
364 // tree->Scan();
365
366 RooArgList* observableList = new RooArgList(observablesCollection);
367
368 // observableSet->Print();
369 // observableList->Print();
370
371 RooDataSet* data = new RooDataSet(dsName,"Number Counting Data", tree, *observableList); // one experiment
372 // data->Scan();
373
374
375 // import hypothetical data
377 ws->import(*data);
379
380}
#define b(i)
Definition: RSha256.hxx:100
#define oocoutW(o, a)
Definition: RooMsgService.h:47
const Bool_t kTRUE
Definition: RtypesCore.h:89
#define ClassImp(name)
Definition: Rtypes.h:361
double sqrt(double)
void setConstant(Bool_t value=kTRUE)
Double_t getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition: RooAbsReal.h:90
RooAddition calculates the sum of a set of RooAbsReal terms, or when constructed with two sets,...
Definition: RooAddition.h:26
RooArgList is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgList.h:21
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:28
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:33
static RooMsgService & instance()
Return reference to singleton instance.
void setGlobalKillBelow(RooFit::MsgLevel level)
Poisson pdf.
Definition: RooPoisson.h:19
RooProdPdf is an efficient implementation of a product of PDFs of the form.
Definition: RooProdPdf.h:31
A RooProduct represents the product of a given set of RooAbsReal objects.
Definition: RooProduct.h:30
RooRealVar represents a variable that can be changed from the outside.
Definition: RooRealVar.h:35
virtual TObject * clone(const char *newname) const
Definition: RooRealVar.h:47
A factory for building PDFs and data for a number counting combination.
void AddExpDataWithSideband(Double_t *sigExp, Double_t *bkgExp, Double_t *tau, Int_t nbins, RooWorkspace *ws, const char *dsName="NumberCountingData")
Arguments are an array of expected signal, expected background, and relative ratio of background expe...
void AddModel(Double_t *sigExp, Int_t nchan, RooWorkspace *ws, const char *pdfName="CombinedPdf", const char *masterSignalName="masterSignal")
This method produces a PDF for N channels with uncorrelated background uncertainty.
RooRealVar * SafeObservableCreation(RooWorkspace *ws, const char *varName, Double_t value)
need to be careful here that the range of observable in the dataset is consistent with the one in the...
void AddData(Double_t *mainMeas, Double_t *bkgMeas, Double_t *db, Int_t nbins, RooWorkspace *ws, const char *dsName="NumberCountingData")
Arguments are an array of results from a main measurement, a measured background, and relative backgr...
void AddExpData(Double_t *sigExp, Double_t *bkgExp, Double_t *db, Int_t nbins, RooWorkspace *ws, const char *dsName="ExpectedNumberCountingData")
Arguments are an array of expected signal, expected background, and relative background uncertainty (...
void AddDataWithSideband(Double_t *mainMeas, Double_t *sideband, Double_t *tau, Int_t nbins, RooWorkspace *ws, const char *dsName="ExpectedNumberCountingData")
Arguments are an array of expected signal, expected background, and relative background uncertainty (...
The RooWorkspace is a persistable container for RooFit projects.
Definition: RooWorkspace.h:43
A doubly linked list.
Definition: TList.h:44
virtual void Add(TObject *obj)
Definition: TList.h:87
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
A TTree represents a columnar dataset.
Definition: TTree.h:78
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
@ ObjectHandling
Definition: RooGlobalFunc.h:68
Namespace for the RooStats classes.
Definition: Asimov.h:19
static constexpr double s
Definition: tree.py:1
void ws()
Definition: ws.C:66