Logo ROOT   master
Reference Guide
NumberCountingPdfFactory.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id$
2 // Author: Kyle Cranmer 28/07/2008
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////////////////////////////////
13 
14 /** \class RooStats::NumberCountingPdfFactory
15  \ingroup Roostats
16 
17 A factory for building PDFs and data for a number counting combination.
18 The factory produces a PDF for N channels with uncorrelated background
19 uncertainty. Correlations can be added by extending this PDF with additional terms.
20 The factory relates the signal in each channel to a master signal strength times the
21 expected signal in each channel. Thus, the final test is performed on the master signal strength.
22 This yields a more powerful test than letting signal in each channel be independent.
23 
24 The problem has been studied in these references:
25 
26  - http://arxiv.org/abs/physics/0511028
27  - http://arxiv.org/abs/physics/0702156
28  - http://cdsweb.cern.ch/record/1099969?ln=en
29 
30 One can incorporate uncertainty on the expected signal by adding additional terms.
31 For the future, perhaps this factory should be extended to include the efficiency terms automatically.
32 
33 */
34 
36 
37 #include "RooStats/RooStatsUtils.h"
38 
39 #include "RooRealVar.h"
40 #include "RooAddition.h"
41 #include "RooProduct.h"
42 #include "RooDataSet.h"
43 #include "RooProdPdf.h"
44 #include "RooFitResult.h"
45 #include "RooPoisson.h"
46 #include "RooGlobalFunc.h"
47 #include "RooCmdArg.h"
48 #include "RooWorkspace.h"
49 #include "RooMsgService.h"
50 #include "TTree.h"
51 #include <sstream>
52 
53 
54 
56 
57 
58 using namespace RooStats;
59 using namespace RooFit;
60 using namespace std;
61 
62 
63 ////////////////////////////////////////////////////////////////////////////////
64 /// constructor
65 
67 }
68 
69 ////////////////////////////////////////////////////////////////////////////////
70 /// destructor
71 
73 }
74 
75 ////////////////////////////////////////////////////////////////////////////////
76 /// This method produces a PDF for N channels with uncorrelated background
77 /// uncertainty. It relates the signal in each channel to a master signal strength times the
78 /// expected signal in each channel.
79 ///
80 /// For the future, perhaps this method should be extended to include the efficiency terms automatically.
81 
83  Int_t nbins,
85  const char* pdfName, const char* muName) {
86 
87 
88 
89  using namespace RooFit;
90  using std::vector;
91 
92  TList likelihoodFactors;
93 
94  // Double_t MaxSigma = 8; // Needed to set ranges for variables.
95 
96  RooRealVar* masterSignal =
97  new RooRealVar(muName,"masterSignal",1., 0., 3.);
98 
99 
100  // loop over individual channels
101  for(Int_t i=0; i<nbins; ++i){
102  // need to name the variables dynamically, so please forgive the string manipulation and focus on values & ranges.
103  std::stringstream str;
104  str<<"_"<<i;
105  RooRealVar* expectedSignal =
106  new RooRealVar(("expected_s"+str.str()).c_str(),("expected_s"+str.str()).c_str(),sig[i], 0., 2*sig[i]);
107  expectedSignal->setConstant(kTRUE);
108 
109  RooProduct* s =
110  new RooProduct(("s"+str.str()).c_str(),("s"+str.str()).c_str(), RooArgSet(*masterSignal, *expectedSignal));
111 
112  RooRealVar* b =
113  new RooRealVar(("b"+str.str()).c_str(),("b"+str.str()).c_str(), .5, 0.,1.);
114  RooRealVar* tau =
115  new RooRealVar(("tau"+str.str()).c_str(),("tau"+str.str()).c_str(), .5, 0., 1.);
116  tau->setConstant(kTRUE);
117 
118  RooAddition* splusb =
119  new RooAddition(("splusb"+str.str()).c_str(),("s"+str.str()+"+"+"b"+str.str()).c_str(),
120  RooArgSet(*s,*b));
121  RooProduct* bTau =
122  new RooProduct(("bTau"+str.str()).c_str(),("b*tau"+str.str()).c_str(), RooArgSet(*b, *tau));
123  RooRealVar* x =
124  new RooRealVar(("x"+str.str()).c_str(),("x"+str.str()).c_str(), 0.5 , 0., 1.);
125  RooRealVar* y =
126  new RooRealVar(("y"+str.str()).c_str(),("y"+str.str()).c_str(), 0.5, 0., 1.);
127 
128 
129  RooPoisson* sigRegion =
130  new RooPoisson(("sigRegion"+str.str()).c_str(),("sigRegion"+str.str()).c_str(), *x,*splusb);
131 
132  //LM: need to set noRounding since y can take non integer values
133  RooPoisson* sideband =
134  new RooPoisson(("sideband"+str.str()).c_str(),("sideband"+str.str()).c_str(), *y,*bTau,true);
135 
136  likelihoodFactors.Add(sigRegion);
137  likelihoodFactors.Add(sideband);
138 
139  }
140 
141  RooArgSet likelihoodFactorSet(likelihoodFactors);
142  RooProdPdf joint(pdfName,"joint", likelihoodFactorSet );
143  // joint.printCompactTree();
144 
145  // add this PDF to workspace.
146  // Need to do import into workspace now to get all the structure imported as well.
147  // Just returning the WS will loose the rest of the structure b/c it will go out of scope
149  ws->import(joint);
151 }
152 
153 ////////////////////////////////////////////////////////////////////////////////
154 /// Arguments are an array of expected signal, expected background, and relative
155 /// background uncertainty (eg. 0.1 for 10% uncertainty), and the number of channels.
156 
158  Double_t* back,
159  Double_t* back_syst,
160  Int_t nbins,
161  RooWorkspace* ws, const char* dsName) {
162 
163  std::vector<Double_t> mainMeas(nbins);
164 
165  // loop over channels
166  for(Int_t i=0; i<nbins; ++i){
167  mainMeas[i] = sig[i] + back[i];
168  }
169  return AddData(&mainMeas[0], back, back_syst, nbins, ws, dsName);
170 }
171 
172 ////////////////////////////////////////////////////////////////////////////////
173 /// Arguments are an array of expected signal, expected background, and relative
174 /// ratio of background expected in the sideband to that expected in signal region,
175 /// and the number of channels.
176 
178  Double_t* backExp,
179  Double_t* tau,
180  Int_t nbins,
181  RooWorkspace* ws, const char* dsName) {
182 
183  std::vector<Double_t> mainMeas(nbins);
184  std::vector<Double_t> sideband(nbins);
185  for(Int_t i=0; i<nbins; ++i){
186  mainMeas[i] = sigExp[i] + backExp[i];
187  sideband[i] = backExp[i]*tau[i];
188  }
189  return AddDataWithSideband(&mainMeas[0], &sideband[0], tau, nbins, ws, dsName);
190 
191 }
192 
193 ////////////////////////////////////////////////////////////////////////////////
194 /// need to be careful here that the range of observable in the dataset is consistent with the one in the workspace
195 /// don't rescale unless necessary. If it is necessary, then rescale by x10 or a defined maximum.
196 
198  return SafeObservableCreation(ws, varName, value, 10.*value);
199 
200 }
201 
202 ////////////////////////////////////////////////////////////////////////////////
203 /// need to be careful here that the range of observable in the dataset is consistent with the one in the workspace
204 /// don't rescale unless necessary. If it is necessary, then rescale by x10 or a defined maximum.
205 
207  Double_t value, Double_t maximum) {
208  RooRealVar* x = ws->var( varName );
209  if( !x )
210  x = new RooRealVar(varName, varName, value, 0, maximum );
211  if( x->getMax() < value )
212  x->setMax( max(x->getMax(), 10*value ) );
213  x->setVal( value );
214 
215  return x;
216 }
217 
218 
219 ////////////////////////////////////////////////////////////////////////////////
220 /// Arguments are an array of results from a main measurement, a measured background,
221 /// and relative background uncertainty (eg. 0.1 for 10% uncertainty), and the number of channels.
222 
224  Double_t* back,
225  Double_t* back_syst,
226  Int_t nbins,
227  RooWorkspace* ws, const char* dsName) {
228 
229  using namespace RooFit;
230  using std::vector;
231 
232  Double_t MaxSigma = 8; // Needed to set ranges for variables.
233 
234  TList observablesCollection;
235 
236  TTree* tree = new TTree();
237  std::vector<Double_t> xForTree(nbins);
238  std::vector<Double_t> yForTree(nbins);
239 
240  // loop over channels
241  for(Int_t i=0; i<nbins; ++i){
242  std::stringstream str;
243  str<<"_"<<i;
244 
245  //Double_t _tau = 1./back[i]/back_syst[i]/back_syst[i];
246  // LM: compute tau correctly for the Gamma distribution : mode = tau*b and variance is (tau*b+1)
247  Double_t err = back_syst[i];
248  Double_t _tau = (1.0 + sqrt(1 + 4 * err * err))/ (2. * err * err)/ back[i];
249 
250  RooRealVar* tau = SafeObservableCreation(ws, ("tau"+str.str()).c_str(), _tau );
251 
252  oocoutW(ws,ObjectHandling) << "NumberCountingPdfFactory: changed value of " << tau->GetName() << " to " << tau->getVal() <<
253  " to be consistent with background and its uncertainty. " <<
254  " Also stored these values of tau into workspace with name . " << (string(tau->GetName())+string(dsName)).c_str() <<
255  " if you test with a different dataset, you should adjust tau appropriately.\n"<< endl;
257  ws->import(*((RooRealVar*) tau->clone( (string(tau->GetName())+string(dsName)).c_str() ) ) );
259 
260  // need to be careful
261  RooRealVar* x = SafeObservableCreation(ws, ("x"+str.str()).c_str(), mainMeas[i]);
262 
263  // need to be careful
264  RooRealVar* y = SafeObservableCreation(ws, ("y"+str.str()).c_str(), back[i]*_tau );
265 
266  observablesCollection.Add(x);
267  observablesCollection.Add(y);
268 
269  xForTree[i] = mainMeas[i];
270  yForTree[i] = back[i]*_tau;
271 
272  tree->Branch(("x"+str.str()).c_str(), &xForTree[i] ,("x"+str.str()+"/D").c_str());
273  tree->Branch(("y"+str.str()).c_str(), &yForTree[i] ,("y"+str.str()+"/D").c_str());
274 
275  ws->var(("b"+str.str()).c_str())->setMax( 1.2*back[i]+MaxSigma*(sqrt(back[i])+back[i]*back_syst[i]) );
276  ws->var(("b"+str.str()).c_str())->setVal( back[i] );
277 
278  }
279  tree->Fill();
280  // tree->Print();
281  // tree->Scan();
282 
283  RooArgList* observableList = new RooArgList(observablesCollection);
284 
285  // observableSet->Print();
286  // observableList->Print();
287 
288  RooDataSet* data = new RooDataSet(dsName,"Number Counting Data", tree, *observableList); // one experiment
289  // data->Scan();
290 
291 
292  // import hypothetical data
294  ws->import(*data);
296 
297 }
298 
299 ////////////////////////////////////////////////////////////////////////////////
300 /// Arguments are an array of expected signal, expected background, and relative
301 /// background uncertainty (eg. 0.1 for 10% uncertainty), and the number of channels.
302 
304  Double_t* sideband,
305  Double_t* tauForTree,
306  Int_t nbins,
307  RooWorkspace* ws, const char* dsName) {
308 
309  using namespace RooFit;
310  using std::vector;
311 
312  Double_t MaxSigma = 8; // Needed to set ranges for variables.
313 
314  TList observablesCollection;
315 
316  TTree* tree = new TTree();
317 
318  std::vector<Double_t> xForTree(nbins);
319  std::vector<Double_t> yForTree(nbins);
320 
321 
322  // loop over channels
323  for(Int_t i=0; i<nbins; ++i){
324  std::stringstream str;
325  str<<"_"<<i;
326 
327  Double_t _tau = tauForTree[i];
328  Double_t back_syst = 1./sqrt(sideband[i]);
329  Double_t back = (sideband[i]/_tau);
330 
331 
332  RooRealVar* tau = SafeObservableCreation(ws, ("tau"+str.str()).c_str(), _tau );
333 
334  oocoutW(ws,ObjectHandling) << "NumberCountingPdfFactory: changed value of " << tau->GetName() << " to " << tau->getVal() <<
335  " to be consistent with background and its uncertainty. " <<
336  " Also stored these values of tau into workspace with name . " << (string(tau->GetName())+string(dsName)).c_str() <<
337  " if you test with a different dataset, you should adjust tau appropriately.\n"<< endl;
339  ws->import(*((RooRealVar*) tau->clone( (string(tau->GetName())+string(dsName)).c_str() ) ) );
341 
342  // need to be careful
343  RooRealVar* x = SafeObservableCreation(ws, ("x"+str.str()).c_str(), mainMeas[i]);
344 
345  // need to be careful
346  RooRealVar* y = SafeObservableCreation(ws, ("y"+str.str()).c_str(), sideband[i] );
347 
348 
349  observablesCollection.Add(x);
350  observablesCollection.Add(y);
351 
352  xForTree[i] = mainMeas[i];
353  yForTree[i] = sideband[i];
354 
355  tree->Branch(("x"+str.str()).c_str(), &xForTree[i] ,("x"+str.str()+"/D").c_str());
356  tree->Branch(("y"+str.str()).c_str(), &yForTree[i] ,("y"+str.str()+"/D").c_str());
357 
358  ws->var(("b"+str.str()).c_str())->setMax( 1.2*back+MaxSigma*(sqrt(back)+back*back_syst) );
359  ws->var(("b"+str.str()).c_str())->setVal( back );
360 
361  }
362  tree->Fill();
363  // tree->Print();
364  // tree->Scan();
365 
366  RooArgList* observableList = new RooArgList(observablesCollection);
367 
368  // observableSet->Print();
369  // observableList->Print();
370 
371  RooDataSet* data = new RooDataSet(dsName,"Number Counting Data", tree, *observableList); // one experiment
372  // data->Scan();
373 
374 
375  // import hypothetical data
377  ws->import(*data);
379 
380 }
Poisson pdf.
Definition: RooPoisson.h:19
void ws()
Definition: ws.C:66
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:28
RooProdPdf is an efficient implementation of a product of PDFs of the form .
Definition: RooProdPdf.h:31
A factory for building PDFs and data for a number counting combination.
static RooMsgService & instance()
Return reference to singleton instance.
STL namespace.
void AddExpData(Double_t *sigExp, Double_t *bkgExp, Double_t *db, Int_t nbins, RooWorkspace *ws, const char *dsName="ExpectedNumberCountingData")
Arguments are an array of expected signal, expected background, and relative background uncertainty (...
void AddExpDataWithSideband(Double_t *sigExp, Double_t *bkgExp, Double_t *tau, Int_t nbins, RooWorkspace *ws, const char *dsName="NumberCountingData")
Arguments are an array of expected signal, expected background, and relative ratio of background expe...
double sqrt(double)
Double_t x[n]
Definition: legend1.C:17
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t tau
Definition: TRolke.cxx:630
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
static constexpr double s
RooRealVar represents a variable that can be changed from the outside.
Definition: RooRealVar.h:35
A doubly linked list.
Definition: TList.h:44
void setConstant(Bool_t value=kTRUE)
void setGlobalKillBelow(RooFit::MsgLevel level)
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:33
A RooProduct represents the product of a given set of RooAbsReal objects.
Definition: RooProduct.h:30
Namespace for the RooStats classes.
Definition: Asimov.h:19
#define ClassImp(name)
Definition: Rtypes.h:361
Double_t y[n]
Definition: legend1.C:17
void AddDataWithSideband(Double_t *mainMeas, Double_t *sideband, Double_t *tau, Int_t nbins, RooWorkspace *ws, const char *dsName="ExpectedNumberCountingData")
Arguments are an array of expected signal, expected background, and relative background uncertainty (...
#define oocoutW(o, a)
Definition: RooMsgService.h:47
void AddData(Double_t *mainMeas, Double_t *bkgMeas, Double_t *db, Int_t nbins, RooWorkspace *ws, const char *dsName="NumberCountingData")
Arguments are an array of results from a main measurement, a measured background, and relative backgr...
virtual void Add(TObject *obj)
Definition: TList.h:87
RooRealVar * SafeObservableCreation(RooWorkspace *ws, const char *varName, Double_t value)
need to be careful here that the range of observable in the dataset is consistent with the one in the...
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
Definition: tree.py:1
RooArgList is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgList.h:21
A TTree represents a columnar dataset.
Definition: TTree.h:78
RooAddition calculates the sum of a set of RooAbsReal terms, or when constructed with two sets...
Definition: RooAddition.h:26
const Bool_t kTRUE
Definition: RtypesCore.h:89
void AddModel(Double_t *sigExp, Int_t nchan, RooWorkspace *ws, const char *pdfName="CombinedPdf", const char *masterSignalName="masterSignal")
This method produces a PDF for N channels with uncorrelated background uncertainty.
The RooWorkspace is a persistable container for RooFit projects.
Definition: RooWorkspace.h:43