Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TLimit.cxx
Go to the documentation of this file.
1// @(#)root/hist:$Id$
2// Author: Christophe.Delaere@cern.ch 21/08/2002
3
4///////////////////////////////////////////////////////////////////////////
5//
6// TLimit
7//
8// Class to compute 95% CL limits
9//
10// adapted from the mclimit code from Tom Junk (CLs method)
11// see http://root.cern.ch/root/doc/TomJunk.pdf
12// see http://cern.ch/thomasj/searchlimits/ecl.html
13// see: Tom Junk,NIM A434, p. 435-443, 1999
14//
15// see also the following interesting references:
16// Alex Read, "Presentation of search results: the CLs technique"
17// Journal of Physics G: Nucl. Part. Phys. 28 2693-2704 (2002).
18// http://www.iop.org/EJ/abstract/0954-3899/28/10/313/
19//
20// A nice article is also available in the CERN yellow report with the proceeding
21// of the 2000 CERN workshop on confidence intervals.
22//
23// Alex Read, "Modified Frequentist Analysis of Search Results (The CLs Method)"
24// CERN 2000-005 (30 May 2000)
25//
26// see note about: "Should I use TRolke, TFeldmanCousins, TLimit?"
27// in the TRolke class description.
28//
29///////////////////////////////////////////////////////////////////////////
30
31/** \class TLimit
32 \ingroup Hist
33 Algorithm to compute 95% C.L. limits using the Likelihood ratio
34 semi-bayesian method.
35
36 Implemented by C. Delaere from the mclimit code written by Tom Junk [HEP-EX/9902006].
37 See [http://cern.ch/thomasj/searchlimits/ecl.html](http://cern.ch/thomasj/searchlimits/ecl.html) for more details.
38
39 It takes signal, background and data histograms wrapped in a
40 TLimitDataSource as input and runs a set of Monte Carlo experiments in
41 order to compute the limits. If needed, inputs are fluctuated according
42 to systematics. The output is a TConfidenceLevel.
43
44 The class TLimitDataSource takes the signal, background and data histograms as well as different
45 systematics sources to form the TLimit input.
46
47 The class TConfidenceLevel represents the final result of the TLimit algorithm. It is created just after the
48 time-consuming part and can be stored in a TFile for further processing.
49 It contains light methods to return CLs, CLb and other interesting
50 quantities.
51
52 The actual algorithm...
53
54 From an input (TLimitDataSource) it produces an output TConfidenceLevel.
55 For this, nmc Monte Carlo experiments are performed.
56 As usual, the larger this number, the longer the compute time,
57 but the better the result.
58
59 Supposing that there is a plotfile.root file containing 3 histograms
60 (signal, background and data), you can imagine doing things like:
61
62~~~{.cpp}
63 TFile* infile=new TFile("plotfile.root","READ");
64 infile->cd();
65 TH1* sh=(TH1*)infile->Get("signal");
66 TH1* bh=(TH1*)infile->Get("background");
67 TH1* dh=(TH1*)infile->Get("data");
68 TLimitDataSource* mydatasource = new TLimitDataSource(sh,bh,dh);
69 TConfidenceLevel *myconfidence = TLimit::ComputeLimit(mydatasource,50000);
70 std::cout << " CLs : " << myconfidence->CLs() << std::endl;
71 std::cout << " CLsb : " << myconfidence->CLsb() << std::endl;
72 std::cout << " CLb : " << myconfidence->CLb() << std::endl;
73 std::cout << "< CLs > : " << myconfidence->GetExpectedCLs_b() << std::endl;
74 std::cout << "< CLsb > : " << myconfidence->GetExpectedCLsb_b() << std::endl;
75 std::cout << "< CLb > : " << myconfidence->GetExpectedCLb_b() << std::endl;
76 delete myconfidence;
77 delete mydatasource;
78 infile->Close();
79~~~
80 More information can still be found on [this page](http://cern.ch/aleph-proj-alphapp/doc/tlimit.html)
81 */
82
83#include "TLimit.h"
84#include "TArrayD.h"
85#include "TVectorD.h"
86#include "TOrdCollection.h"
87#include "TConfidenceLevel.h"
88#include "TLimitDataSource.h"
89#include "TRandom3.h"
90#include "TH1.h"
91#include "TObjArray.h"
92#include "TMath.h"
93#include "TIterator.h"
94#include "TObjString.h"
95#include "TClassTable.h"
96#include "Riostream.h"
97
99
102
104 Int_t nmc, bool stat,
105 TRandom * generator)
106{
107 // The final object returned...
108 TConfidenceLevel *result = new TConfidenceLevel(nmc);
109 // The random generator used...
110 TRandom *myrandom = generator ? generator : new TRandom3;
111 // Compute some total quantities on all the channels
112 Int_t maxbins = 0;
113 Double_t nsig = 0;
114 Double_t nbg = 0;
115 Int_t ncand = 0;
116 Int_t i;
117 for (i = 0; i <= data->GetSignal()->GetLast(); i++) {
118 maxbins = (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) > maxbins ?
119 (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) : maxbins;
120 nsig += ((TH1 *) (data->GetSignal()->At(i)))->Integral();
121 nbg += ((TH1 *) (data->GetBackground()->At(i)))->Integral();
122 ncand += (Int_t) ((TH1 *) (data->GetCandidates()->At(i)))->Integral();
123 }
124 result->SetBtot(nbg);
125 result->SetStot(nsig);
126 result->SetDtot(ncand);
127 Double_t buffer = 0;
128 fgTable->Set(maxbins * (data->GetSignal()->GetLast() + 1));
129 for (Int_t channel = 0; channel <= data->GetSignal()->GetLast(); channel++)
130 for (Int_t bin = 0;
131 bin <= ((TH1 *) (data->GetSignal()->At(channel)))->GetNbinsX()+1;
132 bin++) {
133 Double_t s = (Double_t) ((TH1 *) (data->GetSignal()->At(channel)))->GetBinContent(bin);
134 Double_t b = (Double_t) ((TH1 *) (data->GetBackground()->At(channel)))->GetBinContent(bin);
135 Double_t d = (Double_t) ((TH1 *) (data->GetCandidates()->At(channel)))->GetBinContent(bin);
136 // Compute the value of the "-2lnQ" for the actual data
137 if ((b == 0) && (s > 0)) {
138 std::cout << "WARNING: Ignoring bin " << bin << " of channel "
139 << channel << " which has s=" << s << " but b=" << b << std::endl;
140 std::cout << " Maybe the MC statistic has to be improved..." << std::endl;
141 }
142 if ((s > 0) && (b > 0))
143 buffer += LogLikelihood(s, b, b, d);
144 // precompute the log(1+s/b)'s in an array to speed up computation
145 // background-free bins are set to have a maximum t.s. value
146 // for protection (corresponding to s/b of about 5E8)
147 if ((s > 0) && (b > 0))
148 fgTable->AddAt(LogLikelihood(s, b, b, 1), (channel * maxbins) + bin);
149 else if ((s > 0) && (b == 0))
150 fgTable->AddAt(20, (channel * maxbins) + bin);
151 }
152 result->SetTSD(buffer);
153 // accumulate MC experiments. Hold the test statistic function fixed, but
154 // fluctuate s and b within syst. errors for computing probabilities of
155 // having that outcome. (Alex Read's prescription -- errors are on the ensemble,
156 // not on the observed test statistic. This technique does not split outcomes.)
157 // keep the tstats as sum log(1+s/b). convert to -2lnQ when preparing the results
158 // (reason -- like to keep the < signs right)
159 Double_t *tss = new Double_t[nmc];
160 Double_t *tsb = new Double_t[nmc];
161 Double_t *lrs = new Double_t[nmc];
162 Double_t *lrb = new Double_t[nmc];
163 TLimitDataSource* tmp_src = (TLimitDataSource*)(data->Clone());
164 TLimitDataSource* tmp_src2 = (TLimitDataSource*)(data->Clone());
165 for (i = 0; i < nmc; i++) {
166 tss[i] = 0;
167 tsb[i] = 0;
168 lrs[i] = 0;
169 lrb[i] = 0;
170 // fluctuate signal and background
171 TLimitDataSource* fluctuated = Fluctuate(data, tmp_src, !i, myrandom, stat) ? tmp_src : data;
172 // make an independent set of fluctuations for reweighting pseudoexperiments
173 // it turns out using the same fluctuated ones for numerator and denominator
174 // is biased.
175 TLimitDataSource* fluctuated2 = Fluctuate(data, tmp_src2, false, myrandom, stat) ? tmp_src2 : data;
176
177 for (Int_t channel = 0;
178 channel <= fluctuated->GetSignal()->GetLast(); channel++) {
179 for (Int_t bin = 0;
180 bin <=((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetNbinsX()+1;
181 bin++) {
182 if ((Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) != 0) {
183 // s+b hypothesis
184 Double_t rate = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) +
185 (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
186 Double_t rand = myrandom->Poisson(rate);
187 tss[i] += rand * fgTable->At((channel * maxbins) + bin);
188 Double_t s = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin);
189 Double_t s2= (Double_t) ((TH1 *) (fluctuated2->GetSignal()->At(channel)))->GetBinContent(bin);
190 Double_t b = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
191 Double_t b2= (Double_t) ((TH1 *) (fluctuated2->GetBackground()->At(channel)))->GetBinContent(bin);
192 if ((s > 0) && (b2 > 0))
193 lrs[i] += LogLikelihood(s, b, b2, rand) - s - b + b2;
194 else if ((s > 0) && (b2 == 0))
195 lrs[i] += 20 * rand - s;
196 // b hypothesis
197 rate = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
198 rand = myrandom->Poisson(rate);
199 tsb[i] += rand * fgTable->At((channel * maxbins) + bin);
200 if ((s2 > 0) && (b > 0))
201 lrb[i] += LogLikelihood(s2, b2, b, rand) - s2 - b2 + b;
202 else if ((s > 0) && (b == 0))
203 lrb[i] += 20 * rand - s;
204 }
205 }
206 }
207 lrs[i] = TMath::Exp(lrs[i] < 710 ? lrs[i] : 710);
208 lrb[i] = TMath::Exp(lrb[i] < 710 ? lrb[i] : 710);
209 }
210 delete tmp_src;
211 delete tmp_src2;
212 // lrs and lrb are the LR's (no logs) = prob(s+b)/prob(b) for
213 // that choice of s and b within syst. errors in the ensemble. These are
214 // the MC experiment weights for relating the s+b and b PDF's of the unsmeared
215 // test statistic (in which cas one can use another test statistic if one likes).
216
217 // Now produce the output object.
218 // The final quantities are computed on-demand form the arrays tss, tsb, lrs and lrb.
219 result->SetTSS(tss);
220 result->SetTSB(tsb);
221 result->SetLRS(lrs);
222 result->SetLRB(lrb);
223 if (!generator)
224 delete myrandom;
225 return result;
226}
227
229 bool init, TRandom * generator, bool stat)
230{
231 // initialisation: create a sorted list of all the names of systematics
232 if (init) {
233 // create a "map" with the systematics names
234 TIterator *errornames = input->GetErrorNames()->MakeIterator();
235 TObjArray *listofnames = 0;
236 delete fgSystNames;
238 while ((listofnames = ((TObjArray *) errornames->Next()))) {
239 TObjString *name = 0;
240 TIterator *loniter = listofnames->MakeIterator();
241 while ((name = (TObjString *) (loniter->Next())))
242 if ((fgSystNames->IndexOf(name)) < 0)
244 }
245 fgSystNames->Sort();
246 }
247 // if the output is not given, create it from the input
248 if (!output)
249 output = (TLimitDataSource*)(input->Clone());
250 // if there are no systematics, just returns the input as "fluctuated" output
251 if ((fgSystNames->GetSize() <= 0)&&(!stat)) {
252 return 0;
253 }
254 // if there are only stat, just fluctuate stats.
255 if (fgSystNames->GetSize() <= 0) {
256 output->SetOwner();
257 for (Int_t channel = 0; channel <= input->GetSignal()->GetLast(); channel++) {
258 TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
259 TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
260 if(stat)
261 for(int i=1; i<=newsignal->GetNbinsX(); i++) {
262 newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
263 }
264 newsignal->SetDirectory(0);
265 TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
266 TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
267 if(stat)
268 for(int i=1; i<=newbackground->GetNbinsX(); i++)
269 newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
270 newbackground->SetDirectory(0);
271 }
272 return 1;
273 }
274 // Find a choice for the random variation and
275 // re-toss all random numbers if any background or signal
276 // goes negative. (background = 0 is bad too, so put a little protection
277 // around it -- must have at least 10% of the bg estimate).
278 Bool_t retoss = kTRUE;
279 Double_t *serrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
280 Double_t *berrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
281 do {
282 Double_t *toss = new Double_t[fgSystNames->GetSize()];
283 for (Int_t i = 0; i < fgSystNames->GetSize(); i++)
284 toss[i] = generator->Gaus(0, 1);
285 retoss = kFALSE;
286 for (Int_t channel = 0;
287 channel <= input->GetSignal()->GetLast();
288 channel++) {
289 serrf[channel] = 0;
290 berrf[channel] = 0;
291 for (Int_t bin = 0;
292 bin <((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->GetNrows();
293 bin++) {
294 serrf[channel] += ((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->operator[](bin) *
295 toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
296 berrf[channel] += ((TVectorD *) (input->GetErrorOnBackground()->At(channel)))->operator[](bin) *
297 toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
298 }
299 if ((serrf[channel] < -1.0) || (berrf[channel] < -0.9)) {
300 retoss = kTRUE;
301 continue;
302 }
303 }
304 delete[]toss;
305 } while (retoss);
306 // adjust the fluctuated signal and background counts with a legal set
307 // of random fluctuations above.
308 output->SetOwner();
309 for (Int_t channel = 0; channel <= input->GetSignal()->GetLast();
310 channel++) {
311 TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
312 TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
313 if(stat)
314 for(int i=1; i<=newsignal->GetNbinsX(); i++)
315 newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
316 else
317 for(int i=1; i<=newsignal->GetNbinsX(); i++)
318 newsignal->SetBinContent(i,oldsignal->GetBinContent(i));
319 newsignal->Scale(1 + serrf[channel]);
320 newsignal->SetDirectory(0);
321 TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
322 TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
323 if(stat)
324 for(int i=1; i<=newbackground->GetNbinsX(); i++)
325 newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
326 else
327 for(int i=1; i<=newbackground->GetNbinsX(); i++)
328 newbackground->SetBinContent(i,oldbackground->GetBinContent(i));
329 newbackground->Scale(1 + berrf[channel]);
330 newbackground->SetDirectory(0);
331 }
332 delete[] serrf;
333 delete[] berrf;
334 return 1;
335}
336
338 Int_t nmc, bool stat,
339 TRandom * generator)
340{
341 // Compute limit.
342
343 TLimitDataSource* lds = new TLimitDataSource(s,b,d);
344 TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
345 delete lds;
346 return out;
347}
348
350 TVectorD* se, TVectorD* be, TObjArray* l,
351 Int_t nmc, bool stat,
352 TRandom * generator)
353{
354 // Compute limit.
355
356 TLimitDataSource* lds = new TLimitDataSource(s,b,d,se,be,l);
357 TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
358 delete lds;
359 return out;
360}
361
363 Int_t nmc,
364 bool stat,
365 TRandom * generator)
366{
367 // Compute limit.
368
369 TH1D* sh = new TH1D("__sh","__sh",1,0,2);
370 sh->Fill(1,s);
371 TH1D* bh = new TH1D("__bh","__bh",1,0,2);
372 bh->Fill(1,b);
373 TH1D* dh = new TH1D("__dh","__dh",1,0,2);
374 dh->Fill(1,d);
375 TLimitDataSource* lds = new TLimitDataSource(sh,bh,dh);
376 TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
377 delete lds;
378 delete sh;
379 delete bh;
380 delete dh;
381 return out;
382}
383
385 TVectorD* se, TVectorD* be, TObjArray* l,
386 Int_t nmc,
387 bool stat,
388 TRandom * generator)
389{
390 // Compute limit.
391
392 TH1D* sh = new TH1D("__sh","__sh",1,0,2);
393 sh->Fill(1,s);
394 TH1D* bh = new TH1D("__bh","__bh",1,0,2);
395 bh->Fill(1,b);
396 TH1D* dh = new TH1D("__dh","__dh",1,0,2);
397 dh->Fill(1,d);
398 TLimitDataSource* lds = new TLimitDataSource(sh,bh,dh,se,be,l);
399 TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
400 delete lds;
401 delete sh;
402 delete bh;
403 delete dh;
404 return out;
405}
406
408{
409 // Compute LogLikelihood (static function)
410
411 return d*TMath::Log((s+b)/b2);
412}
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
int Int_t
Definition RtypesCore.h:45
const Bool_t kFALSE
Definition RtypesCore.h:92
double Double_t
Definition RtypesCore.h:59
const Bool_t kTRUE
Definition RtypesCore.h:91
#define ClassImp(name)
Definition Rtypes.h:364
char name[80]
Definition TGX11.cxx:110
Array of doubles (64 bits per element).
Definition TArrayD.h:27
Double_t At(Int_t i) const
Definition TArrayD.h:79
void Set(Int_t n)
Set size of this array to n doubles.
Definition TArrayD.cxx:106
void AddAt(Double_t c, Int_t i)
Set the double c value at position i in the array.
Definition TArrayD.cxx:94
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Class to compute 95% CL limits.
void SetTSS(Double_t *in)
Set the TSS.
void SetStot(Double_t in)
void SetBtot(Double_t in)
void SetLRB(Double_t *in)
void SetTSB(Double_t *in)
Set the TSB.
void SetLRS(Double_t *in)
void SetTSD(Double_t in)
void SetDtot(Int_t in)
1-D histogram with a double per channel (see TH1 documentation)}
Definition TH1.h:618
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:58
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition TH1.cxx:8777
virtual Double_t GetBinError(Int_t bin) const
Return value of error associated to bin number bin.
Definition TH1.cxx:8903
virtual Int_t GetNbinsX() const
Definition TH1.h:296
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition TH1.cxx:3350
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition TH1.cxx:9062
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition TH1.cxx:4993
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition TH1.cxx:6564
Iterator abstract base class.
Definition TIterator.h:30
virtual TObject * Next()=0
This class serves as interface to feed data into the TLimit routines.
virtual TObjArray * GetErrorOnSignal()
virtual TObjArray * GetSignal()
virtual TObjArray * GetCandidates()
virtual TObjArray * GetBackground()
virtual TObjArray * GetErrorNames()
virtual TObjArray * GetErrorOnBackground()
Algorithm to compute 95% C.L.
Definition TLimit.h:19
static Double_t LogLikelihood(Double_t s, Double_t b, Double_t b2, Double_t d)
Definition TLimit.cxx:407
static TOrdCollection * fgSystNames
Definition TLimit.h:51
static TArrayD * fgTable
Definition TLimit.h:50
static TConfidenceLevel * ComputeLimit(TLimitDataSource *data, Int_t nmc=50000, bool stat=false, TRandom *generator=0)
Definition TLimit.cxx:103
static bool Fluctuate(TLimitDataSource *input, TLimitDataSource *output, bool init, TRandom *, bool stat=false)
Definition TLimit.cxx:228
An array of TObjects.
Definition TObjArray.h:37
TIterator * MakeIterator(Bool_t dir=kIterForward) const
Returns an array iterator.
Int_t GetLast() const
Return index of last object in array.
TObject * At(Int_t idx) const
Definition TObjArray.h:166
Collectable string class.
Definition TObjString.h:28
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition TObject.cxx:146
Ordered collection.
Int_t IndexOf(const TObject *obj) const
Return index of object in collection.
void AddLast(TObject *obj)
Add object at the end of the collection.
void Sort()
If objects in collection are sortable (i.e.
Int_t BinarySearch(TObject *obj)
Find object using a binary search.
Random number generator class based on M.
Definition TRandom3.h:27
This is the base class for the ROOT Random number generators.
Definition TRandom.h:27
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition TRandom.cxx:274
virtual Int_t Poisson(Double_t mean)
Generates a random integer N according to a Poisson law.
Definition TRandom.cxx:402
Double_t Exp(Double_t x)
Definition TMath.h:727
Double_t Log(Double_t x)
Definition TMath.h:760
auto * l
Definition textangle.C:4
static void output(int code)
Definition gifencode.c:226