Logo ROOT   6.18/05
Reference Guide
NeymanConstruction.cxx
Go to the documentation of this file.
1// @(#)root/roostats:$Id$
2// Author: Kyle Cranmer January 2009
3
4/*************************************************************************
5 * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class RooStats::NeymanConstruction
13 \ingroup Roostats
14
15NeymanConstruction is a concrete implementation of the NeymanConstruction
16interface that, as the name suggests, performs a NeymanConstruction. It produces
17a RooStats::PointSetInterval, which is a concrete implementation of the
18ConfInterval interface.
19
20The Neyman Construction is not a uniquely defined statistical technique, it
21requires that one specify an ordering rule or ordering principle, which is
22usually incoded by choosing a specific test statistic and limits of integration
23(corresponding to upper/lower/central limits). As a result, this class must be
24configured with the corresponding information before it can produce an interval.
25Common configurations, such as the Feldman-Cousins approach, can be enforced by
26other light weight classes.
27
28The Neyman Construction considers every point in the parameter space
29independently, no assumptions are made that the interval is connected or of a
30particular shape. As a result, the PointSetInterval class is used to represent
31the result. The user indicate which points in the parameter space to perform
32the construction by providing a PointSetInterval instance with the desired points.
33
34This class is fairly light weight, because the choice of parameter points to be
35considered is factorized and so is the creation of the sampling distribution of
36the test statistic (which is done by a concrete class implementing the
37DistributionCreator interface). As a result, this class basically just drives the
38construction by:
39
40 - using a DistributionCreator to create the SamplingDistribution of a user-
41 defined test statistic for each parameter point of interest,
42 - defining the acceptance region in the data by finding the thresholds on the
43 test statistic such that the integral of the sampling distribution is of the
44 appropriate size and consistent with the limits of integration
45 (eg. upper/lower/central limits),
46 - and finally updating the PointSetInterval based on whether the value of the
47 test statistic evaluated on the data are in the acceptance region.
48
49*/
50
52
54
56
60
61#include "RooMsgService.h"
62#include "RooGlobalFunc.h"
63
64#include "RooDataSet.h"
65#include "TFile.h"
66#include "TTree.h"
67#include "TMath.h"
68#include "TH1F.h"
69
71
72using namespace RooFit;
73using namespace RooStats;
74using namespace std;
75
76
77////////////////////////////////////////////////////////////////////////////////
78/// default constructor
79
80NeymanConstruction::NeymanConstruction(RooAbsData& data, ModelConfig& model):
81 fSize(0.05),
82 fData(data),
83 fModel(model),
84 fTestStatSampler(0),
85 fPointsToTest(0),
86 fLeftSideFraction(0),
87 fConfBelt(0), // constructed with tree data
88 fAdaptiveSampling(false),
89 fAdditionalNToysFactor(1.),
90 fSaveBeltToFile(false),
91 fCreateBelt(false)
92
93{
94// fWS = new RooWorkspace();
95// fOwnsWorkspace = true;
96// fDataName = "";
97// fPdfName = "";
98}
99
100////////////////////////////////////////////////////////////////////////////////
101/// default constructor
102/// if(fOwnsWorkspace && fWS) delete fWS;
103/// if(fConfBelt) delete fConfBelt;
104
106}
107
108////////////////////////////////////////////////////////////////////////////////
109/// Main interface to get a RooStats::ConfInterval.
110/// It constructs a RooStats::SetInterval.
111
113
114 TFile* f=0;
115 if(fSaveBeltToFile){
116 //coverity[FORWARD_NULL]
117 oocoutI(f,Contents) << "NeymanConstruction saving ConfidenceBelt to file SamplingDistributions.root" << endl;
118 f = new TFile("SamplingDistributions.root","recreate");
119 }
120
121 Int_t npass = 0;
122 RooArgSet* point;
123
124 // strange problems when using snapshots.
125 // RooArgSet* fPOI = (RooArgSet*) fModel.GetParametersOfInterest()->snapshot();
127
128 RooDataSet* pointsInInterval = new RooDataSet("pointsInInterval",
129 "points in interval",
130 *(fPointsToTest->get(0)) );
131
132 // loop over points to test
133 for(Int_t i=0; i<fPointsToTest->numEntries(); ++i){
134 // get a parameter point from the list of points to test.
135 point = (RooArgSet*) fPointsToTest->get(i);//->clone("temp");
136
137 // set parameters of interest to current point
138 *fPOI = *point;
139
140 // set test stat sampler to use this point
142
143 // get the value of the test statistic for this data set
144 Double_t thisTestStatistic = fTestStatSampler->EvaluateTestStatistic(fData, *fPOI );
145 /*
146 cout << "NC CHECK: " << i << endl;
147 point->Print();
148 fPOI->Print("v");
149 fData.Print();
150 cout <<"thisTestStatistic = " << thisTestStatistic << endl;
151 */
152
153 // find the lower & upper thresholds on the test statistic that
154 // define the acceptance region in the data
155
156 SamplingDistribution* samplingDist=0;
158 Double_t upperEdgeOfAcceptance, upperEdgeMinusSigma, upperEdgePlusSigma;
159 Double_t lowerEdgeOfAcceptance, lowerEdgeMinusSigma, lowerEdgePlusSigma;
160 Int_t additionalMC=0;
161
162 // the adaptive sampling algorithm wants at least one toy event to be outside
163 // of the requested pvalue including the sampling variation. That leads to an equation
164 // N-1 = (1-alpha)N + Z sqrt(N - (1-alpha)N) // for upper limit and
165 // 1 = alpha N - Z sqrt(alpha N) // for lower limit
166 //
167 // solving for N gives:
168 // N = 1/alpha * [3/2 + sqrt(5)] for Z = 1 (which is used currently)
169 // thus, a good guess for the first iteration of events is N=3.73/alpha~4/alpha
170 // should replace alpha here by smaller tail probability: eg. alpha*Min(leftsideFrac, 1.-leftsideFrac)
171 // totalMC will be incremented by 2 before first call, so initiated it at half the value
173 if(fLeftSideFraction==0. || fLeftSideFraction ==1.){
174 totalMC = (Int_t) (2./fSize);
175 }
176 // use control
178 totalMC = (Int_t) tmc;
179
180 ToyMCSampler* toyMCSampler = dynamic_cast<ToyMCSampler*>(fTestStatSampler);
181 if(fAdaptiveSampling && toyMCSampler) {
182 do{
183 // this will be executed first, then while conditioned checked
184 // as an exit condition for the loop.
185
186 // the next line is where most of the time will be spent
187 // generating the sampling dist of the test statistic.
188 additionalMC = 2*totalMC; // grow by a factor of two
189 samplingDist =
190 toyMCSampler->AppendSamplingDistribution(*point,
191 samplingDist,
192 additionalMC);
193 if (!samplingDist) {
194 oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl;
195 return 0;
196 }
197 totalMC=samplingDist->GetSize();
198
199 //cout << "without sigma upper = " <<
200 //samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ) << endl;
201
202 sigma = 1;
203 upperEdgeOfAcceptance =
204 samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,
205 sigma, upperEdgePlusSigma);
206 sigma = -1;
207 samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,
208 sigma, upperEdgeMinusSigma);
209
210 sigma = 1;
211 lowerEdgeOfAcceptance =
212 samplingDist->InverseCDF( fLeftSideFraction * fSize ,
213 sigma, lowerEdgePlusSigma);
214 sigma = -1;
215 samplingDist->InverseCDF( fLeftSideFraction * fSize ,
216 sigma, lowerEdgeMinusSigma);
217
218 ooccoutD(samplingDist,Eval) << "NeymanConstruction: "
219 << "total MC = " << totalMC
220 << " this test stat = " << thisTestStatistic << endl
221 << " upper edge -1sigma = " << upperEdgeMinusSigma
222 << ", upperEdge = "<<upperEdgeOfAcceptance
223 << ", upper edge +1sigma = " << upperEdgePlusSigma << endl
224 << " lower edge -1sigma = " << lowerEdgeMinusSigma
225 << ", lowerEdge = "<<lowerEdgeOfAcceptance
226 << ", lower edge +1sigma = " << lowerEdgePlusSigma << endl;
227 } while((
228 (thisTestStatistic <= upperEdgeOfAcceptance &&
229 thisTestStatistic > upperEdgeMinusSigma)
230 || (thisTestStatistic >= upperEdgeOfAcceptance &&
231 thisTestStatistic < upperEdgePlusSigma)
232 || (thisTestStatistic <= lowerEdgeOfAcceptance &&
233 thisTestStatistic > lowerEdgeMinusSigma)
234 || (thisTestStatistic >= lowerEdgeOfAcceptance &&
235 thisTestStatistic < lowerEdgePlusSigma)
236 ) && (totalMC < 100./fSize)
237 ) ; // need ; here
238 } else {
239 // the next line is where most of the time will be spent
240 // generating the sampling dist of the test statistic.
241 samplingDist = fTestStatSampler->GetSamplingDistribution(*point);
242 if (!samplingDist) {
243 oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl;
244 return 0;
245 }
246
247 lowerEdgeOfAcceptance =
248 samplingDist->InverseCDF( fLeftSideFraction * fSize );
249 upperEdgeOfAcceptance =
250 samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) );
251 }
252
253 // add acceptance region to ConfidenceBelt
254 if(fConfBelt && fCreateBelt){
255 // cout << "conf belt set " << fConfBelt << endl;
257 lowerEdgeOfAcceptance,
258 upperEdgeOfAcceptance);
259 }
260
261 // printout some debug info
262 TIter itr = point->createIterator();
263 RooRealVar* myarg;
264 ooccoutP(samplingDist,Eval) << "NeymanConstruction: Prog: "<< i+1<<"/"<<fPointsToTest->numEntries()
265 << " total MC = " << samplingDist->GetSize()
266 << " this test stat = " << thisTestStatistic << endl;
267 ooccoutP(samplingDist,Eval) << " ";
268 while ((myarg = (RooRealVar *)itr.Next())) {
269 ooccoutP(samplingDist,Eval) << myarg->GetName() << "=" << myarg->getVal() << " ";
270 }
271 ooccoutP(samplingDist,Eval) << "[" << lowerEdgeOfAcceptance << ", "
272 << upperEdgeOfAcceptance << "] " << " in interval = " <<
273 (thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance)
274 << endl << endl;
275
276 // Check if this data is in the acceptance region
277 if(thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) {
278 // if so, set this point to true
279 // fPointsToTest->add(*point, 1.); // this behaves differently for Hist and DataSet
280 pointsInInterval->add(*point);
281 ++npass;
282 }
283
284 if(fSaveBeltToFile){
285 //write to file
286 samplingDist->Write();
287 string tmpName = "hist_";
288 tmpName+=samplingDist->GetName();
289 TH1F* h = new TH1F(tmpName.c_str(),"",500,0.,5.);
290 for(int ii=0; ii<samplingDist->GetSize(); ++ii){
291 h->Fill(samplingDist->GetSamplingDistribution().at(ii) );
292 }
293 h->Write();
294 delete h;
295 }
296
297 delete samplingDist;
298 // delete point; // from dataset
299 }
300 oocoutI(pointsInInterval,Eval) << npass << " points in interval" << endl;
301
302 // create an interval based pointsInInterval
303 PointSetInterval* interval
304 = new PointSetInterval("ClassicalConfidenceInterval", *pointsInInterval);
305
306
307 if(fSaveBeltToFile){
308 // write belt to file
309 fConfBelt->Write();
310
311 f->Close();
312 }
313
314 delete f;
315 //delete data;
316 return interval;
317}
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
#define oocoutE(o, a)
Definition: RooMsgService.h:47
#define oocoutI(o, a)
Definition: RooMsgService.h:44
#define ooccoutP(o, a)
Definition: RooMsgService.h:52
#define ooccoutD(o, a)
Definition: RooMsgService.h:50
int Int_t
Definition: RtypesCore.h:41
double Double_t
Definition: RtypesCore.h:55
#define ClassImp(name)
Definition: Rtypes.h:365
TIterator * createIterator(Bool_t dir=kIterForward) const R__SUGGEST_ALTERNATIVE("begin()
TIterator-style iteration over contained elements.
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
virtual const RooArgSet * get() const
Definition: RooAbsData.h:80
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:306
Double_t getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition: RooAbsReal.h:81
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:28
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:31
virtual void add(const RooArgSet &row, Double_t weight=1.0, Double_t weightError=0)
Add a data point, with its coordinates specified in the 'data' argset, to the data set.
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:36
void AddAcceptanceRegion(RooArgSet &, AcceptanceRegion region, Double_t cl=-1., Double_t leftside=-1.)
ModelConfig is a simple class that holds configuration information specifying how a model should be u...
Definition: ModelConfig.h:30
const RooArgSet * GetParametersOfInterest() const
get RooArgSet containing the parameter of interest (return NULL if not existing)
Definition: ModelConfig.h:231
NeymanConstruction is a concrete implementation of the NeymanConstruction interface that,...
virtual PointSetInterval * GetInterval() const
Main interface to get a ConfInterval (will be a PointSetInterval)
virtual ~NeymanConstruction()
default constructor if(fOwnsWorkspace && fWS) delete fWS; if(fConfBelt) delete fConfBelt;
ModelConfig & fModel
data set
RooAbsData & fData
size of the test (eg. specified rate of Type I error)
PointSetInterval is a concrete implementation of the ConfInterval interface.
This class simply holds a sampling distribution of some test statistic.
Double_t InverseCDF(Double_t pvalue)
get the inverse of the Cumulative distribution function
Int_t GetSize() const
size of samples
const std::vector< Double_t > & GetSamplingDistribution() const
Get test statistics values.
virtual Double_t EvaluateTestStatistic(RooAbsData &data, RooArgSet &paramsOfInterest)=0
virtual void SetParametersForTestStat(const RooArgSet &)=0
virtual SamplingDistribution * GetSamplingDistribution(RooArgSet &paramsOfInterest)=0
ToyMCSampler is an implementation of the TestStatSampler interface.
Definition: ToyMCSampler.h:72
virtual SamplingDistribution * AppendSamplingDistribution(RooArgSet &allParameters, SamplingDistribution *last, Int_t additionalMC)
Extended interface to append to sampling distribution more samples.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:48
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
TObject * Next()
Definition: TCollection.h:249
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Mother of all ROOT objects.
Definition: TObject.h:37
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
const Double_t sigma
Template specialisation used in RooAbsArg:
Namespace for the RooStats classes.
Definition: Asimov.h:20
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180