Logo ROOT   6.10/09
Reference Guide
NeymanConstruction.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id$
2 // Author: Kyle Cranmer January 2009
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class RooStats::NeymanConstruction
13  \ingroup Roostats
14 
15 NeymanConstruction is a concrete implementation of the NeymanConstruction
16 interface that, as the name suggests, performs a NeymanConstruction. It produces
17 a RooStats::PointSetInterval, which is a concrete implementation of the
18 ConfInterval interface.
19 
20 The Neyman Construction is not a uniquely defined statistical technique, it
21 requires that one specify an ordering rule or ordering principle, which is
22 usually incoded by choosing a specific test statistic and limits of integration
23 (corresponding to upper/lower/central limits). As a result, this class must be
24 configured with the corresponding information before it can produce an interval.
25 Common configurations, such as the Feldman-Cousins approach, can be enforced by
26 other light weight classes.
27 
28 The Neyman Construction considers every point in the parameter space
29 independently, no assumptions are made that the interval is connected or of a
30 particular shape. As a result, the PointSetInterval class is used to represent
31 the result. The user indicate which points in the parameter space to perform
32 the construction by providing a PointSetInterval instance with the desired points.
33 
34 This class is fairly light weight, because the choice of parameter points to be
35 considered is factorized and so is the creation of the sampling distribution of
36 the test statistic (which is done by a concrete class implementing the
37 DistributionCreator interface). As a result, this class basically just drives the
38 construction by:
39 
40  - using a DistributionCreator to create the SamplingDistribution of a user-
41  defined test statistic for each parameter point of interest,
42  - defining the acceptance region in the data by finding the thresholds on the
43  test statistic such that the integral of the sampling distribution is of the
44  appropriate size and consistent with the limits of integration
45  (eg. upper/lower/central limits),
46  - and finally updating the PointSetInterval based on whether the value of the
47  test statistic evaluated on the data are in the acceptance region.
48 
49 */
50 
52 
53 #include "RooStats/RooStatsUtils.h"
54 
56 
58 #include "RooStats/ToyMCSampler.h"
59 #include "RooStats/ModelConfig.h"
60 
61 #include "RooMsgService.h"
62 #include "RooGlobalFunc.h"
63 
64 #include "RooDataSet.h"
65 #include "TFile.h"
66 #include "TTree.h"
67 #include "TMath.h"
68 #include "TH1F.h"
69 
71 
72 using namespace RooFit;
73 using namespace RooStats;
74 using namespace std;
75 
76 
77 ////////////////////////////////////////////////////////////////////////////////
78 /// default constructor
79 
80 NeymanConstruction::NeymanConstruction(RooAbsData& data, ModelConfig& model):
81  fSize(0.05),
82  fData(data),
83  fModel(model),
84  fTestStatSampler(0),
85  fPointsToTest(0),
86  fLeftSideFraction(0),
87  fConfBelt(0), // constructed with tree data
88  fAdaptiveSampling(false),
89  fAdditionalNToysFactor(1.),
90  fSaveBeltToFile(false),
91  fCreateBelt(false)
92 
93 {
94 // fWS = new RooWorkspace();
95 // fOwnsWorkspace = true;
96 // fDataName = "";
97 // fPdfName = "";
98 }
99 
100 ////////////////////////////////////////////////////////////////////////////////
101 /// default constructor
102 /// if(fOwnsWorkspace && fWS) delete fWS;
103 /// if(fConfBelt) delete fConfBelt;
104 
106 }
107 
108 ////////////////////////////////////////////////////////////////////////////////
109 /// Main interface to get a RooStats::ConfInterval.
110 /// It constructs a RooStats::SetInterval.
111 
113 
114  TFile* f=0;
115  if(fSaveBeltToFile){
116  //coverity[FORWARD_NULL]
117  oocoutI(f,Contents) << "NeymanConstruction saving ConfidenceBelt to file SamplingDistributions.root" << endl;
118  f = new TFile("SamplingDistributions.root","recreate");
119  }
120 
121  Int_t npass = 0;
122  RooArgSet* point;
123 
124  // strange problems when using snapshots.
125  // RooArgSet* fPOI = (RooArgSet*) fModel.GetParametersOfInterest()->snapshot();
127 
128  RooDataSet* pointsInInterval = new RooDataSet("pointsInInterval",
129  "points in interval",
130  *(fPointsToTest->get(0)) );
131 
132  // loop over points to test
133  for(Int_t i=0; i<fPointsToTest->numEntries(); ++i){
134  // get a parameter point from the list of points to test.
135  point = (RooArgSet*) fPointsToTest->get(i);//->clone("temp");
136 
137  // set parameters of interest to current point
138  *fPOI = *point;
139 
140  // set test stat sampler to use this point
142 
143  // get the value of the test statistic for this data set
144  Double_t thisTestStatistic = fTestStatSampler->EvaluateTestStatistic(fData, *fPOI );
145  /*
146  cout << "NC CHECK: " << i << endl;
147  point->Print();
148  fPOI->Print("v");
149  fData.Print();
150  cout <<"thisTestStatistic = " << thisTestStatistic << endl;
151  */
152 
153  // find the lower & upper thresholds on the test statistic that
154  // define the acceptance region in the data
155 
156  SamplingDistribution* samplingDist=0;
157  Double_t sigma;
158  Double_t upperEdgeOfAcceptance, upperEdgeMinusSigma, upperEdgePlusSigma;
159  Double_t lowerEdgeOfAcceptance, lowerEdgeMinusSigma, lowerEdgePlusSigma;
160  Int_t additionalMC=0;
161 
162  // the adaptive sampling algorithm wants at least one toy event to be outside
163  // of the requested pvalue including the sampling variation. That leads to an equation
164  // N-1 = (1-alpha)N + Z sqrt(N - (1-alpha)N) // for upper limit and
165  // 1 = alpha N - Z sqrt(alpha N) // for lower limit
166  //
167  // solving for N gives:
168  // N = 1/alpha * [3/2 + sqrt(5)] for Z = 1 (which is used currently)
169  // thus, a good guess for the first iteration of events is N=3.73/alpha~4/alpha
170  // should replace alpha here by smaller tail probability: eg. alpha*Min(leftsideFrac, 1.-leftsideFrac)
171  // totalMC will be incremented by 2 before first call, so initiated it at half the value
173  if(fLeftSideFraction==0. || fLeftSideFraction ==1.){
174  totalMC = (Int_t) (2./fSize);
175  }
176  // use control
178  totalMC = (Int_t) tmc;
179 
180  ToyMCSampler* toyMCSampler = dynamic_cast<ToyMCSampler*>(fTestStatSampler);
181  if(fAdaptiveSampling && toyMCSampler) {
182  do{
183  // this will be executed first, then while conditioned checked
184  // as an exit condition for the loop.
185 
186  // the next line is where most of the time will be spent
187  // generating the sampling dist of the test statistic.
188  additionalMC = 2*totalMC; // grow by a factor of two
189  samplingDist =
190  toyMCSampler->AppendSamplingDistribution(*point,
191  samplingDist,
192  additionalMC);
193  if (!samplingDist) {
194  oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl;
195  return 0;
196  }
197  totalMC=samplingDist->GetSize();
198 
199  //cout << "without sigma upper = " <<
200  //samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ) << endl;
201 
202  sigma = 1;
203  upperEdgeOfAcceptance =
204  samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,
205  sigma, upperEdgePlusSigma);
206  sigma = -1;
207  samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,
208  sigma, upperEdgeMinusSigma);
209 
210  sigma = 1;
211  lowerEdgeOfAcceptance =
212  samplingDist->InverseCDF( fLeftSideFraction * fSize ,
213  sigma, lowerEdgePlusSigma);
214  sigma = -1;
215  samplingDist->InverseCDF( fLeftSideFraction * fSize ,
216  sigma, lowerEdgeMinusSigma);
217 
218  ooccoutD(samplingDist,Eval) << "NeymanConstruction: "
219  << "total MC = " << totalMC
220  << " this test stat = " << thisTestStatistic << endl
221  << " upper edge -1sigma = " << upperEdgeMinusSigma
222  << ", upperEdge = "<<upperEdgeOfAcceptance
223  << ", upper edge +1sigma = " << upperEdgePlusSigma << endl
224  << " lower edge -1sigma = " << lowerEdgeMinusSigma
225  << ", lowerEdge = "<<lowerEdgeOfAcceptance
226  << ", lower edge +1sigma = " << lowerEdgePlusSigma << endl;
227  } while((
228  (thisTestStatistic <= upperEdgeOfAcceptance &&
229  thisTestStatistic > upperEdgeMinusSigma)
230  || (thisTestStatistic >= upperEdgeOfAcceptance &&
231  thisTestStatistic < upperEdgePlusSigma)
232  || (thisTestStatistic <= lowerEdgeOfAcceptance &&
233  thisTestStatistic > lowerEdgeMinusSigma)
234  || (thisTestStatistic >= lowerEdgeOfAcceptance &&
235  thisTestStatistic < lowerEdgePlusSigma)
236  ) && (totalMC < 100./fSize)
237  ) ; // need ; here
238  } else {
239  // the next line is where most of the time will be spent
240  // generating the sampling dist of the test statistic.
241  samplingDist = fTestStatSampler->GetSamplingDistribution(*point);
242  if (!samplingDist) {
243  oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl;
244  return 0;
245  }
246 
247  lowerEdgeOfAcceptance =
248  samplingDist->InverseCDF( fLeftSideFraction * fSize );
249  upperEdgeOfAcceptance =
250  samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) );
251  }
252 
253  // add acceptance region to ConfidenceBelt
254  if(fConfBelt && fCreateBelt){
255  // cout << "conf belt set " << fConfBelt << endl;
256  fConfBelt->AddAcceptanceRegion(*point, i,
257  lowerEdgeOfAcceptance,
258  upperEdgeOfAcceptance);
259  }
260 
261  // printout some debug info
262  TIter itr = point->createIterator();
263  RooRealVar* myarg;
264  ooccoutP(samplingDist,Eval) << "NeymanConstruction: Prog: "<< i+1<<"/"<<fPointsToTest->numEntries()
265  << " total MC = " << samplingDist->GetSize()
266  << " this test stat = " << thisTestStatistic << endl;
267  ooccoutP(samplingDist,Eval) << " ";
268  while ((myarg = (RooRealVar *)itr.Next())) {
269  ooccoutP(samplingDist,Eval) << myarg->GetName() << "=" << myarg->getVal() << " ";
270  }
271  ooccoutP(samplingDist,Eval) << "[" << lowerEdgeOfAcceptance << ", "
272  << upperEdgeOfAcceptance << "] " << " in interval = " <<
273  (thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance)
274  << endl << endl;
275 
276  // Check if this data is in the acceptance region
277  if(thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) {
278  // if so, set this point to true
279  // fPointsToTest->add(*point, 1.); // this behaves differently for Hist and DataSet
280  pointsInInterval->add(*point);
281  ++npass;
282  }
283 
284  if(fSaveBeltToFile){
285  //write to file
286  samplingDist->Write();
287  string tmpName = "hist_";
288  tmpName+=samplingDist->GetName();
289  TH1F* h = new TH1F(tmpName.c_str(),"",500,0.,5.);
290  for(int ii=0; ii<samplingDist->GetSize(); ++ii){
291  h->Fill(samplingDist->GetSamplingDistribution().at(ii) );
292  }
293  h->Write();
294  delete h;
295  }
296 
297  delete samplingDist;
298  // delete point; // from dataset
299  }
300  oocoutI(pointsInInterval,Eval) << npass << " points in interval" << endl;
301 
302  // create an interval based pointsInInterval
303  PointSetInterval* interval
304  = new PointSetInterval("ClassicalConfidenceInterval", *pointsInInterval);
305 
306 
307  if(fSaveBeltToFile){
308  // write belt to file
309  fConfBelt->Write();
310 
311  f->Close();
312  }
313 
314  delete f;
315  //delete data;
316  return interval;
317 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:778
const int npass
Definition: testPermute.cxx:21
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3126
TIterator * createIterator(Bool_t dir=kIterForward) const
const std::vector< Double_t > & GetSamplingDistribution() const
Get test statistics values.
ModelConfig is a simple class that holds configuration information specifying how a model should be u...
Definition: ModelConfig.h:30
virtual void SetParametersForTestStat(const RooArgSet &)=0
virtual const RooArgSet * get() const
Definition: RooAbsData.h:77
Double_t getVal(const RooArgSet *set=0) const
Definition: RooAbsReal.h:64
virtual PointSetInterval * GetInterval() const
Main interface to get a ConfInterval (will be a PointSetInterval)
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:311
Double_t InverseCDF(Double_t pvalue)
get the inverse of the Cumulative distribution function
TH1 * h
Definition: legend2.C:5
#define oocoutI(o, a)
Definition: RooMsgService.h:44
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:551
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
int Int_t
Definition: RtypesCore.h:41
#define ooccoutP(o, a)
Definition: RooMsgService.h:52
STL namespace.
void AddAcceptanceRegion(RooArgSet &, AcceptanceRegion region, Double_t cl=-1., Double_t leftside=-1.)
#define oocoutE(o, a)
Definition: RooMsgService.h:47
virtual SamplingDistribution * AppendSamplingDistribution(RooArgSet &allParameters, SamplingDistribution *last, Int_t additionalMC)
Extended interface to append to sampling distribution more samples.
const Double_t sigma
virtual SamplingDistribution * GetSamplingDistribution(RooArgSet &paramsOfInterest)=0
virtual ~NeymanConstruction()
default constructor if(fOwnsWorkspace && fWS) delete fWS; if(fConfBelt) delete fConfBelt; ...
RooAbsData & fData
size of the test (eg. specified rate of Type I error)
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:36
TObject * Next()
Definition: TCollection.h:153
ModelConfig & fModel
data set
Int_t GetSize() const
size of samples
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
ToyMCSampler is an implementation of the TestStatSampler interface.
Definition: ToyMCSampler.h:71
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:29
This class simply holds a sampling distribution of some test statistic.
Namespace for the RooStats classes.
Definition: Asimov.h:20
const RooArgSet * GetParametersOfInterest() const
get RooArgSet containing the parameter of interest (return NULL if not existing)
Definition: ModelConfig.h:225
PointSetInterval is a concrete implementation of the ConfInterval interface.
#define ClassImp(name)
Definition: Rtypes.h:336
double f(double x)
double Double_t
Definition: RtypesCore.h:55
NeymanConstruction is a concrete implementation of the NeymanConstruction interface that...
#define ooccoutD(o, a)
Definition: RooMsgService.h:50
Mother of all ROOT objects.
Definition: TObject.h:37
virtual Double_t EvaluateTestStatistic(RooAbsData &data, RooArgSet &paramsOfInterest)=0
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:269