Logo ROOT   6.08/07
Reference Guide
NeymanConstruction.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id$
2 // Author: Kyle Cranmer January 2009
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////////////////////////////////
13 
14 
15 
16 #ifndef RooStats_NeymanConstruction
18 #endif
19 
20 #ifndef RooStats_RooStatsUtils
21 #include "RooStats/RooStatsUtils.h"
22 #endif
23 
24 #ifndef RooStats_PointSetInterval
26 #endif
27 
29 #include "RooStats/ToyMCSampler.h"
30 #include "RooStats/ModelConfig.h"
31 
32 #include "RooMsgService.h"
33 #include "RooGlobalFunc.h"
34 
35 #include "RooDataSet.h"
36 #include "TFile.h"
37 #include "TTree.h"
38 #include "TMath.h"
39 #include "TH1F.h"
40 
41 
42 
44 
45 using namespace RooFit;
46 using namespace RooStats;
47 using namespace std;
48 
49 
50 ////////////////////////////////////////////////////////////////////////////////
51 
52 NeymanConstruction::NeymanConstruction(RooAbsData& data, ModelConfig& model):
53  fSize(0.05),
54  fData(data),
55  fModel(model),
56  fTestStatSampler(0),
57  fPointsToTest(0),
58  fLeftSideFraction(0),
59  fConfBelt(0), // constructed with tree data
60  fAdaptiveSampling(false),
61  fAdditionalNToysFactor(1.),
62  fSaveBeltToFile(false),
63  fCreateBelt(false)
64 
65 {
66  // default constructor
67 // fWS = new RooWorkspace();
68 // fOwnsWorkspace = true;
69 // fDataName = "";
70 // fPdfName = "";
71 }
72 
73 ////////////////////////////////////////////////////////////////////////////////
74 /// default constructor
75 /// if(fOwnsWorkspace && fWS) delete fWS;
76 /// if(fConfBelt) delete fConfBelt;
77 
79 }
80 
81 ////////////////////////////////////////////////////////////////////////////////
82 /// Main interface to get a RooStats::ConfInterval.
83 /// It constructs a RooStats::SetInterval.
84 
86 
87  TFile* f=0;
88  if(fSaveBeltToFile){
89  //coverity[FORWARD_NULL]
90  oocoutI(f,Contents) << "NeymanConstruction saving ConfidenceBelt to file SamplingDistributions.root" << endl;
91  f = new TFile("SamplingDistributions.root","recreate");
92  }
93 
94  Int_t npass = 0;
95  RooArgSet* point;
96 
97  // strange problems when using snapshots.
98  // RooArgSet* fPOI = (RooArgSet*) fModel.GetParametersOfInterest()->snapshot();
100 
101  RooDataSet* pointsInInterval = new RooDataSet("pointsInInterval",
102  "points in interval",
103  *(fPointsToTest->get(0)) );
104 
105  // loop over points to test
106  for(Int_t i=0; i<fPointsToTest->numEntries(); ++i){
107  // get a parameter point from the list of points to test.
108  point = (RooArgSet*) fPointsToTest->get(i);//->clone("temp");
109 
110  // set parameters of interest to current point
111  *fPOI = *point;
112 
113  // set test stat sampler to use this point
115 
116  // get the value of the test statistic for this data set
117  Double_t thisTestStatistic = fTestStatSampler->EvaluateTestStatistic(fData, *fPOI );
118  /*
119  cout << "NC CHECK: " << i << endl;
120  point->Print();
121  fPOI->Print("v");
122  fData.Print();
123  cout <<"thisTestStatistic = " << thisTestStatistic << endl;
124  */
125 
126  // find the lower & upper thresholds on the test statistic that
127  // define the acceptance region in the data
128 
129  SamplingDistribution* samplingDist=0;
130  Double_t sigma;
131  Double_t upperEdgeOfAcceptance, upperEdgeMinusSigma, upperEdgePlusSigma;
132  Double_t lowerEdgeOfAcceptance, lowerEdgeMinusSigma, lowerEdgePlusSigma;
133  Int_t additionalMC=0;
134 
135  // the adaptive sampling algorithm wants at least one toy event to be outside
136  // of the requested pvalue including the sampling variaton. That leads to an equation
137  // N-1 = (1-alpha)N + Z sqrt(N - (1-alpha)N) // for upper limit and
138  // 1 = alpha N - Z sqrt(alpha N) // for lower limit
139  //
140  // solving for N gives:
141  // N = 1/alpha * [3/2 + sqrt(5)] for Z = 1 (which is used currently)
142  // thus, a good guess for the first iteration of events is N=3.73/alpha~4/alpha
143  // should replace alpha here by smaller tail probability: eg. alpha*Min(leftsideFrac, 1.-leftsideFrac)
144  // totalMC will be incremented by 2 before first call, so initiated it at half the value
146  if(fLeftSideFraction==0. || fLeftSideFraction ==1.){
147  totalMC = (Int_t) (2./fSize);
148  }
149  // use control
151  totalMC = (Int_t) tmc;
152 
153  ToyMCSampler* toyMCSampler = dynamic_cast<ToyMCSampler*>(fTestStatSampler);
154  if(fAdaptiveSampling && toyMCSampler) {
155  do{
156  // this will be executed first, then while conditioned checked
157  // as an exit condition for the loop.
158 
159  // the next line is where most of the time will be spent
160  // generating the sampling dist of the test statistic.
161  additionalMC = 2*totalMC; // grow by a factor of two
162  samplingDist =
163  toyMCSampler->AppendSamplingDistribution(*point,
164  samplingDist,
165  additionalMC);
166  if (!samplingDist) {
167  oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl;
168  return 0;
169  }
170  totalMC=samplingDist->GetSize();
171 
172  //cout << "without sigma upper = " <<
173  //samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ) << endl;
174 
175  sigma = 1;
176  upperEdgeOfAcceptance =
177  samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,
178  sigma, upperEdgePlusSigma);
179  sigma = -1;
180  samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,
181  sigma, upperEdgeMinusSigma);
182 
183  sigma = 1;
184  lowerEdgeOfAcceptance =
185  samplingDist->InverseCDF( fLeftSideFraction * fSize ,
186  sigma, lowerEdgePlusSigma);
187  sigma = -1;
188  samplingDist->InverseCDF( fLeftSideFraction * fSize ,
189  sigma, lowerEdgeMinusSigma);
190 
191  ooccoutD(samplingDist,Eval) << "NeymanConstruction: "
192  << "total MC = " << totalMC
193  << " this test stat = " << thisTestStatistic << endl
194  << " upper edge -1sigma = " << upperEdgeMinusSigma
195  << ", upperEdge = "<<upperEdgeOfAcceptance
196  << ", upper edge +1sigma = " << upperEdgePlusSigma << endl
197  << " lower edge -1sigma = " << lowerEdgeMinusSigma
198  << ", lowerEdge = "<<lowerEdgeOfAcceptance
199  << ", lower edge +1sigma = " << lowerEdgePlusSigma << endl;
200  } while((
201  (thisTestStatistic <= upperEdgeOfAcceptance &&
202  thisTestStatistic > upperEdgeMinusSigma)
203  || (thisTestStatistic >= upperEdgeOfAcceptance &&
204  thisTestStatistic < upperEdgePlusSigma)
205  || (thisTestStatistic <= lowerEdgeOfAcceptance &&
206  thisTestStatistic > lowerEdgeMinusSigma)
207  || (thisTestStatistic >= lowerEdgeOfAcceptance &&
208  thisTestStatistic < lowerEdgePlusSigma)
209  ) && (totalMC < 100./fSize)
210  ) ; // need ; here
211  } else {
212  // the next line is where most of the time will be spent
213  // generating the sampling dist of the test statistic.
214  samplingDist = fTestStatSampler->GetSamplingDistribution(*point);
215  if (!samplingDist) {
216  oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl;
217  return 0;
218  }
219 
220  lowerEdgeOfAcceptance =
221  samplingDist->InverseCDF( fLeftSideFraction * fSize );
222  upperEdgeOfAcceptance =
223  samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) );
224  }
225 
226  // add acceptance region to ConfidenceBelt
227  if(fConfBelt && fCreateBelt){
228  // cout << "conf belt set " << fConfBelt << endl;
229  fConfBelt->AddAcceptanceRegion(*point, i,
230  lowerEdgeOfAcceptance,
231  upperEdgeOfAcceptance);
232  }
233 
234  // printout some debug info
235  TIter itr = point->createIterator();
236  RooRealVar* myarg;
237  ooccoutP(samplingDist,Eval) << "NeymanConstruction: Prog: "<< i+1<<"/"<<fPointsToTest->numEntries()
238  << " total MC = " << samplingDist->GetSize()
239  << " this test stat = " << thisTestStatistic << endl;
240  ooccoutP(samplingDist,Eval) << " ";
241  while ((myarg = (RooRealVar *)itr.Next())) {
242  ooccoutP(samplingDist,Eval) << myarg->GetName() << "=" << myarg->getVal() << " ";
243  }
244  ooccoutP(samplingDist,Eval) << "[" << lowerEdgeOfAcceptance << ", "
245  << upperEdgeOfAcceptance << "] " << " in interval = " <<
246  (thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance)
247  << endl << endl;
248 
249  // Check if this data is in the acceptance region
250  if(thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) {
251  // if so, set this point to true
252  // fPointsToTest->add(*point, 1.); // this behaves differently for Hist and DataSet
253  pointsInInterval->add(*point);
254  ++npass;
255  }
256 
257  if(fSaveBeltToFile){
258  //write to file
259  samplingDist->Write();
260  string tmpName = "hist_";
261  tmpName+=samplingDist->GetName();
262  TH1F* h = new TH1F(tmpName.c_str(),"",500,0.,5.);
263  for(int ii=0; ii<samplingDist->GetSize(); ++ii){
264  h->Fill(samplingDist->GetSamplingDistribution().at(ii) );
265  }
266  h->Write();
267  delete h;
268  }
269 
270  delete samplingDist;
271  // delete point; // from dataset
272  }
273  oocoutI(pointsInInterval,Eval) << npass << " points in interval" << endl;
274 
275  // create an interval based pointsInInterval
276  PointSetInterval* interval
277  = new PointSetInterval("ClassicalConfidenceInterval", *pointsInInterval);
278 
279 
280  if(fSaveBeltToFile){
281  // write belt to file
282  fConfBelt->Write();
283 
284  f->Close();
285  }
286 
287  delete f;
288  //delete data;
289  return interval;
290 }
291 
292 
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:830
const int npass
Definition: testPermute.cxx:21
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3125
TIterator * createIterator(Bool_t dir=kIterForward) const
const std::vector< Double_t > & GetSamplingDistribution() const
Get test statistics values.
ModelConfig is a simple class that holds configuration information specifying how a model should be u...
Definition: ModelConfig.h:52
virtual void SetParametersForTestStat(const RooArgSet &)=0
virtual const RooArgSet * get() const
Definition: RooAbsData.h:77
Double_t getVal(const RooArgSet *set=0) const
Definition: RooAbsReal.h:64
virtual PointSetInterval * GetInterval() const
Main interface to get a ConfInterval (will be a PointSetInterval)
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:302
Double_t InverseCDF(Double_t pvalue)
get the inverse of the Cumulative distribution function
TH1 * h
Definition: legend2.C:5
#define oocoutI(o, a)
Definition: RooMsgService.h:45
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
int Int_t
Definition: RtypesCore.h:41
#define ooccoutP(o, a)
Definition: RooMsgService.h:53
STL namespace.
void AddAcceptanceRegion(RooArgSet &, AcceptanceRegion region, Double_t cl=-1., Double_t leftside=-1.)
#define oocoutE(o, a)
Definition: RooMsgService.h:48
virtual SamplingDistribution * AppendSamplingDistribution(RooArgSet &allParameters, SamplingDistribution *last, Int_t additionalMC)
const Double_t sigma
virtual SamplingDistribution * GetSamplingDistribution(RooArgSet &paramsOfInterest)=0
virtual ~NeymanConstruction()
default constructor if(fOwnsWorkspace && fWS) delete fWS; if(fConfBelt) delete fConfBelt; ...
RooAbsData & fData
size of the test (eg. specified rate of Type I error)
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:37
TObject * Next()
Definition: TCollection.h:158
ModelConfig & fModel
data set
Int_t GetSize() const
size of samples
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
ToyMCSampler is an implementation of the TestStatSampler interface.
Definition: ToyMCSampler.h:99
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:29
This class simply holds a sampling distribution of some test statistic.
Namespace for the RooStats classes.
Definition: Asimov.h:20
const RooArgSet * GetParametersOfInterest() const
get RooArgSet containing the parameter of interest (return NULL if not existing)
Definition: ModelConfig.h:247
PointSetInterval is a concrete implementation of the ConfInterval interface.
#define ClassImp(name)
Definition: Rtypes.h:279
double f(double x)
double Double_t
Definition: RtypesCore.h:55
NeymanConstruction is a concrete implementation of the NeymanConstruction interface that...
#define ooccoutD(o, a)
Definition: RooMsgService.h:51
Mother of all ROOT objects.
Definition: TObject.h:37
virtual Double_t EvaluateTestStatistic(RooAbsData &data, RooArgSet &paramsOfInterest)=0
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:269