Logo ROOT   6.12/07
Reference Guide
StandardTestStatDistributionDemo.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_roostats
3 /// \notebook
4 /// StandardTestStatDistributionDemo.C
5 ///
6 /// This simple script plots the sampling distribution of the profile likelihood
7 /// ratio test statistic based on the input Model File. To do this one needs to
8 /// specify the value of the parameter of interest that will be used for evaluating
9 /// the test statistic and the value of the parameters used for generating the toy data.
10 /// In this case, it uses the upper-limit estimated from the ProfileLikleihoodCalculator,
11 /// which assumes the asymptotic chi-square distribution for -2 log profile likelihood ratio.
12 /// Thus, the script is handy for checking to see if the asymptotic approximations are valid.
13 /// To aid, that comparison, the script overlays a chi-square distribution as well.
14 /// The most common parameter of interest is a parameter proportional to the signal rate,
15 /// and often that has a lower-limit of 0, which breaks the standard chi-square distribution.
16 /// Thus the script allows the parameter to be negative so that the overlay chi-square is
17 /// the correct asymptotic distribution.
18 ///
19 /// \macro_image
20 /// \macro_output
21 /// \macro_code
22 ///
23 /// \author Kyle Cranmer
24 
25 #include "TFile.h"
26 #include "TROOT.h"
27 #include "TH1F.h"
28 #include "TCanvas.h"
29 #include "TSystem.h"
30 #include "TF1.h"
31 #include "TSystem.h"
32 
33 #include "RooWorkspace.h"
34 #include "RooAbsData.h"
35 
36 #include "RooStats/ModelConfig.h"
38 #include "RooStats/ToyMCSampler.h"
41 
47 
48 using namespace RooFit;
49 using namespace RooStats;
50 
51 
52 bool useProof = false; // flag to control whether to use Proof
53 int nworkers = 0; // number of workers (default use all available cores)
54 
55 // -------------------------------------------------------
56 // The actual macro
57 
58 void StandardTestStatDistributionDemo(const char* infile = "",
59  const char* workspaceName = "combined",
60  const char* modelConfigName = "ModelConfig",
61  const char* dataName = "obsData"){
62 
63 
64  // the number of toy MC used to generate the distribution
65  int nToyMC = 1000;
66  // The parameter below is needed for asymptotic distribution to be chi-square,
67  // but set to false if your model is not numerically stable if mu<0
68  bool allowNegativeMu=true;
69 
70 
71  // -------------------------------------------------------
72  // First part is just to access a user-defined file
73  // or create the standard example file if it doesn't exist
74  const char* filename = "";
75  if (!strcmp(infile,"")) {
76  filename = "results/example_combined_GaussExample_model.root";
77  bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
78  // if file does not exists generate with histfactory
79  if (!fileExist) {
80 #ifdef _WIN32
81  cout << "HistFactory file cannot be generated on Windows - exit" << endl;
82  return;
83 #endif
84  // Normally this would be run on the command line
85  cout <<"will run standard hist2workspace example"<<endl;
86  gROOT->ProcessLine(".! prepareHistFactory .");
87  gROOT->ProcessLine(".! hist2workspace config/example.xml");
88  cout <<"\n\n---------------------"<<endl;
89  cout <<"Done creating example input"<<endl;
90  cout <<"---------------------\n\n"<<endl;
91  }
92 
93  }
94  else
95  filename = infile;
96 
97  // Try to open the file
98  TFile *file = TFile::Open(filename);
99 
100  // if input file was specified byt not found, quit
101  if(!file ){
102  cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
103  return;
104  }
105 
106 
107  // -------------------------------------------------------
108  // Now get the data and workspace
109 
110  // get the workspace out of the file
111  RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName);
112  if(!w){
113  cout <<"workspace not found" << endl;
114  return;
115  }
116 
117  // get the modelConfig out of the file
118  ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName);
119 
120  // get the modelConfig out of the file
121  RooAbsData* data = w->data(dataName);
122 
123  // make sure ingredients are found
124  if(!data || !mc){
125  w->Print();
126  cout << "data or ModelConfig was not found" <<endl;
127  return;
128  }
129 
130  mc->Print();
131  // -------------------------------------------------------
132  // Now find the upper limit based on the asymptotic results
133  RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first();
134  ProfileLikelihoodCalculator plc(*data,*mc);
135  LikelihoodInterval* interval = plc.GetInterval();
136  double plcUpperLimit = interval->UpperLimit(*firstPOI);
137  delete interval;
138  cout << "\n\n--------------------------------------"<<endl;
139  cout <<"Will generate sampling distribution at " << firstPOI->GetName() << " = " << plcUpperLimit <<endl;
140  int nPOI = mc->GetParametersOfInterest()->getSize();
141  if(nPOI>1){
142  cout <<"not sure what to do with other parameters of interest, but here are their values"<<endl;
143  mc->GetParametersOfInterest()->Print("v");
144  }
145 
146  // -------------------------------------------------------
147  // create the test stat sampler
149 
150  // to avoid effects from boundary and simplify asymptotic comparison, set min=-max
151  if(allowNegativeMu)
152  firstPOI->setMin(-1*firstPOI->getMax());
153 
154  // temporary RooArgSet
155  RooArgSet poi;
156  poi.add(*mc->GetParametersOfInterest());
157 
158  // create and configure the ToyMCSampler
159  ToyMCSampler sampler(ts,nToyMC);
160  sampler.SetPdf(*mc->GetPdf());
161  sampler.SetObservables(*mc->GetObservables());
162  sampler.SetGlobalObservables(*mc->GetGlobalObservables());
163  if(!mc->GetPdf()->canBeExtended() && (data->numEntries()==1)){
164  cout << "tell it to use 1 event"<<endl;
165  sampler.SetNEventsPerToy(1);
166  }
167  firstPOI->setVal(plcUpperLimit); // set POI value for generation
168  sampler.SetParametersForTestStat(*mc->GetParametersOfInterest()); // set POI value for evaluation
169 
170  if (useProof) {
171  ProofConfig pc(*w, nworkers, "",false);
172  sampler.SetProofConfig(&pc); // enable proof
173  }
174 
175  firstPOI->setVal(plcUpperLimit);
176  RooArgSet allParameters;
177  allParameters.add(*mc->GetParametersOfInterest());
178  allParameters.add(*mc->GetNuisanceParameters());
179  allParameters.Print("v");
180 
181  SamplingDistribution* sampDist = sampler.GetSamplingDistribution(allParameters);
182  SamplingDistPlot plot;
183  plot.AddSamplingDistribution(sampDist);
184  plot.GetTH1F(sampDist)->GetYaxis()->SetTitle(Form("f(-log #lambda(#mu=%.2f) | #mu=%.2f)",plcUpperLimit,plcUpperLimit));
185  plot.SetAxisTitle(Form("-log #lambda(#mu=%.2f)",plcUpperLimit));
186 
187  TCanvas* c1 = new TCanvas("c1");
188  c1->SetLogy();
189  plot.Draw();
190  double min = plot.GetTH1F(sampDist)->GetXaxis()->GetXmin();
191  double max = plot.GetTH1F(sampDist)->GetXaxis()->GetXmax();
192 
193  TF1* f = new TF1("f",Form("2*ROOT::Math::chisquared_pdf(2*x,%d,0)",nPOI),min,max);
194  f->Draw("same");
195  c1->SaveAs("standard_test_stat_distribution.pdf");
196 
197 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1276
Holds configuration options for proof and proof-lite.
Definition: ProofConfig.h:46
const std::vector< Double_t > & GetSamplingDistribution() const
Get test statistics values.
ModelConfig is a simple class that holds configuration information specifying how a model should be u...
Definition: ModelConfig.h:30
const RooArgSet * GetObservables() const
get RooArgSet for observables (return NULL if not existing)
Definition: ModelConfig.h:237
virtual Double_t getMax(const char *name=0) const
virtual Bool_t add(const RooAbsCollection &col, Bool_t silent=kFALSE)
Add a collection of arguments to this collection by calling add() for each element in the source coll...
Definition: RooArgSet.h:86
LikelihoodInterval is a concrete implementation of the RooStats::ConfInterval interface.
return c1
Definition: legend1.C:41
ProfileLikelihoodCalculator is a concrete implementation of CombinedCalculator (the interface class f...
Double_t AddSamplingDistribution(const SamplingDistribution *samplingDist, Option_t *drawOptions="NORMALIZE HIST")
adds the sampling distribution and returns the scale factor
#define gROOT
Definition: TROOT.h:402
virtual void Draw(Option_t *option="")
Draw this function with its current attributes.
Definition: TF1.cxx:1222
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3950
Double_t GetXmin() const
Definition: TAxis.h:133
void Draw(Option_t *options=0)
Draw this plot and all of the elements it contains.
virtual void Print(Option_t *options=0) const
This method must be overridden when a class wants to print itself.
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:36
RooAbsData * data(const char *name) const
Retrieve dataset (binned or unbinned) with given name. A null pointer is returned if not found...
virtual void setVal(Double_t value)
Set value of variable to &#39;value&#39;.
Definition: RooRealVar.cxx:205
Int_t getSize() const
Double_t UpperLimit(const RooRealVar &param)
return the upper bound of the interval on a given parameter
virtual void Print(Option_t *option="") const
overload the print method
R__EXTERN TSystem * gSystem
Definition: TSystem.h:540
RooAbsArg * first() const
void setMin(const char *name, Double_t value)
Set minimum of name range to given value.
Definition: RooRealVar.cxx:388
void SetAxisTitle(char *varName)
char * Form(const char *fmt,...)
TAxis * GetYaxis()
Definition: TH1.h:316
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
ProfileLikelihoodTestStat is an implementation of the TestStatistic interface that calculates the pro...
ToyMCSampler is an implementation of the TestStatSampler interface.
Definition: ToyMCSampler.h:71
This class simply holds a sampling distribution of some test statistic.
Bool_t canBeExtended() const
Definition: RooAbsPdf.h:216
TObject * obj(const char *name) const
Return any type of object (RooAbsArg, RooAbsData or generic object) with given name) ...
The Canvas class.
Definition: TCanvas.h:31
Namespace for the RooStats classes.
Definition: Asimov.h:20
RooAbsPdf * GetPdf() const
get model PDF (return NULL if pdf has not been specified or does not exist)
Definition: ModelConfig.h:222
const RooArgSet * GetParametersOfInterest() const
get RooArgSet containing the parameter of interest (return NULL if not existing)
Definition: ModelConfig.h:225
TH1F * GetTH1F(const SamplingDistribution *samplDist=NULL)
Returns the TH1F associated with the give SamplingDistribution.
This class provides simple and straightforward utilities to plot SamplingDistribution objects...
const RooArgSet * GetGlobalObservables() const
get RooArgSet for global observables (return NULL if not existing)
Definition: ModelConfig.h:243
Definition: file.py:1
const RooArgSet * GetNuisanceParameters() const
get RooArgSet containing the nuisance parameters (return NULL if not existing)
Definition: ModelConfig.h:228
1-Dim function class
Definition: TF1.h:211
static constexpr double pc
virtual void SaveAs(const char *filename="", Option_t *option="") const
Save Pad contents in a file in one of various formats.
Definition: TPad.cxx:5486
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
void Print(Option_t *opts=0) const
Print contents of the workspace.
Double_t GetXmax() const
Definition: TAxis.h:134
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:315
The RooWorkspace is a persistable container for RooFit projects.
Definition: RooWorkspace.h:42
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:285
virtual void SetLogy(Int_t value=1)
Set Lin/Log scale for Y.
Definition: TPad.cxx:5798