ROOT   6.21/01 Reference Guide
StandardTestStatDistributionDemo.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_roostats
3 /// \notebook
4 /// StandardTestStatDistributionDemo.C
5 ///
6 /// This simple script plots the sampling distribution of the profile likelihood
7 /// ratio test statistic based on the input Model File. To do this one needs to
8 /// specify the value of the parameter of interest that will be used for evaluating
9 /// the test statistic and the value of the parameters used for generating the toy data.
10 /// In this case, it uses the upper-limit estimated from the ProfileLikleihoodCalculator,
11 /// which assumes the asymptotic chi-square distribution for -2 log profile likelihood ratio.
12 /// Thus, the script is handy for checking to see if the asymptotic approximations are valid.
13 /// To aid, that comparison, the script overlays a chi-square distribution as well.
14 /// The most common parameter of interest is a parameter proportional to the signal rate,
15 /// and often that has a lower-limit of 0, which breaks the standard chi-square distribution.
16 /// Thus the script allows the parameter to be negative so that the overlay chi-square is
17 /// the correct asymptotic distribution.
18 ///
19 /// \macro_image
20 /// \macro_output
21 /// \macro_code
22 ///
23 /// \author Kyle Cranmer
24
25 #include "TFile.h"
26 #include "TROOT.h"
27 #include "TH1F.h"
28 #include "TCanvas.h"
29 #include "TSystem.h"
30 #include "TF1.h"
31 #include "TSystem.h"
32
33 #include "RooWorkspace.h"
34 #include "RooAbsData.h"
35
36 #include "RooStats/ModelConfig.h"
38 #include "RooStats/ToyMCSampler.h"
41
47
48 using namespace RooFit;
49 using namespace RooStats;
50
51 bool useProof = false; // flag to control whether to use Proof
52 int nworkers = 0; // number of workers (default use all available cores)
53
54 // -------------------------------------------------------
55 // The actual macro
56
57 void StandardTestStatDistributionDemo(const char *infile = "", const char *workspaceName = "combined",
58  const char *modelConfigName = "ModelConfig", const char *dataName = "obsData")
59 {
60
61  // the number of toy MC used to generate the distribution
62  int nToyMC = 1000;
63  // The parameter below is needed for asymptotic distribution to be chi-square,
64  // but set to false if your model is not numerically stable if mu<0
65  bool allowNegativeMu = true;
66
67  // -------------------------------------------------------
68  // First part is just to access a user-defined file
69  // or create the standard example file if it doesn't exist
70  const char *filename = "";
71  if (!strcmp(infile, "")) {
72  filename = "results/example_combined_GaussExample_model.root";
73  bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
74  // if file does not exists generate with histfactory
75  if (!fileExist) {
76 #ifdef _WIN32
77  cout << "HistFactory file cannot be generated on Windows - exit" << endl;
78  return;
79 #endif
80  // Normally this would be run on the command line
81  cout << "will run standard hist2workspace example" << endl;
82  gROOT->ProcessLine(".! prepareHistFactory .");
83  gROOT->ProcessLine(".! hist2workspace config/example.xml");
84  cout << "\n\n---------------------" << endl;
85  cout << "Done creating example input" << endl;
86  cout << "---------------------\n\n" << endl;
87  }
88
89  } else
90  filename = infile;
91
92  // Try to open the file
93  TFile *file = TFile::Open(filename);
94
96  if (!file) {
97  cout << "StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
98  return;
99  }
100
101  // -------------------------------------------------------
102  // Now get the data and workspace
103
104  // get the workspace out of the file
105  RooWorkspace *w = (RooWorkspace *)file->Get(workspaceName);
106  if (!w) {
108  return;
109  }
110
111  // get the modelConfig out of the file
112  ModelConfig *mc = (ModelConfig *)w->obj(modelConfigName);
113
114  // get the modelConfig out of the file
115  RooAbsData *data = w->data(dataName);
116
117  // make sure ingredients are found
118  if (!data || !mc) {
119  w->Print();
121  return;
122  }
123
124  mc->Print();
125  // -------------------------------------------------------
126  // Now find the upper limit based on the asymptotic results
127  RooRealVar *firstPOI = (RooRealVar *)mc->GetParametersOfInterest()->first();
128  ProfileLikelihoodCalculator plc(*data, *mc);
129  LikelihoodInterval *interval = plc.GetInterval();
130  double plcUpperLimit = interval->UpperLimit(*firstPOI);
131  delete interval;
132  cout << "\n\n--------------------------------------" << endl;
133  cout << "Will generate sampling distribution at " << firstPOI->GetName() << " = " << plcUpperLimit << endl;
134  int nPOI = mc->GetParametersOfInterest()->getSize();
135  if (nPOI > 1) {
136  cout << "not sure what to do with other parameters of interest, but here are their values" << endl;
137  mc->GetParametersOfInterest()->Print("v");
138  }
139
140  // -------------------------------------------------------
141  // create the test stat sampler
143
144  // to avoid effects from boundary and simplify asymptotic comparison, set min=-max
145  if (allowNegativeMu)
146  firstPOI->setMin(-1 * firstPOI->getMax());
147
148  // temporary RooArgSet
149  RooArgSet poi;
151
152  // create and configure the ToyMCSampler
153  ToyMCSampler sampler(ts, nToyMC);
154  sampler.SetPdf(*mc->GetPdf());
155  sampler.SetObservables(*mc->GetObservables());
156  sampler.SetGlobalObservables(*mc->GetGlobalObservables());
157  if (!mc->GetPdf()->canBeExtended() && (data->numEntries() == 1)) {
158  cout << "tell it to use 1 event" << endl;
159  sampler.SetNEventsPerToy(1);
160  }
161  firstPOI->setVal(plcUpperLimit); // set POI value for generation
162  sampler.SetParametersForTestStat(*mc->GetParametersOfInterest()); // set POI value for evaluation
163
164  if (useProof) {
165  ProofConfig pc(*w, nworkers, "", false);
166  sampler.SetProofConfig(&pc); // enable proof
167  }
168
169  firstPOI->setVal(plcUpperLimit);
170  RooArgSet allParameters;
173  allParameters.Print("v");
174
175  SamplingDistribution *sampDist = sampler.GetSamplingDistribution(allParameters);
176  SamplingDistPlot plot;
178  plot.GetTH1F(sampDist)->GetYaxis()->SetTitle(
179  Form("f(-log #lambda(#mu=%.2f) | #mu=%.2f)", plcUpperLimit, plcUpperLimit));
180  plot.SetAxisTitle(Form("-log #lambda(#mu=%.2f)", plcUpperLimit));
181
182  TCanvas *c1 = new TCanvas("c1");
183  c1->SetLogy();
184  plot.Draw();
185  double min = plot.GetTH1F(sampDist)->GetXaxis()->GetXmin();
186  double max = plot.GetTH1F(sampDist)->GetXaxis()->GetXmax();
187
188  TF1 *f = new TF1("f", Form("2*ROOT::Math::chisquared_pdf(2*x,%d,0)", nPOI), min, max);
189  f->Draw("same");
190  c1->SaveAs("standard_test_stat_distribution.pdf");
191 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1279
Holds configuration options for proof and proof-lite.
Definition: ProofConfig.h:46
const std::vector< Double_t > & GetSamplingDistribution() const
Get test statistics values.
ModelConfig is a simple class that holds configuration information specifying how a model should be u...
Definition: ModelConfig.h:30
const RooArgSet * GetObservables() const
get RooArgSet for observables (return NULL if not existing)
Definition: ModelConfig.h:249
virtual Double_t getMax(const char *name=0) const
Get maximum of currently defined range.
virtual Bool_t add(const RooAbsCollection &col, Bool_t silent=kFALSE)
Add a collection of arguments to this collection by calling add() for each element in the source coll...
Definition: RooArgSet.h:88
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3925
LikelihoodInterval is a concrete implementation of the RooStats::ConfInterval interface.
return c1
Definition: legend1.C:41
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:28
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:48
The ProfileLikelihoodCalculator is a concrete implementation of CombinedCalculator (the interface cla...
Double_t AddSamplingDistribution(const SamplingDistribution *samplingDist, Option_t *drawOptions="NORMALIZE HIST")
adds the sampling distribution and returns the scale factor
#define gROOT
Definition: TROOT.h:415
#define f(i)
Definition: RSha256.hxx:104
Double_t GetXmin() const
Definition: TAxis.h:133
virtual void Print(Option_t *option="") const override
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
void Draw(Option_t *options=0)
Draw this plot and all of the elements it contains.
virtual void Print(Option_t *options=0) const
This method must be overridden when a class wants to print itself.
RooRealVar represents a fundamental (non-derived) real-valued object.
Definition: RooRealVar.h:36
RooAbsData * data(const char *name) const
Retrieve dataset (binned or unbinned) with given name. A null pointer is returned if not found...
virtual void setVal(Double_t value)
Set value of variable to &#39;value&#39;.
Definition: RooRealVar.cxx:252
Int_t getSize() const
Double_t UpperLimit(const RooRealVar &param)
return the upper bound of the interval on a given parameter
R__EXTERN TSystem * gSystem
Definition: TSystem.h:557
static constexpr double pc
RooAbsArg * first() const
void setMin(const char *name, Double_t value)
Set minimum of name range to given value.
Definition: RooRealVar.cxx:435
void SetAxisTitle(char *varName)
char * Form(const char *fmt,...)
TAxis * GetYaxis()
Definition: TH1.h:317
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:44
ProfileLikelihoodTestStat is an implementation of the TestStatistic interface that calculates the pro...
ToyMCSampler is an implementation of the TestStatSampler interface.
Definition: ToyMCSampler.h:71
This class simply holds a sampling distribution of some test statistic.
Bool_t canBeExtended() const
Definition: RooAbsPdf.h:236
TObject * obj(const char *name) const
Return any type of object (RooAbsArg, RooAbsData or generic object) with given name) ...
The Canvas class.
Definition: TCanvas.h:31
Namespace for the RooStats classes.
Definition: Asimov.h:20
RooAbsPdf * GetPdf() const
get model PDF (return NULL if pdf has not been specified or does not exist)
Definition: ModelConfig.h:234
const RooArgSet * GetParametersOfInterest() const
get RooArgSet containing the parameter of interest (return NULL if not existing)
Definition: ModelConfig.h:237
TH1F * GetTH1F(const SamplingDistribution *samplDist=NULL)
Returns the TH1F associated with the give SamplingDistribution.
This class provides simple and straightforward utilities to plot SamplingDistribution objects...
const RooArgSet * GetGlobalObservables() const
get RooArgSet for global observables (return NULL if not existing)
Definition: ModelConfig.h:255
Definition: file.py:1
const RooArgSet * GetNuisanceParameters() const
get RooArgSet containing the nuisance parameters (return NULL if not existing)
Definition: ModelConfig.h:240
1-Dim function class
Definition: TF1.h:211
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
void Print(Option_t *opts=0) const
Print contents of the workspace.
Double_t GetXmax() const
Definition: TAxis.h:134
TAxis * GetXaxis()
Definition: TH1.h:316
The RooWorkspace is a persistable container for RooFit projects.
Definition: RooWorkspace.h:43
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:306