Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
StandardHypoTestDemo.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_roostats
3/// \notebook
4/// Standard tutorial macro for hypothesis test (for computing the discovery significance) using all
5/// RooStats hypothesis tests calculators and test statistics.
6///
7/// Usage:
8///
9/// ~~~{.cpp}
10/// root>.L StandardHypoTestDemo.C
11/// root> StandardHypoTestDemo("fileName","workspace name","S+B modelconfig name","B model name","data set
12/// name",calculator type, test statistic type, number of toys)
13///
14/// type = 0 Freq calculator
15/// type = 1 Hybrid calculator
16/// type = 2 Asymptotic calculator
17/// type = 3 Asymptotic calculator using nominal Asimov data sets (not using fitted parameter values but nominal ones)
18///
19/// testStatType = 0 LEP
20/// = 1 Tevatron
21/// = 2 Profile Likelihood
22/// = 3 Profile Likelihood one sided (i.e. = 0 if mu_hat < 0)
23/// ~~~
24///
25/// \macro_image
26/// \macro_output
27/// \macro_code
28///
29/// \author Lorenzo Moneta
30
31#include "TFile.h"
32#include "RooWorkspace.h"
33#include "RooAbsPdf.h"
34#include "RooRealVar.h"
35#include "RooDataSet.h"
37#include "RooRandom.h"
38#include "TGraphErrors.h"
39#include "TGraphAsymmErrors.h"
40#include "TCanvas.h"
41#include "TLine.h"
42#include "TSystem.h"
43#include "TROOT.h"
44
50
56
60
61#include <cassert>
62
63using namespace RooFit;
64using namespace RooStats;
65
66struct HypoTestOptions {
67
68 bool noSystematics = false; // force all systematics to be off (i.e. set all nuisance parameters as constant)
69 double nToysRatio = 4; // ratio Ntoys Null/ntoys ALT
70 double poiValue = -1; // change poi snapshot value for S+B model (needed for expected p0 values)
71 int printLevel = 0;
72 bool generateBinned = false; // for binned generation
73 bool enableDetailedOutput = false; // for detailed output
74};
75
77
78void workspaceName = "combined",
79 const char *modelSBName = "ModelConfig", const char *modelBName = "",
80 const char *dataName = "obsData", int calcType = 0, /* 0 freq 1 hybrid, 2 asymptotic */
81 int testStatType = 3, /* 0 LEP, 1 TeV, 2 LHC, 3 LHC - one sided*/
82 int ntoys = 5000, bool useNC = false, const char *nuisPriorName = 0)
83{
84
85 bool noSystematics = optHT.noSystematics;
86 double nToysRatio = optHT.nToysRatio; // ratio Ntoys Null/ntoys ALT
87 double poiValue = optHT.poiValue; // change poi snapshot value for S+B model (needed for expected p0 values)
88 int printLevel = optHT.printLevel;
89 bool generateBinned = optHT.generateBinned; // for binned generation
90 bool enableDetOutput = optHT.enableDetailedOutput;
91
92 // Other Parameter to pass in tutorial
93 // apart from standard for filename, ws, modelconfig and data
94
95 // type = 0 Freq calculator
96 // type = 1 Hybrid calculator
97 // type = 2 Asymptotic calculator
98
99 // testStatType = 0 LEP
100 // = 1 Tevatron
101 // = 2 Profile Likelihood
102 // = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)
103
104 // ntoys: number of toys to use
105
106 // useNumberCounting: set to true when using number counting events
107
108 // nuisPriorName: name of prior for the nuisance. This is often expressed as constraint term in the global model
109 // It is needed only when using the HybridCalculator (type=1)
110 // If not given by default the prior pdf from ModelConfig is used.
111
112 // extra options are available as global parameters of the macro. They major ones are:
113
114 // generateBinned generate binned data sets for toys (default is false) - be careful not to activate with
115 // a too large (>=3) number of observables
116 // nToyRatio ratio of S+B/B toys (default is 2)
117 // printLevel
118
119 // disable - can cause some problems
120 // ToyMCSampler::SetAlwaysUseMultiGen(true);
121
122 SimpleLikelihoodRatioTestStat::SetAlwaysReuseNLL(true);
123 ProfileLikelihoodTestStat::SetAlwaysReuseNLL(true);
124 RatioOfProfiledLikelihoodsTestStat::SetAlwaysReuseNLL(true);
125
126 // RooRandom::randomGenerator()->SetSeed(0);
127
128 // to change minimizers
129 // ~~~{.bash}
130 // ROOT::Math::MinimizerOptions::SetDefaultStrategy(0);
131 // ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2");
132 // ROOT::Math::MinimizerOptions::SetDefaultTolerance(1);
133 // ~~~
134
135 // -------------------------------------------------------
136 // First part is just to access a user-defined file
137 // or create the standard example file if it doesn't exist
138 const char *filename = "";
139 if (!strcmp(infile, "")) {
140 filename = "results/example_combined_GaussExample_model.root";
141 bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
142 // if file does not exists generate with histfactory
143 if (!fileExist) {
144 // Normally this would be run on the command line
145 cout << "will run standard hist2workspace example" << endl;
146 gROOT->ProcessLine(".! prepareHistFactory .");
147 gROOT->ProcessLine(".! hist2workspace config/example.xml");
148 cout << "\n\n---------------------" << endl;
149 cout << "Done creating example input" << endl;
150 cout << "---------------------\n\n" << endl;
151 }
152
153 } else
155
156 // Try to open the file
157 TFile *file = TFile::Open(filename);
158
159 // if input file was specified but not found, quit
160 if (!file) {
161 cout << "StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
162 return;
163 }
164
165 // -------------------------------------------------------
166 // Tutorial starts here
167 // -------------------------------------------------------
168
169 // get the workspace out of the file
171 if (!w) {
172 cout << "workspace not found" << endl;
173 return;
174 }
175 w->Print();
176
177 // get the modelConfig out of the file
179
180 // get the modelConfig out of the file
181 RooAbsData *data = w->data(dataName);
182
183 // make sure ingredients are found
184 if (!data || !sbModel) {
185 w->Print();
186 cout << "data or ModelConfig was not found" << endl;
187 return;
188 }
189 // make b model
191
192 // case of no systematics
193 // remove nuisance parameters from model
194 if (noSystematics) {
195 const RooArgSet *nuisPar = sbModel->GetNuisanceParameters();
196 if (nuisPar && nuisPar->getSize() > 0) {
197 std::cout << "StandardHypoTestInvDemo"
198 << " - Switch off all systematics by setting them constant to their initial values" << std::endl;
200 }
201 if (bModel) {
202 const RooArgSet *bnuisPar = bModel->GetNuisanceParameters();
203 if (bnuisPar)
205 }
206 }
207
208 if (!bModel) {
209 Info("StandardHypoTestInvDemo", "The background model %s does not exist", modelBName);
210 Info("StandardHypoTestInvDemo", "Copy it from ModelConfig %s and set POI to zero", modelSBName);
211 bModel = (ModelConfig *)sbModel->Clone();
212 bModel->SetName(TString(modelSBName) + TString("B_only"));
213 RooRealVar *var = dynamic_cast<RooRealVar *>(bModel->GetParametersOfInterest()->first());
214 if (!var)
215 return;
216 double oldval = var->getVal();
217 var->setVal(0);
218 // bModel->SetSnapshot( RooArgSet(*var, *w->var("lumi")) );
219 bModel->SetSnapshot(RooArgSet(*var));
220 var->setVal(oldval);
221 }
222
223 if (!sbModel->GetSnapshot() || poiValue > 0) {
224 Info("StandardHypoTestDemo", "Model %s has no snapshot - make one using model poi", modelSBName);
225 RooRealVar *var = dynamic_cast<RooRealVar *>(sbModel->GetParametersOfInterest()->first());
226 if (!var)
227 return;
228 double oldval = var->getVal();
229 if (poiValue > 0)
230 var->setVal(poiValue);
231 // sbModel->SetSnapshot( RooArgSet(*var, *w->var("lumi") ) );
232 sbModel->SetSnapshot(RooArgSet(*var));
233 if (poiValue > 0)
234 var->setVal(oldval);
235 // sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() );
236 }
237
238 // part 1, hypothesis testing
240 // null parameters must include snapshot of poi plus the nuisance values
241 RooArgSet nullParams(*bModel->GetSnapshot());
242 if (bModel->GetNuisanceParameters())
243 nullParams.add(*bModel->GetNuisanceParameters());
244
245 slrts->SetNullParameters(nullParams);
246 RooArgSet altParams(*sbModel->GetSnapshot());
247 if (sbModel->GetNuisanceParameters())
248 altParams.add(*sbModel->GetNuisanceParameters());
249 slrts->SetAltParameters(altParams);
250
252
254 new RatioOfProfiledLikelihoodsTestStat(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot());
255 ropl->SetSubtractMLE(false);
256
257 if (testStatType == 3)
258 profll->SetOneSidedDiscovery(1);
259 profll->SetPrintLevel(printLevel);
260
261 if (enableDetOutput) {
262 slrts->EnableDetailedOutput();
263 profll->EnableDetailedOutput();
264 ropl->EnableDetailedOutput();
265 }
266
267 /* profll.SetReuseNLL(mOptimize);*/
268 /* slrts.SetReuseNLL(mOptimize);*/
269 /* ropl.SetReuseNLL(mOptimize);*/
270
271 AsymptoticCalculator::SetPrintLevel(printLevel);
272
274 // note here Null is B and Alt is S+B
275 if (calcType == 0)
277 else if (calcType == 1)
279 else if (calcType == 2)
281
282 if (calcType == 0) {
284 if (enableDetOutput)
285 ((FrequentistCalculator *)hypoCalc)->StoreFitInfo(true);
286 }
287 if (calcType == 1) {
289 // n. a. yetif (enableDetOutput) ((HybridCalculator*) hypoCalc)->StoreFitInfo(true);
290 }
291 if (calcType == 2) {
292 if (testStatType == 3)
293 ((AsymptoticCalculator *)hypoCalc)->SetOneSidedDiscovery(true);
294 if (testStatType != 2 && testStatType != 3)
295 Warning("StandardHypoTestDemo",
296 "Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL");
297 }
298
299 // check for nuisance prior pdf in case of nuisance parameters
300 if (calcType == 1 && (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters())) {
301 RooAbsPdf *nuisPdf = 0;
302 if (nuisPriorName)
303 nuisPdf = w->pdf(nuisPriorName);
304 // use prior defined first in bModel (then in SbModel)
305 if (!nuisPdf) {
306 Info("StandardHypoTestDemo",
307 "No nuisance pdf given for the HybridCalculator - try to deduce pdf from the model");
308 if (bModel->GetPdf() && bModel->GetObservables())
309 nuisPdf = RooStats::MakeNuisancePdf(*bModel, "nuisancePdf_bmodel");
310 else
311 nuisPdf = RooStats::MakeNuisancePdf(*sbModel, "nuisancePdf_sbmodel");
312 }
313 if (!nuisPdf) {
314 if (bModel->GetPriorPdf()) {
315 nuisPdf = bModel->GetPriorPdf();
316 Info("StandardHypoTestDemo",
317 "No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",
318 nuisPdf->GetName());
319 } else {
320 Error("StandardHypoTestDemo", "Cannot run Hybrid calculator because no prior on the nuisance parameter is "
321 "specified or can be derived");
322 return;
323 }
324 }
326 Info("StandardHypoTestDemo", "Using as nuisance Pdf ... ");
327 nuisPdf->Print();
328
329 const RooArgSet *nuisParams =
330 (bModel->GetNuisanceParameters()) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters();
331 std::unique_ptr<RooArgSet> np{nuisPdf->getObservables(*nuisParams)};
332 if (np->getSize() == 0) {
333 Warning("StandardHypoTestDemo",
334 "Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range");
335 }
336
337 ((HybridCalculator *)hypoCalc)->ForcePriorNuisanceAlt(*nuisPdf);
338 ((HybridCalculator *)hypoCalc)->ForcePriorNuisanceNull(*nuisPdf);
339 }
340
341 /* hypoCalc->ForcePriorNuisanceAlt(*sbModel->GetPriorPdf());*/
342 /* hypoCalc->ForcePriorNuisanceNull(*bModel->GetPriorPdf());*/
343
344 ToyMCSampler *sampler = (ToyMCSampler *)hypoCalc->GetTestStatSampler();
345
346 if (sampler && (calcType == 0 || calcType == 1)) {
347
348 // look if pdf is number counting or extended
349 if (sbModel->GetPdf()->canBeExtended()) {
350 if (useNC)
351 Warning("StandardHypoTestDemo", "Pdf is extended: but number counting flag is set: ignore it ");
352 } else {
353 // for not extended pdf
354 if (!useNC) {
355 int nEvents = data->numEntries();
356 Info("StandardHypoTestDemo",
357 "Pdf is not extended: number of events to generate taken from observed data set is %d", nEvents);
358 sampler->SetNEventsPerToy(nEvents);
359 } else {
360 Info("StandardHypoTestDemo", "using a number counting pdf");
361 sampler->SetNEventsPerToy(1);
362 }
363 }
364
365 if (data->isWeighted() && !generateBinned) {
366 Info("StandardHypoTestDemo", "Data set is weighted, nentries = %d and sum of weights = %8.1f but toy "
367 "generation is unbinned - it would be faster to set generateBinned to true\n",
368 data->numEntries(), data->sumEntries());
369 }
370 if (generateBinned)
371 sampler->SetGenerateBinned(generateBinned);
372
373 // set the test statistic
374 if (testStatType == 0)
375 sampler->SetTestStatistic(slrts);
376 if (testStatType == 1)
377 sampler->SetTestStatistic(ropl);
378 if (testStatType == 2 || testStatType == 3)
379 sampler->SetTestStatistic(profll);
380 }
381
382 HypoTestResult *htr = hypoCalc->GetHypoTest();
383 htr->SetPValueIsRightTail(true);
384 htr->SetBackgroundAsAlt(false);
385 htr->Print(); // how to get meaningful CLs at this point?
386
387 delete sampler;
388 delete slrts;
389 delete ropl;
390 delete profll;
391
392 if (calcType != 2) {
393 HypoTestPlot *plot = new HypoTestPlot(*htr, 100);
394 plot->SetLogYaxis(true);
395 plot->Draw();
396 } else {
397 std::cout << "Asymptotic results " << std::endl;
398 }
399
400 // look at expected significances
401 // found median of S+B distribution
402 if (calcType != 2) {
403
404 SamplingDistribution *altDist = htr->GetAltDistribution();
405 HypoTestResult htExp("Expected Result");
406 htExp.Append(htr);
407 // find quantiles in alt (S+B) distribution
408 double p[5];
409 double q[5];
410 for (int i = 0; i < 5; ++i) {
411 double sig = -2 + i;
412 p[i] = ROOT::Math::normal_cdf(sig, 1);
413 }
414 std::vector<double> values = altDist->GetSamplingDistribution();
415 TMath::Quantiles(values.size(), 5, &values[0], q, p, false);
416
417 for (int i = 0; i < 5; ++i) {
418 htExp.SetTestStatisticData(q[i]);
419 double sig = -2 + i;
420 std::cout << " Expected p -value and significance at " << sig << " sigma = " << htExp.NullPValue()
421 << " significance " << htExp.Significance() << " sigma " << std::endl;
422 }
423 } else {
424 // case of asymptotic calculator
425 for (int i = 0; i < 5; ++i) {
426 double sig = -2 + i;
427 // sigma is inverted here
428 double pval = AsymptoticCalculator::GetExpectedPValues(htr->NullPValue(), htr->AlternatePValue(), -sig, false);
429 std::cout << " Expected p -value and significance at " << sig << " sigma = " << pval << " significance "
430 << ROOT::Math::normal_quantile_c(pval, 1) << " sigma " << std::endl;
431 }
432 }
433
434 // write result in a file in case of toys
435 bool writeResult = (calcType != 2);
436
437 if (enableDetOutput) {
438 writeResult = true;
439 Info("StandardHypoTestDemo", "Detailed output will be written in output result file");
440 }
441
442 if (htr != NULL && writeResult) {
443
444 // write to a file the results
445 const char *calcTypeName = (calcType == 0) ? "Freq" : (calcType == 1) ? "Hybr" : "Asym";
447 // strip the / from the filename
448
450 name.Replace(0, name.Last('/') + 1, "");
452
453 resultFileName, "RECREATE");
454 htr->Write();
455 Info("StandardHypoTestDemo", "HypoTestResult has been written in the file %s", resultFileName.Data());
456
457 fileOut->Close();
458 }
459}
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:185
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:229
winID h TVirtualViewer3D TVirtualGLPainter p
winID h TVirtualViewer3D TVirtualGLPainter char TVirtualGLPainter plot
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t np
char name[80]
Definition TGX11.cxx:110
float * q
#define gROOT
Definition TROOT.h:406
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
Abstract base class for binned and unbinned datasets.
Definition RooAbsData.h:57
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:24
Variable that can be changed from the outside.
Definition RooRealVar.h:37
void setVal(double value) override
Set value of variable to 'value'.
Hypothesis Test Calculator based on the asymptotic formulae for the profile likelihood ratio.
Does a frequentist hypothesis test.
Same purpose as HybridCalculatorOriginal, but different implementation.
Common base class for the Hypothesis Test Calculators.
This class provides the plots for the result of a study performed with any of the HypoTestCalculatorG...
HypoTestResult is a base class for results from hypothesis tests.
ModelConfig is a simple class that holds configuration information specifying how a model should be u...
Definition ModelConfig.h:35
ProfileLikelihoodTestStat is an implementation of the TestStatistic interface that calculates the pro...
TestStatistic that returns the ratio of profiled likelihoods.
This class simply holds a sampling distribution of some test statistic.
TestStatistic class that returns -log(L[null] / L[alt]) where L is the likelihood.
ToyMCSampler is an implementation of the TestStatSampler interface.
Persistable container for RooFit projects.
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:131
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4131
Basic string class.
Definition TString.h:139
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2378
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition TSystem.cxx:1308
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
double normal_quantile_c(double z, double sigma)
Inverse ( ) of the cumulative distribution function of the upper tail of the normal (Gaussian) distri...
std::ostream & Info()
Definition hadd.cxx:171
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition CodegenImpl.h:64
Namespace for the RooStats classes.
Definition CodegenImpl.h:58
bool SetAllConstant(const RooAbsCollection &coll, bool constant=true)
utility function to set all variable constant in a collection (from G.
RooAbsPdf * MakeNuisancePdf(RooAbsPdf &pdf, const RooArgSet &observables, const char *name)
extract constraint terms from pdf
void Quantiles(Int_t n, Int_t nprob, Double_t *x, Double_t *quantiles, Double_t *prob, Bool_t isSorted=kTRUE, Int_t *index=nullptr, Int_t type=7)
Computes sample quantiles, corresponding to the given probabilities.
Definition TMath.cxx:1207