ROOT   6.14/05 Reference Guide
BernsteinCorrection.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id$
2 // Author: Kyle Cranmer 28/07/2008
3
4 /*************************************************************************
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. * 9 * For the list of contributors see$ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11
12 /** \class RooStats::BernsteinCorrection
13  \ingroup Roostats
14
15 BernsteinCorrection is a utility in RooStats to augment a nominal PDF with a polynomial
16 correction term. This is useful for incorporating systematic variations to the nominal PDF.
17 The Bernstein basis polynomials are particularly appropriate because they are positive definite.
18
19 This tool was inspired by the work of Glen Cowan together with Stephan Horner, Sascha Caron,
20 Eilam Gross, and others.
21 The initial implementation is independent work. The major step forward in the approach was
22 to provide a well defined algorithm that specifies the order of polynomial to be included
23 in the correction. This is an empirical algorithm, so in addition to the nominal model it
24 needs either a real data set or a simulated one. In the early work, the nominal model was taken
25 to be a histogram from Monte Carlo simulations, but in this implementation it is generalized to an
26 arbitrary PDF (which includes a RooHistPdf). The algorithm basically consists of a
27 hypothesis test of an nth-order correction (null) against a n+1-th order correction (alternate).
28 The quantity q = -2 log LR is used to determine whether the n+1-th order correction is a major
29 improvement to the n-th order correction. The distribution of q is expected to be roughly
30 \f$\chi^2\f$ with one degree of freedom if the n-th order correction is a good model for the data.
31  Thus, one only moves to the n+1-th order correction of q is relatively large. The chance that
32 one moves from the n-th to the n+1-th order correction when the n-th order correction
33 (eg. a type 1 error) is sufficient is given by the Prob(\f$\chi^2_1\f$ > threshold). The constructor
34 of this class allows you to directly set this tolerance (in terms of probability that the n+1-th
36
37 To do:
38 Add another method to the utility that will make the sampling distribution for -2 log lambda
39 for various m vs. m+1 order corrections using a nominal model and perhaps having two ways of
40 generating the toys (either via a histogram or via an independent model that is supposed to
41  reflect reality). That will allow one to make plots like Glen has at the end of his DRAFT
42  very easily.
43
44 */
45
46
48
49 #include "RooGlobalFunc.h"
50 #include "RooDataSet.h"
51 #include "RooRealVar.h"
52 #include "RooAbsPdf.h"
53 #include "RooFit.h"
54 #include "RooFitResult.h"
55 #include "TMath.h"
56 #include <string>
57 #include <vector>
58 #include <stdio.h>
59 #include <sstream>
60 #include <iostream>
61
62 #include "RooEffProd.h"
63 #include "RooNLLVar.h"
64 #include "RooWorkspace.h"
65 #include "RooDataHist.h"
66 #include "RooHistPdf.h"
67
68 #include "RooBernstein.h"
69
70 #include "Math/MinimizerOptions.h"
71
72
74
75 using namespace RooFit;
76 using namespace RooStats;
77 using namespace std;
78
79 ////////////////////////////////////////////////////////////////////////////////
80
81 BernsteinCorrection::BernsteinCorrection(Double_t tolerance):
82  fMaxDegree(10), fMaxCorrection(100), fTolerance(tolerance){
83 }
84
85
86 ////////////////////////////////////////////////////////////////////////////////
87 /// Main method for Bernstein correction.
88
90  const char* nominalName,
91  const char* varName,
92  const char* dataName){
93  // get ingredients out of workspace
94  RooRealVar* x = wks->var(varName);
95  RooAbsPdf* nominal = wks->pdf(nominalName);
96  RooAbsData* data = wks->data(dataName);
97
98  if (!x || !nominal || !data) {
99  cout << "Error: wrong name for pdf or variable or dataset - return -1 " << std::endl;
100  return -1;
101  }
102
103  std::cout << "BernsteinCorrection::ImportCorrectedPdf - Doing initial Fit with nominal model " << std::endl;
104
105  // initialize alg, by checking how well nominal model fits
108
109  RooFitResult* nominalResult = nominal->fitTo(*data,Save(),Minos(kFALSE), Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
110  Double_t lastNll= nominalResult->minNll();
111
112  if (nominalResult->status() != 0 ) {
113  std::cout << "BernsteinCorrection::ImportCorrectedPdf - Error fit with nominal model failed - exit" << std::endl;
114  return -1;
115  }
116
117  // setup a log
118  std::stringstream log;
119  log << "------ Begin Bernstein Correction Log --------" << endl;
120
121  // Local variables that we want to keep in scope after loop
122  RooArgList coeff;
123  vector<RooRealVar*> coefficients;
124  Double_t q = 1E6;
125  Int_t degree = -1;
126
127  // The while loop
128  bool keepGoing = true;
129  while( keepGoing ) {
130  degree++;
131
132  // we need to generate names for vars on the fly
133  std::stringstream str;
134  str<<"_"<<degree;
135
136  RooRealVar* newCoef = new RooRealVar(("c"+str.str()).c_str(),
137  "Bernstein basis poly coefficient",
138  1.0, 0., fMaxCorrection);
140  coefficients.push_back(newCoef);
141  // Since pdf is normalized - coefficient for degree 0 is fixed to be 1
142  if (degree == 0) {
143  newCoef->setVal(1);
144  newCoef->setConstant(1);
145  continue;
146  }
147
148  // make the polynomial correction term
149  RooBernstein* poly = new RooBernstein("poly", "Bernstein poly", *x, coeff);
150
151  // make the corrected PDF = nominal * poly
152  RooEffProd* corrected = new RooEffProd("corrected","",*nominal,*poly);
153
154  // check to see how well this correction fits
155  RooFitResult* result = corrected->fitTo(*data,Save(),Minos(kFALSE), Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
156
157  if (result->status() != 0) {
158  std::cout << "BernsteinCorrection::ImportCorrectedPdf - Error fit with corrected model failed" << std::endl;
159  return -1;
160  }
161
162
163  // Hypothesis test between previous correction (null)
164  // and this one (alternate). Use -2 log LR for test statistic
165  q = 2*(lastNll - result->minNll()); // -2 log lambda, goes like significance^2
166  // check if we should keep going based on rate of Type I error
167  keepGoing = (degree < 1 || TMath::Prob(q,1) < fTolerance );
168  if (degree >= fMaxDegree) keepGoing = false;
169
170  if(!keepGoing){
171  // terminate loop, import corrected PDF
172  //RooMsgService::instance().setGlobalKillBelow(RooFit::FATAL) ;
173  wks->import(*corrected);
174  //RooMsgService::instance().setGlobalKillBelow(RooFit::DEBUG) ;
175  } else {
176  // memory management
177  delete corrected;
178  delete poly;
179  }
180
181  // for the log
182  if(degree != 0){
183  log << "degree = " << degree
184  << " -log L("<<degree-1<<") = " << lastNll
185  << " -log L(" << degree <<") = " << result->minNll()
186  << " q = " << q
187  << " P(chi^2_1 > q) = " << TMath::Prob(q,1) << endl;;
188  }
189
190  // update last result for next iteration in loop
191  lastNll = result->minNll();
192
193  delete result;
194  }
195
196  log << "------ End Bernstein Correction Log --------" << endl;
197  cout << log.str();
198
199  return degree;
200 }
201
202
203 ////////////////////////////////////////////////////////////////////////////////
204 /// Create sampling distribution for q given degree-1 vs. degree corrections
205
207  const char* nominalName,
208  const char* varName,
209  const char* dataName,
210  TH1F* samplingDist,
211  TH1F* samplingDistExtra,
212  Int_t degree,
213  Int_t nToys){
214  // get ingredients out of workspace
215  RooRealVar* x = wks->var(varName);
216  RooAbsPdf* nominal = wks->pdf(nominalName);
217  RooAbsData* data = wks->data(dataName);
218
219  if (!x || !nominal || !data) {
220  cout << "Error: wrong name for pdf or variable or dataset ! " << std::endl;
221  return;
222  }
223
224  // setup a log
225  std::stringstream log;
226  log << "------ Begin Bernstein Correction Log --------" << endl;
227
228  // Local variables that we want to keep in scope after loop
229  RooArgList coeff; // n-th degree correction
230  RooArgList coeffNull; // n-1 correction
231  RooArgList coeffExtra; // n+1 correction
232  vector<RooRealVar*> coefficients;
233
234  //cout << "make coefs" << endl;
235  for(int i = 0; i<=degree+1; ++i) {
236  // we need to generate names for vars on the fly
237  std::stringstream str;
238  str<<"_"<<i;
239
240  RooRealVar* newCoef = new RooRealVar(("c"+str.str()).c_str(),
241  "Bernstein basis poly coefficient",
242  1., 0., fMaxCorrection);
243
244  // keep three sets of coefficients for n-1, n, n+1 corrections
248  coefficients.push_back(newCoef);
249  }
250
251  // make the polynomial correction term
252  RooBernstein* poly
253  = new RooBernstein("poly", "Bernstein poly", *x, coeff);
254
255  // make the polynomial correction term
256  RooBernstein* polyNull
257  = new RooBernstein("polyNull", "Bernstein poly", *x, coeffNull);
258
259  // make the polynomial correction term
260  RooBernstein* polyExtra
261  = new RooBernstein("polyExtra", "Bernstein poly", *x, coeffExtra);
262
263  // make the corrected PDF = nominal * poly
264  RooEffProd* corrected
265  = new RooEffProd("corrected","",*nominal,*poly);
266
267  RooEffProd* correctedNull
268  = new RooEffProd("correctedNull","",*nominal,*polyNull);
269
270  RooEffProd* correctedExtra
271  = new RooEffProd("correctedExtra","",*nominal,*polyExtra);
272
273
274  cout << "made pdfs, make toy generator" << endl;
275
276  // make a PDF to generate the toys
277  RooDataHist dataHist("dataHist","",*x,*data);
278  RooHistPdf toyGen("toyGen","",*x,dataHist);
279
282
284  if (printLevel < 0) {
286  }
287
288
289  // TH1F* samplingDist = new TH1F("samplingDist","",20,0,10);
290  Double_t q = 0, qExtra = 0;
291  // do toys
292  for(int i=0; i<nToys; ++i){
293  cout << "on toy " << i << endl;
294
295  RooDataSet* tmpData = toyGen.generate(*x,data->numEntries());
296  // check to see how well this correction fits
297  RooFitResult* result
298  = corrected->fitTo(*tmpData,Save(),Minos(kFALSE),
299  Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
300
301  RooFitResult* resultNull
302  = correctedNull->fitTo(*tmpData,Save(),Minos(kFALSE),
303  Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
304
305
306  RooFitResult* resultExtra
307  = correctedExtra->fitTo(*tmpData,Save(),Minos(kFALSE),
308  Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
309
310
311  // Hypothesis test between previous correction (null)
312  // and this one (alternate). Use -2 log LR for test statistic
313  q = 2*(resultNull->minNll() - result->minNll());
314
315  qExtra = 2*(result->minNll() - resultExtra->minNll());
316
317  samplingDist->Fill(q);
318  samplingDistExtra->Fill(qExtra);
319  if (printLevel > 0)
320  cout << "NLL Results: null " << resultNull->minNll() << " ref = " << result->minNll() << " extra" << resultExtra->minNll() << endl;
321
322
323  delete tmpData;
324  delete result;
325  delete resultNull;
326  delete resultExtra;
327  }
328
330
331  // return samplingDist;
332 }
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3251
virtual Bool_t add(const RooAbsArg &var, Bool_t silent=kFALSE)
Add the specified argument to list.
RooCmdArg PrintLevel(Int_t code)
RooCmdArg Minos(Bool_t flag=kTRUE)
Bernstein basis polynomials are positive-definite in the range [0,1].
Definition: RooBernstein.h:23
RooFit::MsgLevel globalKillBelow() const
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:567
int Int_t
Definition: RtypesCore.h:41
static RooMsgService & instance()
Return reference to singleton instance.
STL namespace.
Double_t Prob(Double_t chi2, Int_t ndf)
RooDataSet is a container class to hold N-dimensional binned data.
Definition: RooDataHist.h:40
Double_t x[n]
Definition: legend1.C:17
Int_t ImportCorrectedPdf(RooWorkspace *, const char *, const char *, const char *)
Main method for Bernstein correction.
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:36
RooHistPdf implements a probablity density function sampled from a multidimensional histogram...
Definition: RooHistPdf.h:28
RooAbsData * data(const char *name) const
Retrieve dataset (binned or unbinned) with given name. A null pointer is returned if not found...
virtual void setVal(Double_t value)
Set value of variable to &#39;value&#39;.
Definition: RooRealVar.cxx:204
static const std::string & DefaultMinimizerType()
void setConstant(Bool_t value=kTRUE)
RooCmdArg Minimizer(const char *type, const char *alg=0)
Double_t minNll() const
Definition: RooFitResult.h:98
void setGlobalKillBelow(RooFit::MsgLevel level)
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:29
void CreateQSamplingDist(RooWorkspace *wks, const char *nominalName, const char *varName, const char *dataName, TH1F *, TH1F *, Int_t degree, Int_t nToys=500)
Create sampling distribution for q given degree-1 vs. degree corrections.
const Bool_t kFALSE
Definition: RtypesCore.h:88
Namespace for the RooStats classes.
Definition: Asimov.h:20
RooCmdArg Hesse(Bool_t flag=kTRUE)
static constexpr double degree
#define ClassImp(name)
Definition: Rtypes.h:359
RooAbsPdf * pdf(const char *name) const
Retrieve p.d.f (RooAbsPdf) with given name. A null pointer is returned if not found.
double Double_t
Definition: RtypesCore.h:55
RooRealVar * var(const char *name) const
Retrieve real-valued variable (RooRealVar) with given name. A null pointer is returned if not found...
RooCmdArg Save(Bool_t flag=kTRUE)
RooAbsPdf is the abstract interface for all probability density functions The class provides hybrid a...
Definition: RooAbsPdf.h:41
Bool_t import(const RooAbsArg &arg, const RooCmdArg &arg1=RooCmdArg(), const RooCmdArg &arg2=RooCmdArg(), const RooCmdArg &arg3=RooCmdArg(), const RooCmdArg &arg4=RooCmdArg(), const RooCmdArg &arg5=RooCmdArg(), const RooCmdArg &arg6=RooCmdArg(), const RooCmdArg &arg7=RooCmdArg(), const RooCmdArg &arg8=RooCmdArg(), const RooCmdArg &arg9=RooCmdArg())
Import a RooAbsArg object, e.g.
RooDataSet * generate(const RooArgSet &whatVars, Int_t nEvents, const RooCmdArg &arg1, const RooCmdArg &arg2=RooCmdArg::none(), const RooCmdArg &arg3=RooCmdArg::none(), const RooCmdArg &arg4=RooCmdArg::none(), const RooCmdArg &arg5=RooCmdArg::none())
Generate a new dataset containing the specified variables with events sampled from our distribution...
Definition: RooAbsPdf.cxx:1725
virtual RooFitResult * fitTo(RooAbsData &data, const RooCmdArg &arg1=RooCmdArg::none(), const RooCmdArg &arg2=RooCmdArg::none(), const RooCmdArg &arg3=RooCmdArg::none(), const RooCmdArg &arg4=RooCmdArg::none(), const RooCmdArg &arg5=RooCmdArg::none(), const RooCmdArg &arg6=RooCmdArg::none(), const RooCmdArg &arg7=RooCmdArg::none(), const RooCmdArg &arg8=RooCmdArg::none())
Fit PDF to given dataset.
Definition: RooAbsPdf.cxx:1079
BernsteinCorrection is a utility in RooStats to augment a nominal PDF with a polynomial correction te...
float * q
Definition: THbookFile.cxx:87
double log(double)
The RooWorkspace is a persistable container for RooFit projects.
Definition: RooWorkspace.h:43
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:285
Int_t status() const
Definition: RooFitResult.h:77