Logo ROOT   6.08/07
Reference Guide
BernsteinCorrection.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id$
2 // Author: Kyle Cranmer 28/07/2008
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////////////////////////////////
13 
14 /**
15 
16  \class RooStats::BernsteinCorrection
17  \ingroup Roostats
18 
19 BernsteinCorrection is a utility in RooStats to augment a nominal PDF with a polynomial
20 correction term. This is useful for incorporating systematic variations to the nominal PDF.
21 The Bernstein basis polynomails are particularly appropriate because they are positive definite.
22 
23 This tool was inspired by the work of Glen Cowan together with Stephan Horner, Sascha Caron,
24 Eilam Gross, and others.
25 The initial implementation is independent work. The major step forward in the approach was
26 to provide a well defined algorithm that specifies the order of polynomial to be included
27 in the correction. This is an emperical algorithm, so in addition to the nominal model it
28 needs either a real data set or a simulated one. In the early work, the nominal model was taken
29 to be a histogram from Monte Carlo simulations, but in this implementation it is generalized to an
30 arbitrary PDF (which includes a RooHistPdf). The algorithm basically consists of a
31 hypothesis test of an nth-order correction (null) against a n+1-th order correction (alternate).
32 The quantity q = -2 log LR is used to determine whether the n+1-th order correction is a major
33 improvement to the n-th order correction. The distribution of q is expected to be roughly
34 \f$\chi^2\f$ with one degree of freedom if the n-th order correction is a good model for the data.
35  Thus, one only moves to the n+1-th order correction of q is relatively large. The chance that
36 one moves from the n-th to the n+1-th order correction when the n-th order correction
37 (eg. a type 1 error) is sufficient is given by the Prob(\f$\chi^2_1\f$ > threshold). The constructor
38 of this class allows you to directly set this tolerance (in terms of probability that the n+1-th
39  term is added unnecessarily).
40 
41 To do:
42 Add another method to the utility that will make the sampling distribution for -2 log lambda
43 for various m vs. m+1 order corrections using a nominal model and perhaps having two ways of
44 generating the toys (either via a histogram or via an independent model that is supposed to
45  reflect reality). That will allow one to make plots like Glen has at the end of his DRAFT
46  very easily.
47 
48 */
49 
50 
51 #ifndef RooStats_BernsteinCorrection
53 #endif
54 
55 #include "RooGlobalFunc.h"
56 #include "RooDataSet.h"
57 #include "RooRealVar.h"
58 #include "RooAbsPdf.h"
59 #include "RooFit.h"
60 #include "RooFitResult.h"
61 #include "TMath.h"
62 #include <string>
63 #include <vector>
64 #include <stdio.h>
65 #include <sstream>
66 #include <iostream>
67 
68 #include "RooEffProd.h"
69 #include "RooNLLVar.h"
70 #include "RooWorkspace.h"
71 #include "RooDataHist.h"
72 #include "RooHistPdf.h"
73 
74 #include "RooBernstein.h"
75 
76 #include "Math/MinimizerOptions.h"
77 
78 
80 
81 using namespace RooFit;
82 using namespace RooStats;
83 using namespace std;
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 
87 BernsteinCorrection::BernsteinCorrection(Double_t tolerance):
88  fMaxDegree(10), fMaxCorrection(100), fTolerance(tolerance){
89 }
90 
91 
92 ////////////////////////////////////////////////////////////////////////////////
93 /// Main method for Bernstein correction.
94 
96  const char* nominalName,
97  const char* varName,
98  const char* dataName){
99  // get ingredients out of workspace
100  RooRealVar* x = wks->var(varName);
101  RooAbsPdf* nominal = wks->pdf(nominalName);
102  RooAbsData* data = wks->data(dataName);
103 
104  if (!x || !nominal || !data) {
105  cout << "Error: wrong name for pdf or variable or dataset - return -1 " << std::endl;
106  return -1;
107  }
108 
109  std::cout << "BernsteinCorrection::ImportCorrectedPdf - Doing initial Fit with nominam model " << std::endl;
110 
111  // initialize alg, by checking how well nominal model fits
114 
115  RooFitResult* nominalResult = nominal->fitTo(*data,Save(),Minos(kFALSE), Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
116  Double_t lastNll= nominalResult->minNll();
117 
118  if (nominalResult->status() != 0 ) {
119  std::cout << "BernsteinCorrection::ImportCorrectedPdf - Error fit with nominal model failed - exit" << std::endl;
120  return -1;
121  }
122 
123  // setup a log
124  std::stringstream log;
125  log << "------ Begin Bernstein Correction Log --------" << endl;
126 
127  // Local variables that we want to keep in scope after loop
128  RooArgList coeff;
129  vector<RooRealVar*> coefficients;
130  Double_t q = 1E6;
131  Int_t degree = -1;
132 
133  // The while loop
134  bool keepGoing = true;
135  while( keepGoing ) {
136  degree++;
137 
138  // we need to generate names for vars on the fly
139  std::stringstream str;
140  str<<"_"<<degree;
141 
142  RooRealVar* newCoef = new RooRealVar(("c"+str.str()).c_str(),
143  "Bernstein basis poly coefficient",
144  1.0, 0., fMaxCorrection);
145  coeff.add(*newCoef);
146  coefficients.push_back(newCoef);
147  // Since pdf is normalized - coefficient for degree 0 is fixed to be 1
148  if (degree == 0) {
149  newCoef->setVal(1);
150  newCoef->setConstant(1);
151  continue;
152  }
153 
154  // make the polynomial correction term
155  RooBernstein* poly = new RooBernstein("poly", "Bernstein poly", *x, coeff);
156 
157  // make the corrected PDF = nominal * poly
158  RooEffProd* corrected = new RooEffProd("corrected","",*nominal,*poly);
159 
160  // check to see how well this correction fits
161  RooFitResult* result = corrected->fitTo(*data,Save(),Minos(kFALSE), Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
162 
163  if (result->status() != 0) {
164  std::cout << "BernsteinCorrection::ImportCorrectedPdf - Error fit with corrected model failed" << std::endl;
165  return -1;
166  }
167 
168 
169  // Hypothesis test between previous correction (null)
170  // and this one (alternate). Use -2 log LR for test statistic
171  q = 2*(lastNll - result->minNll()); // -2 log lambda, goes like significance^2
172  // check if we should keep going based on rate of Type I error
173  keepGoing = (degree < 1 || TMath::Prob(q,1) < fTolerance );
174  if (degree >= fMaxDegree) keepGoing = false;
175 
176  if(!keepGoing){
177  // terminate loop, import corrected PDF
178  //RooMsgService::instance().setGlobalKillBelow(RooFit::FATAL) ;
179  wks->import(*corrected);
180  //RooMsgService::instance().setGlobalKillBelow(RooFit::DEBUG) ;
181  } else {
182  // memory management
183  delete corrected;
184  delete poly;
185  }
186 
187  // for the log
188  if(degree != 0){
189  log << "degree = " << degree
190  << " -log L("<<degree-1<<") = " << lastNll
191  << " -log L(" << degree <<") = " << result->minNll()
192  << " q = " << q
193  << " P(chi^2_1 > q) = " << TMath::Prob(q,1) << endl;;
194  }
195 
196  // update last result for next iteration in loop
197  lastNll = result->minNll();
198 
199  delete result;
200  }
201 
202  log << "------ End Bernstein Correction Log --------" << endl;
203  cout << log.str();
204 
205  return degree;
206 }
207 
208 
209 ////////////////////////////////////////////////////////////////////////////////
210 /// Create sampling distribution for q given degree-1 vs. degree corrections
211 
213  const char* nominalName,
214  const char* varName,
215  const char* dataName,
216  TH1F* samplingDist,
217  TH1F* samplingDistExtra,
218  Int_t degree,
219  Int_t nToys){
220  // get ingredients out of workspace
221  RooRealVar* x = wks->var(varName);
222  RooAbsPdf* nominal = wks->pdf(nominalName);
223  RooAbsData* data = wks->data(dataName);
224 
225  if (!x || !nominal || !data) {
226  cout << "Error: wrong name for pdf or variable or dataset ! " << std::endl;
227  return;
228  }
229 
230  // setup a log
231  std::stringstream log;
232  log << "------ Begin Bernstein Correction Log --------" << endl;
233 
234  // Local variables that we want to keep in scope after loop
235  RooArgList coeff; // n-th degree correction
236  RooArgList coeffNull; // n-1 correction
237  RooArgList coeffExtra; // n+1 correction
238  vector<RooRealVar*> coefficients;
239 
240  //cout << "make coefs" << endl;
241  for(int i = 0; i<=degree+1; ++i) {
242  // we need to generate names for vars on the fly
243  std::stringstream str;
244  str<<"_"<<i;
245 
246  RooRealVar* newCoef = new RooRealVar(("c"+str.str()).c_str(),
247  "Bernstein basis poly coefficient",
248  1., 0., fMaxCorrection);
249 
250  // keep three sets of coefficients for n-1, n, n+1 corrections
251  if(i<degree) coeffNull.add(*newCoef);
252  if(i<=degree) coeff.add(*newCoef);
253  coeffExtra.add(*newCoef);
254  coefficients.push_back(newCoef);
255  }
256 
257  // make the polynomial correction term
258  RooBernstein* poly
259  = new RooBernstein("poly", "Bernstein poly", *x, coeff);
260 
261  // make the polynomial correction term
262  RooBernstein* polyNull
263  = new RooBernstein("polyNull", "Bernstein poly", *x, coeffNull);
264 
265  // make the polynomial correction term
266  RooBernstein* polyExtra
267  = new RooBernstein("polyExtra", "Bernstein poly", *x, coeffExtra);
268 
269  // make the corrected PDF = nominal * poly
270  RooEffProd* corrected
271  = new RooEffProd("corrected","",*nominal,*poly);
272 
273  RooEffProd* correctedNull
274  = new RooEffProd("correctedNull","",*nominal,*polyNull);
275 
276  RooEffProd* correctedExtra
277  = new RooEffProd("correctedExtra","",*nominal,*polyExtra);
278 
279 
280  cout << "made pdfs, make toy generator" << endl;
281 
282  // makde a pDF to generate the toys
283  RooDataHist dataHist("dataHist","",*x,*data);
284  RooHistPdf toyGen("toyGen","",*x,dataHist);
285 
288 
290  if (printLevel < 0) {
292  }
293 
294 
295  // TH1F* samplingDist = new TH1F("samplingDist","",20,0,10);
296  Double_t q = 0, qExtra = 0;
297  // do toys
298  for(int i=0; i<nToys; ++i){
299  cout << "on toy " << i << endl;
300 
301  RooDataSet* tmpData = toyGen.generate(*x,data->numEntries());
302  // check to see how well this correction fits
304  = corrected->fitTo(*tmpData,Save(),Minos(kFALSE),
305  Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
306 
307  RooFitResult* resultNull
308  = correctedNull->fitTo(*tmpData,Save(),Minos(kFALSE),
309  Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
310 
311 
312  RooFitResult* resultExtra
313  = correctedExtra->fitTo(*tmpData,Save(),Minos(kFALSE),
314  Hesse(kFALSE),PrintLevel(printLevel),Minimizer(minimType));
315 
316 
317  // Hypothesis test between previous correction (null)
318  // and this one (alternate). Use -2 log LR for test statistic
319  q = 2*(resultNull->minNll() - result->minNll());
320 
321  qExtra = 2*(result->minNll() - resultExtra->minNll());
322 
323  samplingDist->Fill(q);
324  samplingDistExtra->Fill(qExtra);
325  if (printLevel > 0)
326  cout << "NLL Results: null " << resultNull->minNll() << " ref = " << result->minNll() << " extra" << resultExtra->minNll() << endl;
327 
328 
329  delete tmpData;
330  delete result;
331  delete resultNull;
332  delete resultExtra;
333  }
334 
336 
337  // return samplingDist;
338 }
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3125
virtual Bool_t add(const RooAbsArg &var, Bool_t silent=kFALSE)
Add the specified argument to list.
RooCmdArg PrintLevel(Int_t code)
RooCmdArg Minos(Bool_t flag=kTRUE)
Bernstein basis polynomials are positive-definite in the range [0,1].
Definition: RooBernstein.h:23
RooFit::MsgLevel globalKillBelow() const
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
int Int_t
Definition: RtypesCore.h:41
const Bool_t kFALSE
Definition: Rtypes.h:92
static RooMsgService & instance()
Return reference to singleton instance.
STL namespace.
Double_t Prob(Double_t chi2, Int_t ndf)
Computation of the probability for a certain Chi-squared (chi2) and number of degrees of freedom (ndf...
Definition: TMath.cxx:624
RooDataSet is a container class to hold N-dimensional binned data.
Definition: RooDataHist.h:40
Double_t x[n]
Definition: legend1.C:17
Int_t ImportCorrectedPdf(RooWorkspace *, const char *, const char *, const char *)
Main method for Bernstein correction.
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:37
RooHistPdf implements a probablity density function sampled from a multidimensional histogram...
Definition: RooHistPdf.h:28
RooAbsData * data(const char *name) const
Retrieve dataset (binned or unbinned) with given name. A null pointer is returned if not found...
virtual void setVal(Double_t value)
Set value of variable to &#39;value&#39;.
Definition: RooRealVar.cxx:205
static const std::string & DefaultMinimizerType()
void setConstant(Bool_t value=kTRUE)
RooCmdArg Minimizer(const char *type, const char *alg=0)
Double_t minNll() const
Definition: RooFitResult.h:97
void setGlobalKillBelow(RooFit::MsgLevel level)
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:29
void CreateQSamplingDist(RooWorkspace *wks, const char *nominalName, const char *varName, const char *dataName, TH1F *, TH1F *, Int_t degree, Int_t nToys=500)
Create sampling distribution for q given degree-1 vs. degree corrections.
Namespace for the RooStats classes.
Definition: Asimov.h:20
RooCmdArg Hesse(Bool_t flag=kTRUE)
#define ClassImp(name)
Definition: Rtypes.h:279
RooAbsPdf * pdf(const char *name) const
Retrieve p.d.f (RooAbsPdf) with given name. A null pointer is returned if not found.
double Double_t
Definition: RtypesCore.h:55
RooRealVar * var(const char *name) const
Retrieve real-valued variable (RooRealVar) with given name. A null pointer is returned if not found...
RooCmdArg Save(Bool_t flag=kTRUE)
RooAbsPdf is the abstract interface for all probability density functions The class provides hybrid a...
Definition: RooAbsPdf.h:41
Bool_t import(const RooAbsArg &arg, const RooCmdArg &arg1=RooCmdArg(), const RooCmdArg &arg2=RooCmdArg(), const RooCmdArg &arg3=RooCmdArg(), const RooCmdArg &arg4=RooCmdArg(), const RooCmdArg &arg5=RooCmdArg(), const RooCmdArg &arg6=RooCmdArg(), const RooCmdArg &arg7=RooCmdArg(), const RooCmdArg &arg8=RooCmdArg(), const RooCmdArg &arg9=RooCmdArg())
Import a RooAbsArg object, e.g.
RooDataSet * generate(const RooArgSet &whatVars, Int_t nEvents, const RooCmdArg &arg1, const RooCmdArg &arg2=RooCmdArg::none(), const RooCmdArg &arg3=RooCmdArg::none(), const RooCmdArg &arg4=RooCmdArg::none(), const RooCmdArg &arg5=RooCmdArg::none())
Generate a new dataset containing the specified variables with events sampled from our distribution...
Definition: RooAbsPdf.cxx:1702
virtual RooFitResult * fitTo(RooAbsData &data, const RooCmdArg &arg1=RooCmdArg::none(), const RooCmdArg &arg2=RooCmdArg::none(), const RooCmdArg &arg3=RooCmdArg::none(), const RooCmdArg &arg4=RooCmdArg::none(), const RooCmdArg &arg5=RooCmdArg::none(), const RooCmdArg &arg6=RooCmdArg::none(), const RooCmdArg &arg7=RooCmdArg::none(), const RooCmdArg &arg8=RooCmdArg::none())
Fit PDF to given dataset.
Definition: RooAbsPdf.cxx:1056
double result[121]
BernsteinCorrection is a utility in RooStats to augment a nominal PDF with a polynomial correction te...
float * q
Definition: THbookFile.cxx:87
double log(double)
The RooWorkspace is a persistable container for RooFit projects.
Definition: RooWorkspace.h:42
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:269
Int_t status() const
Definition: RooFitResult.h:76