// @(#)root/roostats:$Id: BernsteinCorrection.cxx 31276 2009-11-18 15:06:42Z moneta$
// Author: Kyle Cranmer   28/07/2008

/*************************************************************************
*                                                                       *
* For the licensing terms see $ROOTSYS/LICENSE. * * For the list of contributors see$ROOTSYS/README/CREDITS.             *
*************************************************************************/

//_________________________________________________
/*
BEGIN_HTML
<p>
BernsteinCorrection is a utility in RooStats to augment a nominal PDF with a polynomial
correction term.  This is useful for incorporating systematic variations to the nominal PDF.
The Bernstein basis polynomails are particularly appropriate because they are positive definite.
</p>
<p>
This tool was inspired by the work of Glen Cowan together with Stephan Horner, Sascha Caron,
Eilam Gross, and others.
The initial implementation is independent work.  The major step forward in the approach was
to provide a well defined algorithm that specifies the order of polynomial to be included
in the correction.  This is an emperical algorithm, so in addition to the nominal model it
needs either a real data set or a simulated one.  In the early work, the nominal model was taken
to be a histogram from Monte Carlo simulations, but in this implementation it is generalized to an
arbitrary PDF (which includes a RooHistPdf).  The algorithm basically consists of a
hypothesis test of an nth-order correction (null) against a n+1-th order correction (alternate).
The quantity q = -2 log LR is used to determine whether the n+1-th order correction is a major
improvement to the n-th order correction.  The distribution of q is expected to be roughly
\chi^2 with one degree of freedom if the n-th order correction is a good model for the data.
Thus, one only moves to the n+1-th order correction of q is relatively large.  The chance that
one moves from the n-th to the n+1-th order correction when the n-th order correction
(eg. a type 1 error) is sufficient is given by the Prob(\chi^2_1 > threshold).  The constructor
of this class allows you to directly set this tolerance (in terms of probability that the n+1-th
</p>
<p>
To do:
Add another method to the utility that will make the sampling distribution for -2 log lambda
for various m vs. m+1 order corrections using a nominal model and perhaps having two ways of
generating the toys (either via a histogram or via an independent model that is supposed to
reflect reality).  That will allow one to make plots like Glen has at the end of his DRAFT
very easily.
</p>
END_HTML
*/
//

#ifndef RooStats_BernsteinCorrection
#include "RooStats/BernsteinCorrection.h"
#endif

#include "RooGlobalFunc.h"
#include "RooDataSet.h"
#include "RooRealVar.h"
#include "RooAbsPdf.h"
#include "RooFit.h"
#include "RooFitResult.h"
#include "TMath.h"
#include <string>
#include <vector>
#include <stdio.h>
#include <sstream>
#include <iostream>

#include "RooProdPdf.h"
#include "RooNLLVar.h"
#include "RooWorkspace.h"
#include "RooDataHist.h"
#include "RooHistPdf.h"

#include "RooBernstein.h"

ClassImp(RooStats::BernsteinCorrection) ;

using namespace RooFit;
using namespace RooStats;

//____________________________________
BernsteinCorrection::BernsteinCorrection(Double_t tolerance):
fMaxCorrection(100), fTolerance(tolerance){
}

//____________________________________
Int_t BernsteinCorrection::ImportCorrectedPdf(RooWorkspace* wks,
const char* nominalName,
const char* varName,
const char* dataName){
// Main method for Bernstein correction.

// get ingredients out of workspace
RooRealVar* x = wks->var(varName);
RooAbsPdf* nominal = wks->pdf(nominalName);
RooAbsData* data = wks->data(dataName);

// initialize alg, by checking how well nominal model fits
RooFitResult* nominalResult = nominal->fitTo(*data,Save(),Minos(kFALSE), Hesse(kFALSE),PrintLevel(-1));
Double_t lastNll= nominalResult->minNll();

// setup a log
std::stringstream log;
log << "------ Begin Bernstein Correction Log --------" << endl;

// Local variables that we want to keep in scope after loop
RooArgList coeff;
vector<RooRealVar*> coefficients;
Double_t q = 1E6;
Int_t degree = -1;

// The while loop
bool keepGoing = true;
while( keepGoing) {
degree++;

// we need to generate names for vars on the fly
std::stringstream str;
str<<"_"<<degree;

RooRealVar* newCoef = new RooRealVar(("c"+str.str()).c_str(),
"Bernstein basis poly coefficient",
1., 0., fMaxCorrection);
coefficients.push_back(newCoef);

// make the polynomial correction term
RooBernstein* poly = new RooBernstein("poly", "Bernstein poly", *x, coeff);

// make the corrected PDF = nominal * poly
RooProdPdf* corrected = new RooProdPdf("corrected","",RooArgList(*nominal,*poly));

// check to see how well this correction fits
RooFitResult* result = corrected->fitTo(*data,Save(),Minos(kFALSE), Hesse(kFALSE),PrintLevel(-1));

// Hypothesis test between previous correction (null)
// and this one (alternate).  Use -2 log LR for test statistic
q = 2*(lastNll - result->minNll()); // -2 log lambda, goes like significance^2
// check if we should keep going based on rate of Type I error
keepGoing = (degree < 1 || TMath::Prob(q,1) < fTolerance);

if(!keepGoing){
// terminate loop, import corrected PDF
RooMsgService::instance().setGlobalKillBelow(RooFit::FATAL) ;
wks->import(*corrected);
RooMsgService::instance().setGlobalKillBelow(RooFit::DEBUG) ;
} else {
// memory management
delete corrected;
delete poly;
}

// for the log
if(degree != 0){
log << "degree = " << degree
<< " -log L("<<degree-1<<") = " << lastNll
<< " -log L(" << degree <<") = " << result->minNll()
<< " q = " << q
<< " P(chi^2_1 > q) = " << TMath::Prob(q,1) << endl;;
}

// update last result for next iteration in loop
lastNll = result->minNll();

delete result;
}

log << "------ End Bernstein Correction Log --------" << endl;
cout << log.str();

return degree;
}

//____________________________________
void BernsteinCorrection::CreateQSamplingDist(RooWorkspace* wks,
const char* nominalName,
const char* varName,
const char* dataName,
TH1F* samplingDist,
TH1F* samplingDistExtra,
Int_t degree,
Int_t nToys){
// Create sampling distribution for q given degree-1 vs. degree corrections

// get ingredients out of workspace
RooRealVar* x = wks->var(varName);
RooAbsPdf* nominal = wks->pdf(nominalName);
RooAbsData* data = wks->data(dataName);

// setup a log
std::stringstream log;
log << "------ Begin Bernstein Correction Log --------" << endl;

// Local variables that we want to keep in scope after loop
RooArgList coeff; // n-th degree correction
RooArgList coeffNull; // n-1 correction
RooArgList coeffExtra; // n+1 correction
vector<RooRealVar*> coefficients;

cout << "make coefs" << endl;
for(int i = 0; i<=degree+1; ++i) {
// we need to generate names for vars on the fly
std::stringstream str;
str<<"_"<<i;

RooRealVar* newCoef = new RooRealVar(("c"+str.str()).c_str(),
"Bernstein basis poly coefficient",
1., 0., fMaxCorrection);

// keep three sets of coefficients for n-1, n, n+1 corrections
coefficients.push_back(newCoef);
}

coeffNull.Print();
coeff.Print();
// make the polynomial correction term
RooBernstein* poly
= new RooBernstein("poly", "Bernstein poly", *x, coeff);

// make the polynomial correction term
RooBernstein* polyNull
= new RooBernstein("polyNull", "Bernstein poly", *x, coeffNull);

// make the polynomial correction term
RooBernstein* polyExtra
= new RooBernstein("polyExtra", "Bernstein poly", *x, coeffExtra);

// make the corrected PDF = nominal * poly
RooProdPdf* corrected
= new RooProdPdf("corrected","",RooArgList(*nominal,*poly));

RooProdPdf* correctedNull
= new RooProdPdf("correctedNull","",RooArgList(*nominal,*polyNull));

RooProdPdf* correctedExtra
= new RooProdPdf("correctedExtra","",RooArgList(*nominal,*polyExtra));

cout << "made pdfs, make toy generator" << endl;

// makde a pDF to generate the toys
RooDataHist dataHist("dataHist","",*x,*data);
RooHistPdf toyGen("toyGen","",*x,dataHist);

//  TH1F* samplingDist = new TH1F("samplingDist","",20,0,10);
Double_t q = 0, qExtra = 0;
// do toys
for(int i=0; i<nToys; ++i){
cout << "on toy " << i << endl;
RooDataSet* tmpData = toyGen.generate(*x,data->numEntries());
// check to see how well this correction fits
RooFitResult* result
= corrected->fitTo(*tmpData,Save(),Minos(kFALSE),
Hesse(kFALSE),PrintLevel(-1));

RooFitResult* resultNull
= correctedNull->fitTo(*tmpData,Save(),Minos(kFALSE),
Hesse(kFALSE),PrintLevel(-1));

RooFitResult* resultExtra
= correctedExtra->fitTo(*tmpData,Save(),Minos(kFALSE),
Hesse(kFALSE),PrintLevel(-1));

// Hypothesis test between previous correction (null)
// and this one (alternate).  Use -2 log LR for test statistic
q = 2*(resultNull->minNll() - result->minNll());

qExtra = 2*(result->minNll() - resultExtra->minNll());

samplingDist->Fill(q);
samplingDistExtra->Fill(qExtra);
cout << resultNull->minNll() << " " << result->minNll() << " " << q << endl;

delete tmpData;
delete result;
delete resultNull;
delete resultExtra;
}

//  return samplingDist;
}

BernsteinCorrection.cxx:1
BernsteinCorrection.cxx:2
BernsteinCorrection.cxx:3
BernsteinCorrection.cxx:4
BernsteinCorrection.cxx:5
BernsteinCorrection.cxx:6
BernsteinCorrection.cxx:7
BernsteinCorrection.cxx:8
BernsteinCorrection.cxx:9
BernsteinCorrection.cxx:10
BernsteinCorrection.cxx:11
BernsteinCorrection.cxx:12
BernsteinCorrection.cxx:13
BernsteinCorrection.cxx:14
BernsteinCorrection.cxx:15
BernsteinCorrection.cxx:16
BernsteinCorrection.cxx:17
BernsteinCorrection.cxx:18
BernsteinCorrection.cxx:19
BernsteinCorrection.cxx:20
BernsteinCorrection.cxx:21
BernsteinCorrection.cxx:22
BernsteinCorrection.cxx:23
BernsteinCorrection.cxx:24
BernsteinCorrection.cxx:25
BernsteinCorrection.cxx:26
BernsteinCorrection.cxx:27
BernsteinCorrection.cxx:28
BernsteinCorrection.cxx:29
BernsteinCorrection.cxx:30
BernsteinCorrection.cxx:31
BernsteinCorrection.cxx:32
BernsteinCorrection.cxx:33
BernsteinCorrection.cxx:34
BernsteinCorrection.cxx:35
BernsteinCorrection.cxx:36
BernsteinCorrection.cxx:37
BernsteinCorrection.cxx:38
BernsteinCorrection.cxx:39
BernsteinCorrection.cxx:40
BernsteinCorrection.cxx:41
BernsteinCorrection.cxx:42
BernsteinCorrection.cxx:43
BernsteinCorrection.cxx:44
BernsteinCorrection.cxx:45
BernsteinCorrection.cxx:46
BernsteinCorrection.cxx:47
BernsteinCorrection.cxx:48
BernsteinCorrection.cxx:49
BernsteinCorrection.cxx:50
BernsteinCorrection.cxx:51
BernsteinCorrection.cxx:52
BernsteinCorrection.cxx:53
BernsteinCorrection.cxx:54
BernsteinCorrection.cxx:55
BernsteinCorrection.cxx:56
BernsteinCorrection.cxx:57
BernsteinCorrection.cxx:58
BernsteinCorrection.cxx:59
BernsteinCorrection.cxx:60
BernsteinCorrection.cxx:61
BernsteinCorrection.cxx:62
BernsteinCorrection.cxx:63
BernsteinCorrection.cxx:64
BernsteinCorrection.cxx:65
BernsteinCorrection.cxx:66
BernsteinCorrection.cxx:67
BernsteinCorrection.cxx:68
BernsteinCorrection.cxx:69
BernsteinCorrection.cxx:70
BernsteinCorrection.cxx:71
BernsteinCorrection.cxx:72
BernsteinCorrection.cxx:73
BernsteinCorrection.cxx:74
BernsteinCorrection.cxx:75
BernsteinCorrection.cxx:76
BernsteinCorrection.cxx:77
BernsteinCorrection.cxx:78
BernsteinCorrection.cxx:79
BernsteinCorrection.cxx:80
BernsteinCorrection.cxx:81
BernsteinCorrection.cxx:82
BernsteinCorrection.cxx:83
BernsteinCorrection.cxx:84
BernsteinCorrection.cxx:85
BernsteinCorrection.cxx:86
BernsteinCorrection.cxx:87
BernsteinCorrection.cxx:88
BernsteinCorrection.cxx:89
BernsteinCorrection.cxx:90
BernsteinCorrection.cxx:91
BernsteinCorrection.cxx:92
BernsteinCorrection.cxx:93
BernsteinCorrection.cxx:94
BernsteinCorrection.cxx:95
BernsteinCorrection.cxx:96
BernsteinCorrection.cxx:97
BernsteinCorrection.cxx:98
BernsteinCorrection.cxx:99
BernsteinCorrection.cxx:100
BernsteinCorrection.cxx:101
BernsteinCorrection.cxx:102
BernsteinCorrection.cxx:103
BernsteinCorrection.cxx:104
BernsteinCorrection.cxx:105
BernsteinCorrection.cxx:106
BernsteinCorrection.cxx:107
BernsteinCorrection.cxx:108
BernsteinCorrection.cxx:109
BernsteinCorrection.cxx:110
BernsteinCorrection.cxx:111
BernsteinCorrection.cxx:112
BernsteinCorrection.cxx:113
BernsteinCorrection.cxx:114
BernsteinCorrection.cxx:115
BernsteinCorrection.cxx:116
BernsteinCorrection.cxx:117
BernsteinCorrection.cxx:118
BernsteinCorrection.cxx:119
BernsteinCorrection.cxx:120
BernsteinCorrection.cxx:121
BernsteinCorrection.cxx:122
BernsteinCorrection.cxx:123
BernsteinCorrection.cxx:124
BernsteinCorrection.cxx:125
BernsteinCorrection.cxx:126
BernsteinCorrection.cxx:127
BernsteinCorrection.cxx:128
BernsteinCorrection.cxx:129
BernsteinCorrection.cxx:130
BernsteinCorrection.cxx:131
BernsteinCorrection.cxx:132
BernsteinCorrection.cxx:133
BernsteinCorrection.cxx:134
BernsteinCorrection.cxx:135
BernsteinCorrection.cxx:136
BernsteinCorrection.cxx:137
BernsteinCorrection.cxx:138
BernsteinCorrection.cxx:139
BernsteinCorrection.cxx:140
BernsteinCorrection.cxx:141
BernsteinCorrection.cxx:142
BernsteinCorrection.cxx:143
BernsteinCorrection.cxx:144
BernsteinCorrection.cxx:145
BernsteinCorrection.cxx:146
BernsteinCorrection.cxx:147
BernsteinCorrection.cxx:148
BernsteinCorrection.cxx:149
BernsteinCorrection.cxx:150
BernsteinCorrection.cxx:151
BernsteinCorrection.cxx:152
BernsteinCorrection.cxx:153
BernsteinCorrection.cxx:154
BernsteinCorrection.cxx:155
BernsteinCorrection.cxx:156
BernsteinCorrection.cxx:157
BernsteinCorrection.cxx:158
BernsteinCorrection.cxx:159
BernsteinCorrection.cxx:160
BernsteinCorrection.cxx:161
BernsteinCorrection.cxx:162
BernsteinCorrection.cxx:163
BernsteinCorrection.cxx:164
BernsteinCorrection.cxx:165
BernsteinCorrection.cxx:166
BernsteinCorrection.cxx:167
BernsteinCorrection.cxx:168
BernsteinCorrection.cxx:169
BernsteinCorrection.cxx:170
BernsteinCorrection.cxx:171
BernsteinCorrection.cxx:172
BernsteinCorrection.cxx:173
BernsteinCorrection.cxx:174
BernsteinCorrection.cxx:175
BernsteinCorrection.cxx:176
BernsteinCorrection.cxx:177
BernsteinCorrection.cxx:178
BernsteinCorrection.cxx:179
BernsteinCorrection.cxx:180
BernsteinCorrection.cxx:181
BernsteinCorrection.cxx:182
BernsteinCorrection.cxx:183
BernsteinCorrection.cxx:184
BernsteinCorrection.cxx:185
BernsteinCorrection.cxx:186
BernsteinCorrection.cxx:187
BernsteinCorrection.cxx:188
BernsteinCorrection.cxx:189
BernsteinCorrection.cxx:190
BernsteinCorrection.cxx:191
BernsteinCorrection.cxx:192
BernsteinCorrection.cxx:193
BernsteinCorrection.cxx:194
BernsteinCorrection.cxx:195
BernsteinCorrection.cxx:196
BernsteinCorrection.cxx:197
BernsteinCorrection.cxx:198
BernsteinCorrection.cxx:199
BernsteinCorrection.cxx:200
BernsteinCorrection.cxx:201
BernsteinCorrection.cxx:202
BernsteinCorrection.cxx:203
BernsteinCorrection.cxx:204
BernsteinCorrection.cxx:205
BernsteinCorrection.cxx:206
BernsteinCorrection.cxx:207
BernsteinCorrection.cxx:208
BernsteinCorrection.cxx:209
BernsteinCorrection.cxx:210
BernsteinCorrection.cxx:211
BernsteinCorrection.cxx:212
BernsteinCorrection.cxx:213
BernsteinCorrection.cxx:214
BernsteinCorrection.cxx:215
BernsteinCorrection.cxx:216
BernsteinCorrection.cxx:217
BernsteinCorrection.cxx:218
BernsteinCorrection.cxx:219
BernsteinCorrection.cxx:220
BernsteinCorrection.cxx:221
BernsteinCorrection.cxx:222
BernsteinCorrection.cxx:223
BernsteinCorrection.cxx:224
BernsteinCorrection.cxx:225
BernsteinCorrection.cxx:226
BernsteinCorrection.cxx:227
BernsteinCorrection.cxx:228
BernsteinCorrection.cxx:229
BernsteinCorrection.cxx:230
BernsteinCorrection.cxx:231
BernsteinCorrection.cxx:232
BernsteinCorrection.cxx:233
BernsteinCorrection.cxx:234
BernsteinCorrection.cxx:235
BernsteinCorrection.cxx:236
BernsteinCorrection.cxx:237
BernsteinCorrection.cxx:238
BernsteinCorrection.cxx:239
BernsteinCorrection.cxx:240
BernsteinCorrection.cxx:241
BernsteinCorrection.cxx:242
BernsteinCorrection.cxx:243
BernsteinCorrection.cxx:244
BernsteinCorrection.cxx:245
BernsteinCorrection.cxx:246
BernsteinCorrection.cxx:247
BernsteinCorrection.cxx:248
BernsteinCorrection.cxx:249
BernsteinCorrection.cxx:250
BernsteinCorrection.cxx:251
BernsteinCorrection.cxx:252
BernsteinCorrection.cxx:253
BernsteinCorrection.cxx:254
BernsteinCorrection.cxx:255
BernsteinCorrection.cxx:256
BernsteinCorrection.cxx:257
BernsteinCorrection.cxx:258
BernsteinCorrection.cxx:259
BernsteinCorrection.cxx:260
BernsteinCorrection.cxx:261
BernsteinCorrection.cxx:262
BernsteinCorrection.cxx:263
BernsteinCorrection.cxx:264
BernsteinCorrection.cxx:265
BernsteinCorrection.cxx:266
BernsteinCorrection.cxx:267
BernsteinCorrection.cxx:268
BernsteinCorrection.cxx:269
BernsteinCorrection.cxx:270
BernsteinCorrection.cxx:271
BernsteinCorrection.cxx:272
BernsteinCorrection.cxx:273
BernsteinCorrection.cxx:274
BernsteinCorrection.cxx:275
BernsteinCorrection.cxx:276
BernsteinCorrection.cxx:277
BernsteinCorrection.cxx:278
BernsteinCorrection.cxx:279
BernsteinCorrection.cxx:280
BernsteinCorrection.cxx:281
BernsteinCorrection.cxx:282
BernsteinCorrection.cxx:283
BernsteinCorrection.cxx:284
BernsteinCorrection.cxx:285
BernsteinCorrection.cxx:286
BernsteinCorrection.cxx:287
BernsteinCorrection.cxx:288
BernsteinCorrection.cxx:289
BernsteinCorrection.cxx:290
BernsteinCorrection.cxx:291
BernsteinCorrection.cxx:292