ROOT logo

From $ROOTSYS/tutorials/roostats/rs_bernsteinCorrection.C

/////////////////////////////////////////////////////////////////////////
//
// 'Bernstein Correction' RooStats tutorial macro
// author: Kyle Cranmer
// date March. 2009
//
// This tutorial shows usage of a the BernsteinCorrection utility in RooStats.
// The idea is that one has a distribution coming either from data or Monte Carlo
// (called "reality" in the macro) and a nominal model that is not sufficiently
// flexible to take into account the real distribution.  One wants to take into
// account the systematic associated with this imperfect modeling by augmenting
// the nominal model with some correction term (in this case a polynomial).
// The BernsteinCorrection utility will import into your workspace a corrected model
// given by nominal(x) * poly_N(x), where poly_N is an n-th order polynomial in
// the Bernstein basis.  The degree N of the polynomial is chosen by specifying the tolerance
// one has in adding an extra term to the polynomial.
// The Bernstein basis is nice because it only has positive-definite terms
// and works well with PDFs.
// Finally, the macro makes a plot of:
//  - the data (drawn from 'reality'),
//  - the best fit of the nominal model (blue)
//  - and the best fit corrected model.
/////////////////////////////////////////////////////////////////////////

#ifndef __CINT__
#include "RooGlobalFunc.h"
#endif
#include "RooDataSet.h"
#include "RooRealVar.h"
#include "RooConstVar.h"
#include "RooBernstein.h"
#include "TCanvas.h"
#include "RooAbsPdf.h"
#include "RooFit.h"
#include "RooFitResult.h"
#include "RooPlot.h"
#include <string>
#include <vector>
#include <stdio.h>
#include <sstream>
#include <iostream>

#include "RooProdPdf.h"
#include "RooAddPdf.h"
#include "RooGaussian.h"
#include "RooNLLVar.h"
#include "RooMinuit.h"
#include "RooProfileLL.h"
#include "RooWorkspace.h"

#include "RooStats/BernsteinCorrection.h"

// use this order for safety on library loading
using namespace RooFit;
using namespace RooStats;


//____________________________________
void rs_bernsteinCorrection(){

  // set range of observable
  Double_t lowRange = -1, highRange =5;

  // make a RooRealVar for the observable
  RooRealVar x("x", "x", lowRange, highRange);

  // true model
  RooGaussian narrow("narrow","",x,RooConst(0.), RooConst(.8));
  RooGaussian wide("wide","",x,RooConst(0.), RooConst(2.));
  RooAddPdf reality("reality","",RooArgList(narrow, wide), RooConst(0.8));

  RooDataSet* data = reality.generate(x,1000);

  // nominal model
  RooRealVar sigma("sigma","",1.,0,10);
  RooGaussian nominal("nominal","",x,RooConst(0.), sigma);

  RooWorkspace* wks = new RooWorkspace("myWorksspace");

  wks->import(*data, Rename("data"));
  wks->import(nominal);

  // use Minuit2
  ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2"); 

  // The tolerance sets the probability to add an unnecessary term.
  // lower tolerance will add fewer terms, while higher tolerance
  // will add more terms and provide a more flexible function.
  Double_t tolerance = 0.05;
  BernsteinCorrection bernsteinCorrection(tolerance);
  Int_t degree = bernsteinCorrection.ImportCorrectedPdf(wks,"nominal","x","data");

  if (degree < 0) {
     Error("rs_bernsteinCorrection","Bernstein correction failed ! ");
     return;
  }

  cout << " Correction based on Bernstein Poly of degree " << degree << endl;
  

  RooPlot* frame = x.frame();
  data->plotOn(frame);
  // plot the best fit nominal model in blue
  TString minimType =  ROOT::Math::MinimizerOptions::DefaultMinimizerType();
  nominal.fitTo(*data,PrintLevel(0),Minimizer(minimType));
  nominal.plotOn(frame);

  // plot the best fit corrected model in red
  RooAbsPdf* corrected = wks->pdf("corrected");
  if (!corrected) return;

  // fit corrected model
  corrected->fitTo(*data,PrintLevel(0),Minimizer(minimType) );
  corrected->plotOn(frame,LineColor(kRed));

  // plot the correction term (* norm constant) in dashed green
  // should make norm constant just be 1, not depend on binning of data
  RooAbsPdf* poly = wks->pdf("poly");
  if (poly) 
  poly->plotOn(frame,LineColor(kGreen), LineStyle(kDashed));

  // this is a switch to check the sampling distribution
  // of -2 log LR for two comparisons:
  // the first is for n-1 vs. n degree polynomial corrections
  // the second is for n vs. n+1 degree polynomial corrections
  // Here we choose n to be the one chosen by the tolerance
  // critereon above, eg. n = "degree" in the code.
  // Setting this to true is takes about 10 min.
  bool checkSamplingDist = true;
  int numToyMC = 20;  // increse this value for sensible results 

  TCanvas* c1 = new TCanvas();
  if(checkSamplingDist) {
    c1->Divide(1,2);
    c1->cd(1);
  }
  frame->Draw();
  gPad->Update(); 

  if(checkSamplingDist) {
    // check sampling dist
    ROOT::Math::MinimizerOptions::SetDefaultPrintLevel(-1); 
    TH1F* samplingDist = new TH1F("samplingDist","",20,0,10);
    TH1F* samplingDistExtra = new TH1F("samplingDistExtra","",20,0,10);
    bernsteinCorrection.CreateQSamplingDist(wks,"nominal","x","data",samplingDist, samplingDistExtra, degree,numToyMC);

    c1->cd(2);
    samplingDistExtra->SetLineColor(kRed);
    samplingDistExtra->Draw();
    samplingDist->Draw("same");
  }
}

 rs_bernsteinCorrection.C:1
 rs_bernsteinCorrection.C:2
 rs_bernsteinCorrection.C:3
 rs_bernsteinCorrection.C:4
 rs_bernsteinCorrection.C:5
 rs_bernsteinCorrection.C:6
 rs_bernsteinCorrection.C:7
 rs_bernsteinCorrection.C:8
 rs_bernsteinCorrection.C:9
 rs_bernsteinCorrection.C:10
 rs_bernsteinCorrection.C:11
 rs_bernsteinCorrection.C:12
 rs_bernsteinCorrection.C:13
 rs_bernsteinCorrection.C:14
 rs_bernsteinCorrection.C:15
 rs_bernsteinCorrection.C:16
 rs_bernsteinCorrection.C:17
 rs_bernsteinCorrection.C:18
 rs_bernsteinCorrection.C:19
 rs_bernsteinCorrection.C:20
 rs_bernsteinCorrection.C:21
 rs_bernsteinCorrection.C:22
 rs_bernsteinCorrection.C:23
 rs_bernsteinCorrection.C:24
 rs_bernsteinCorrection.C:25
 rs_bernsteinCorrection.C:26
 rs_bernsteinCorrection.C:27
 rs_bernsteinCorrection.C:28
 rs_bernsteinCorrection.C:29
 rs_bernsteinCorrection.C:30
 rs_bernsteinCorrection.C:31
 rs_bernsteinCorrection.C:32
 rs_bernsteinCorrection.C:33
 rs_bernsteinCorrection.C:34
 rs_bernsteinCorrection.C:35
 rs_bernsteinCorrection.C:36
 rs_bernsteinCorrection.C:37
 rs_bernsteinCorrection.C:38
 rs_bernsteinCorrection.C:39
 rs_bernsteinCorrection.C:40
 rs_bernsteinCorrection.C:41
 rs_bernsteinCorrection.C:42
 rs_bernsteinCorrection.C:43
 rs_bernsteinCorrection.C:44
 rs_bernsteinCorrection.C:45
 rs_bernsteinCorrection.C:46
 rs_bernsteinCorrection.C:47
 rs_bernsteinCorrection.C:48
 rs_bernsteinCorrection.C:49
 rs_bernsteinCorrection.C:50
 rs_bernsteinCorrection.C:51
 rs_bernsteinCorrection.C:52
 rs_bernsteinCorrection.C:53
 rs_bernsteinCorrection.C:54
 rs_bernsteinCorrection.C:55
 rs_bernsteinCorrection.C:56
 rs_bernsteinCorrection.C:57
 rs_bernsteinCorrection.C:58
 rs_bernsteinCorrection.C:59
 rs_bernsteinCorrection.C:60
 rs_bernsteinCorrection.C:61
 rs_bernsteinCorrection.C:62
 rs_bernsteinCorrection.C:63
 rs_bernsteinCorrection.C:64
 rs_bernsteinCorrection.C:65
 rs_bernsteinCorrection.C:66
 rs_bernsteinCorrection.C:67
 rs_bernsteinCorrection.C:68
 rs_bernsteinCorrection.C:69
 rs_bernsteinCorrection.C:70
 rs_bernsteinCorrection.C:71
 rs_bernsteinCorrection.C:72
 rs_bernsteinCorrection.C:73
 rs_bernsteinCorrection.C:74
 rs_bernsteinCorrection.C:75
 rs_bernsteinCorrection.C:76
 rs_bernsteinCorrection.C:77
 rs_bernsteinCorrection.C:78
 rs_bernsteinCorrection.C:79
 rs_bernsteinCorrection.C:80
 rs_bernsteinCorrection.C:81
 rs_bernsteinCorrection.C:82
 rs_bernsteinCorrection.C:83
 rs_bernsteinCorrection.C:84
 rs_bernsteinCorrection.C:85
 rs_bernsteinCorrection.C:86
 rs_bernsteinCorrection.C:87
 rs_bernsteinCorrection.C:88
 rs_bernsteinCorrection.C:89
 rs_bernsteinCorrection.C:90
 rs_bernsteinCorrection.C:91
 rs_bernsteinCorrection.C:92
 rs_bernsteinCorrection.C:93
 rs_bernsteinCorrection.C:94
 rs_bernsteinCorrection.C:95
 rs_bernsteinCorrection.C:96
 rs_bernsteinCorrection.C:97
 rs_bernsteinCorrection.C:98
 rs_bernsteinCorrection.C:99
 rs_bernsteinCorrection.C:100
 rs_bernsteinCorrection.C:101
 rs_bernsteinCorrection.C:102
 rs_bernsteinCorrection.C:103
 rs_bernsteinCorrection.C:104
 rs_bernsteinCorrection.C:105
 rs_bernsteinCorrection.C:106
 rs_bernsteinCorrection.C:107
 rs_bernsteinCorrection.C:108
 rs_bernsteinCorrection.C:109
 rs_bernsteinCorrection.C:110
 rs_bernsteinCorrection.C:111
 rs_bernsteinCorrection.C:112
 rs_bernsteinCorrection.C:113
 rs_bernsteinCorrection.C:114
 rs_bernsteinCorrection.C:115
 rs_bernsteinCorrection.C:116
 rs_bernsteinCorrection.C:117
 rs_bernsteinCorrection.C:118
 rs_bernsteinCorrection.C:119
 rs_bernsteinCorrection.C:120
 rs_bernsteinCorrection.C:121
 rs_bernsteinCorrection.C:122
 rs_bernsteinCorrection.C:123
 rs_bernsteinCorrection.C:124
 rs_bernsteinCorrection.C:125
 rs_bernsteinCorrection.C:126
 rs_bernsteinCorrection.C:127
 rs_bernsteinCorrection.C:128
 rs_bernsteinCorrection.C:129
 rs_bernsteinCorrection.C:130
 rs_bernsteinCorrection.C:131
 rs_bernsteinCorrection.C:132
 rs_bernsteinCorrection.C:133
 rs_bernsteinCorrection.C:134
 rs_bernsteinCorrection.C:135
 rs_bernsteinCorrection.C:136
 rs_bernsteinCorrection.C:137
 rs_bernsteinCorrection.C:138
 rs_bernsteinCorrection.C:139
 rs_bernsteinCorrection.C:140
 rs_bernsteinCorrection.C:141
 rs_bernsteinCorrection.C:142
 rs_bernsteinCorrection.C:143
 rs_bernsteinCorrection.C:144
 rs_bernsteinCorrection.C:145
 rs_bernsteinCorrection.C:146
 rs_bernsteinCorrection.C:147
 rs_bernsteinCorrection.C:148
 rs_bernsteinCorrection.C:149
 rs_bernsteinCorrection.C:150
 rs_bernsteinCorrection.C:151
 rs_bernsteinCorrection.C:152
 rs_bernsteinCorrection.C:153
 rs_bernsteinCorrection.C:154