doc/v622/rf611__weightedfits_8C_source.html

/// \file

/// \ingroup tutorial_roofit

/// \notebook -js

///

/// Likelihood and minimization: Parameter uncertainties for weighted unbinned ML fits

///

/// ## Parameter uncertainties for weighted unbinned ML fits

///

/// Based on example from https://arxiv.org/abs/1911.01303

///

/// This example compares different approaches to determining parameter uncertainties in weighted unbinned maximum likelihood fits.

/// Performing a weighted unbinned maximum likelihood fits can be useful to account for acceptance effects and to statistically subtract background events using the sPlot formalism.

/// It is however well known that the inverse Hessian matrix does not yield parameter uncertainties with correct coverage in the presence of event weights.

/// Three approaches to the determination of parameter uncertainties are compared in this example:

///

/// 1. Using the inverse weighted Hessian matrix [`SumW2Error(false)`]

///

/// 2. Using the expression [`SumW2Error(true)`]

///    \f[

///      V_{ij} = H_{ik}^{-1} C_{kl} H_{lj}^{-1}

///    \f]

///    where H is the weighted Hessian matrix and C is the Hessian matrix with squared weights

///

/// 3. The asymptotically correct approach (for details please see https://arxiv.org/abs/1911.01303) [`Asymptotic(true)`]

///    \f[

///      V_{ij} = H_{ik}^{-1} D_{kl} H_{lj}^{-1}

///    \f]

///    where H is the weighted Hessian matrix and D is given by

///    \f[

///      D_{kl} = \sum_{e=1}^{N} w_e^2 \frac{\partial \log(P)}{\partial \lambda_k}\frac{\partial \log(P)}{\partial \lambda_l}

///    \f]

///    with the event weight \f$w_e\f$.

///

/// The example performs the fit of a second order polynomial in the angle cos(theta) [-1,1] to a weighted data set.

/// The polynomial is given by

/// \f[

///   P = \frac{ 1 + c_0 \cdot \cos(\theta) + c_1 \cdot \cos(\theta) \cdot \cos(\theta) }{\mathrm{Norm}}

/// \f]

/// The two coefficients \f$ c_0 \f$ and \f$ c_1 \f$ and their uncertainties are to be determined in the fit.

///

/// The per-event weight is used to correct for an acceptance effect, two different acceptance models can be studied:

/// - `acceptancemodel==1`: eff = \f$ 0.3 + 0.7 \cdot \cos(\theta) \cdot \cos(\theta) \f$

/// - `acceptancemodel==2`: eff = \f$ 1.0 - 0.7 \cdot \cos(\theta) \cdot \cos(\theta) \f$

/// The data is generated to be flat before the acceptance effect.

///

/// The performance of the different approaches to determine parameter uncertainties is compared using the pull distributions from a large number of pseudoexperiments.

/// The pull is defined as \f$ (\lambda_i - \lambda_{gen})/\sigma(\lambda_i) \f$, where \f$ \lambda_i \f$ is the fitted parameter and \f$ \sigma(\lambda_i) \f$ its

/// uncertainty for pseudoexperiment number i.

/// If the fit is unbiased and the parameter uncertainties are estimated correctly, the pull distribution should be a Gaussian centered around zero with a width of one.

///

/// \macro_image

/// \macro_output

/// \macro_code

///

/// \date 11/2019

/// \author Christoph Langenbruch


#include "TH1D.h"

#include "TCanvas.h"

#include "TROOT.h"

#include "TStyle.h"

#include "TRandom3.h"

#include "TLegend.h"

#include "RooRealVar.h"

#include "RooFitResult.h"

#include "RooDataSet.h"

#include "RooPolynomial.h"


using namespace RooFit;


int rf611_weightedfits(int acceptancemodel=2) {

  // I n i t i a l i s a t i o n   a n d   S e t u p

  //------------------------------------------------


  //plotting options

  gStyle->SetPaintTextFormat(".1f");

  gStyle->SetEndErrorSize(6.0);

  gStyle->SetTitleSize(0.05, "XY");

  gStyle->SetLabelSize(0.05, "XY");

  gStyle->SetTitleOffset(0.9, "XY");

  gStyle->SetTextSize(0.05);

  gStyle->SetPadLeftMargin(0.125);

  gStyle->SetPadBottomMargin(0.125);

  gStyle->SetPadTopMargin(0.075);

  gStyle->SetPadRightMargin(0.075);

  gStyle->SetMarkerStyle(20);

  gStyle->SetMarkerSize(1.0);

  gStyle->SetHistLineWidth(2.0);

  gStyle->SetHistLineColor(1);


  //initialise TRandom3

  TRandom3* rnd = new TRandom3();

  rnd->SetSeed(191101303);


  //accepted events and events weighted to account for the acceptance

  TH1D* haccepted = new TH1D("haccepted", "Generated events;cos(#theta);#events", 40, -1.0, 1.0);

  TH1D* hweighted = new TH1D("hweighted", "Generated events;cos(#theta);#events", 40, -1.0, 1.0);

  //histograms holding pull distributions

  //using the inverse Hessian matrix

  TH1D* hc0pull1 = new TH1D("hc0pull1", "Inverse weighted Hessian matrix [SumW2Error(false)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);

  TH1D* hc1pull1 = new TH1D("hc1pull1", "Inverse weighted Hessian matrix [SumW2Error(false)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);

  //using the correction with the Hessian matrix with squared weights

  TH1D* hc0pull2 = new TH1D("hc0pull2", "Hessian matrix with squared weights [SumW2Error(true)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);

  TH1D* hc1pull2 = new TH1D("hc1pull2", "Hessian matrix with squared weights [SumW2Error(true)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);

  //asymptotically correct approach

  TH1D* hc0pull3 = new TH1D("hc0pull3", "Asymptotically correct approach [Asymptotic(true)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);

  TH1D* hc1pull3 = new TH1D("hc1pull3", "Asymptotically correct approach [Asymptotic(true)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);


  //number of pseudoexperiments (toys) and number of events per pseudoexperiment

  constexpr unsigned int ntoys = 500;

  constexpr unsigned int nstats = 5000;

  //parameters used in the generation

  constexpr double c0gen = 0.0;

  constexpr double c1gen = 0.0;


  // Silence fitting and minimisation messages

  auto& msgSv = RooMsgService::instance();

  msgSv.getStream(1).removeTopic(RooFit::Minimization);

  msgSv.getStream(1).removeTopic(RooFit::Fitting);


  std::cout << "Running " << ntoys*3 << " toy fits ..." << std::endl;


  // M a i n   l o o p :   r u n   p s e u d o e x p e r i m e n t s

  //----------------------------------------------------------------

  for (unsigned int i=0; i<ntoys; i++) {

    //S e t u p   p a r a m e t e r s   a n d   P D F

    //-----------------------------------------------

    //angle theta and the weight to account for the acceptance effect

    RooRealVar costheta("costheta","costheta", -1.0, 1.0);

    RooRealVar weight("weight","weight", 0.0, 1000.0);


    //initialise parameters to fit

    RooRealVar c0("c0","0th-order coefficient", c0gen, -1.0, 1.0);

    RooRealVar c1("c1","1st-order coefficient", c1gen, -1.0, 1.0);

    c0.setError(0.01);

    c1.setError(0.01);

    //create simple second-order polynomial as probability density function

    RooPolynomial pol("pol", "pol", costheta, RooArgList(c0, c1), 1);


    //G e n e r a t e   d a t a   s e t   f o r   p s e u d o e x p e r i m e n t   i

    //-------------------------------------------------------------------------------

    RooDataSet data("data","data",RooArgSet(costheta, weight), WeightVar("weight"));

    //generate nstats events

    for (unsigned int j=0; j<nstats; j++) {

      bool finished = false;

      //use simple accept/reject for generation

      while (!finished) {

        costheta = 2.0*rnd->Rndm()-1.0;

        //efficiency for the specific value of cos(theta)

        double eff = 1.0;

        if (acceptancemodel == 1)

          eff = 1.0 - 0.7 * costheta.getVal()*costheta.getVal();

        else

          eff = 0.3 + 0.7 * costheta.getVal()*costheta.getVal();

        //use 1/eff as weight to account for acceptance

        weight = 1.0/eff;

        //accept/reject

        if (10.0*rnd->Rndm() < eff*pol.getVal())

          finished = true;

      }

      haccepted->Fill(costheta.getVal());

      hweighted->Fill(costheta.getVal(), weight.getVal());

      data.add(RooArgSet(costheta, weight), weight.getVal());

    }


    //F i t   t o y   u s i n g   t h e   t h r e e   d i f f e r e n t   a p p r o a c h e s   t o   u n c e r t a i n t y   d e t e r m i n a t i o n

    //-------------------------------------------------------------------------------------------------------------------------------------------------

    //this uses the inverse weighted Hessian matrix

    RooFitResult* result = pol.fitTo(data, Save(true), SumW2Error(false), PrintLevel(-1), BatchMode(true));

    hc0pull1->Fill((c0.getVal()-c0gen)/c0.getError());

    hc1pull1->Fill((c1.getVal()-c1gen)/c1.getError());


    //this uses the correction with the Hesse matrix with squared weights

    result = pol.fitTo(data, Save(true), SumW2Error(true), PrintLevel(-1), BatchMode(true));

    hc0pull2->Fill((c0.getVal()-c0gen)/c0.getError());

    hc1pull2->Fill((c1.getVal()-c1gen)/c1.getError());


    //this uses the asymptotically correct approach

    result = pol.fitTo(data, Save(true), AsymptoticError(true), PrintLevel(-1), BatchMode(true));

    hc0pull3->Fill((c0.getVal()-c0gen)/c0.getError());

    hc1pull3->Fill((c1.getVal()-c1gen)/c1.getError());

  }


  std::cout << "... done." << std::endl;


  // P l o t   o u t p u t   d i s t r i b u t i o n s

  //--------------------------------------------------


  //plot accepted (weighted) events

  gStyle->SetOptStat(0);

  gStyle->SetOptFit(0);

  TCanvas* cevents = new TCanvas("cevents", "cevents", 800, 600);

  cevents->cd(1);

  hweighted->SetMinimum(0.0);

  hweighted->SetLineColor(2);

  hweighted->Draw("hist");

  haccepted->Draw("same hist");

  TLegend* leg = new TLegend(0.6, 0.8, 0.9, 0.9);

  leg->AddEntry(haccepted, "Accepted");

  leg->AddEntry(hweighted, "Weighted");

  leg->Draw();

  cevents->Update();


  //plot pull distributions

  TCanvas* cpull = new TCanvas("cpull", "cpull", 1200, 800);

  cpull->Divide(3,2);

  cpull->cd(1);

  gStyle->SetOptStat(1100);

  gStyle->SetOptFit(11);

  hc0pull1->Fit("gaus");

  hc0pull1->Draw("ep");

  cpull->cd(2);

  hc0pull2->Fit("gaus");

  hc0pull2->Draw("ep");

  cpull->cd(3);

  hc0pull3->Fit("gaus");

  hc0pull3->Draw("ep");

  cpull->cd(4);

  hc1pull1->Fit("gaus");

  hc1pull1->Draw("ep");

  cpull->cd(5);

  hc1pull2->Fit("gaus");

  hc1pull2->Draw("ep");

  cpull->cd(6);

  hc1pull3->Fit("gaus");

  hc1pull3->Draw("ep");

  cpull->Update();


  return 0;

}

RooDataSet.h

RooFitResult.h

RooPolynomial.h

RooRealVar.h

TCanvas.h

TH1D.h

TLegend.h

TROOT.h

TRandom3.h

TStyle.h

gStyle
R__EXTERN TStyle * gStyle
Definition: TStyle.h:410

RooArgList
RooArgList is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgList.h:21

RooArgSet
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:28

RooDataSet
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:33

RooFitResult
RooFitResult is a container class to hold the input and output of a PDF fit to a dataset.
Definition: RooFitResult.h:40

RooMsgService::instance
static RooMsgService & instance()
Return reference to singleton instance.
Definition: RooMsgService.cxx:353

RooPolynomial
RooPolynomial implements a polynomial p.d.f of the form.
Definition: RooPolynomial.h:28

RooRealVar
RooRealVar represents a variable that can be changed from the outside.
Definition: RooRealVar.h:35

TAttLine::SetLineColor
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:40

TAttMarker::SetMarkerStyle
virtual void SetMarkerStyle(Style_t mstyle=1)
Set the marker style.
Definition: TAttMarker.h:40

TAttMarker::SetMarkerSize
virtual void SetMarkerSize(Size_t msize=1)
Set the marker size.
Definition: TAttMarker.h:41

TAttText::SetTextSize
virtual void SetTextSize(Float_t tsize=1)
Set the text size.
Definition: TAttText.h:46

TCanvas
The Canvas class.
Definition: TCanvas.h:27

TCanvas::Update
virtual void Update()
Update canvas pad buffers.
Definition: TCanvas.cxx:2433

TCanvas::cd
TVirtualPad * cd(Int_t subpadnumber=0)
Set current canvas & pad.
Definition: TCanvas.cxx:701

TH1D
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:614

TH1::Fit
virtual TFitResultPtr Fit(const char *formula, Option_t *option="", Option_t *goption="", Double_t xmin=0, Double_t xmax=0)
Fit histogram with function fname.
Definition: TH1.cxx:3808

TH1::Fill
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3275

TH1::SetMinimum
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:395

TH1::Draw
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2998

TLegend
This class displays a legend box (TPaveText) containing several legend entries.
Definition: TLegend.h:23

TPad::Divide
virtual void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0)
Automatic pad generation by division.
Definition: TPad.cxx:1165

TRandom3
Random number generator class based on M.
Definition: TRandom3.h:27

TRandom3::Rndm
virtual Double_t Rndm()
Machine independent random number generator.
Definition: TRandom3.cxx:100

TRandom3::SetSeed
virtual void SetSeed(ULong_t seed=0)
Set the random generator sequence if seed is 0 (default value) a TUUID is generated and used to fill ...
Definition: TRandom3.cxx:207

TStyle::SetPadTopMargin
void SetPadTopMargin(Float_t margin=0.1)
Definition: TStyle.h:340

TStyle::SetOptStat
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1590

TStyle::SetPadBottomMargin
void SetPadBottomMargin(Float_t margin=0.1)
Definition: TStyle.h:339

TStyle::SetPaintTextFormat
void SetPaintTextFormat(const char *format="g")
Definition: TStyle.h:367

TStyle::SetEndErrorSize
void SetEndErrorSize(Float_t np=2)
Set the size (in pixels) of the small lines drawn at the end of the error bars (TH1 or TGraphErrors).
Definition: TStyle.cxx:1289

TStyle::SetPadRightMargin
void SetPadRightMargin(Float_t margin=0.1)
Definition: TStyle.h:342

TStyle::SetTitleOffset
void SetTitleOffset(Float_t offset=1, Option_t *axis="X")
Specify a parameter offset to control the distance between the axis and the axis title.
Definition: TStyle.cxx:1749

TStyle::SetPadLeftMargin
void SetPadLeftMargin(Float_t margin=0.1)
Definition: TStyle.h:341

TStyle::SetHistLineColor
void SetHistLineColor(Color_t color=1)
Definition: TStyle.h:361

TStyle::SetTitleSize
void SetTitleSize(Float_t size=0.02, Option_t *axis="X")
Definition: TStyle.cxx:1768

TStyle::SetHistLineWidth
void SetHistLineWidth(Width_t width=1)
Definition: TStyle.h:364

TStyle::SetLabelSize
void SetLabelSize(Float_t size=0.04, Option_t *axis="X")
Set size of axis labels.
Definition: TStyle.cxx:1393

TStyle::SetOptFit
void SetOptFit(Int_t fit=1)
The type of information about fit parameters printed in the histogram statistics box can be selected ...
Definition: TStyle.cxx:1542

RooFit::WeightVar
RooCmdArg WeightVar(const char *name, Bool_t reinterpretAsWeight=kFALSE)
Definition: RooGlobalFunc.cxx:123

RooFit::SumW2Error
RooCmdArg SumW2Error(Bool_t flag)
Definition: RooGlobalFunc.cxx:207

RooFit::Save
RooCmdArg Save(Bool_t flag=kTRUE)
Definition: RooGlobalFunc.cxx:187

RooFit::PrintLevel
RooCmdArg PrintLevel(Int_t code)
Definition: RooGlobalFunc.cxx:189

RooFit::BatchMode
RooCmdArg BatchMode(bool flag=true)
Definition: RooGlobalFunc.cxx:158

RooFit::AsymptoticError
RooCmdArg AsymptoticError(Bool_t flag)
Definition: RooGlobalFunc.cxx:208

c1
return c1
Definition: legend1.C:41

leg
leg
Definition: legend1.C:34

RooFit
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition: RooCFunction1Binding.h:29

RooFit::Minimization
@ Minimization
Definition: RooGlobalFunc.h:67

RooFit::Fitting
@ Fitting
Definition: RooGlobalFunc.h:67