Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooBinnedL.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * PB, Patrick Bos, Netherlands eScience Center, p.bos@esciencecenter.nl
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file RooBinnedL.cxx
15\class RooBinnedL
16\ingroup Roofitcore
17
18Implements a -log(likelihood) calculation from a dataset
19(assumed to be binned) and a PDF. The NLL is calculated as
20\f[
21 \sum_\mathrm{data} -\log( \mathrm{pdf}(x_\mathrm{data}))
22\f]
23In extended mode, a
24\f$ N_\mathrm{expect} - N_\mathrm{observed}*log(N_\mathrm{expect}) \f$ term is added.
25**/
26
28
29#include "RooAbsData.h"
30#include "RooAbsPdf.h"
31#include "RooAbsDataStore.h"
32#include "RooRealSumPdf.h"
33#include "RooRealVar.h"
34#include "RooChangeTracker.h"
35
36#include "TMath.h"
37
38namespace RooFit {
39namespace TestStatistics {
40
42 : RooAbsL(RooAbsL::ClonePdfData{pdf, data}, data->numEntries(), 1)
43{
44 // pdf must be a RooRealSumPdf representing a yield vector for a binned likelihood calculation
45 if (!dynamic_cast<RooRealSumPdf *>(pdf)) {
46 throw std::logic_error("RooBinnedL can only be created from pdf of type RooRealSumPdf!");
47 }
48
49 // Retrieve and cache bin widths needed to convert unnormalized binned pdf values back to yields
50
51 // The Active label will disable pdf integral calculations
52 pdf->setAttribute("BinnedLikelihoodActive");
53
54 RooArgSet params;
55 pdf->getParameters(data->get(), params) ;
56 paramTracker_ = std::make_unique<RooChangeTracker>("chtracker","change tracker",params,true);
57
58 std::unique_ptr<RooArgSet> obs(pdf->getObservables(data));
59 if (obs->size() != 1) {
60 throw std::logic_error(
61 "RooBinnedL can only be created from combination of pdf and data which has exactly one observable!");
62 } else {
63 RooRealVar *var = static_cast<RooRealVar *>(obs->first());
64 std::list<double> *boundaries = pdf->binBoundaries(*var, var->getMin(), var->getMax());
65 std::list<double>::iterator biter = boundaries->begin();
66 _binw.resize(boundaries->size() - 1);
67 double lastBound = (*biter);
68 ++biter;
69 int ibin = 0;
70 while (biter != boundaries->end()) {
71 _binw[ibin] = (*biter) - lastBound;
72 lastBound = (*biter);
73 ibin++;
74 ++biter;
75 }
76 }
77}
78
79RooBinnedL::~RooBinnedL() = default;
80
81//////////////////////////////////////////////////////////////////////////////////
82/// Calculate and return likelihood on subset of data from firstEvent to lastEvent
83/// processed with a step size of 'stepSize'. If this an extended likelihood and
84/// and the zero event is processed the extended term is added to the return
85/// likelihood.
86//
88RooBinnedL::evaluatePartition(Section bins, std::size_t /*components_begin*/, std::size_t /*components_end*/)
89{
90 // Throughout the calculation, we use Kahan's algorithm for summing to
91 // prevent loss of precision - this is a factor four more expensive than
92 // straight addition, but since evaluating the PDF is usually much more
93 // expensive than that, we tolerate the additional cost...
95
96 // Do not reevaluate likelihood if parameters nor event range have changed
97 if (!paramTracker_->hasChanged(true) && bins == lastSection_ && (cachedResult_.Sum() != 0 || cachedResult_.Carry() != 0)) return cachedResult_;
98
99// data->store()->recalculateCache(_projDeps, firstEvent, lastEvent, stepSize, (_binnedPdf?false:true));
100 // TODO: check when we might need _projDeps (it seems to be mostly empty); ties in with TODO below
101 data_->store()->recalculateCache(nullptr, bins.begin(N_events_), bins.end(N_events_), 1, false);
102
104
105 for (std::size_t i = bins.begin(N_events_); i < bins.end(N_events_); ++i) {
106
107 data_->get(i);
108
109 double eventWeight = data_->weight();
110
111 // Calculate log(Poisson(N|mu) for this bin
112 double N = eventWeight;
113 double mu = pdf_->getVal() * _binw[i];
114
115 if (mu <= 0 && N > 0) {
116
117 // Catch error condition: data present where zero events are predicted
118// logEvalError(Form("Observed %f events in bin %d with zero event yield", N, i));
119 // TODO: check if using regular stream vs logEvalError error gathering is ok
120 oocoutI(nullptr, Minimization)
121 << "Observed " << N << " events in bin " << i << " with zero event yield" << std::endl;
122
123 } else if (std::abs(mu) < 1e-10 && std::abs(N) < 1e-10) {
124
125 // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
126 // since log(mu)=0. No update of result is required since term=0.
127
128 } else {
129
130 double term = -1 * (-mu + N * log(mu) - TMath::LnGamma(N + 1));
131
132 sumWeight += eventWeight;
133 result += term;
134 }
135 }
136
137 // If part of simultaneous PDF normalize probability over
138 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
139 if (sim_count_ > 1) {
140 result += sumWeight.Sum() * log(1.0 * sim_count_);
141 }
142
143 // At the end of the first full calculation, wire the caches
144 if (_first) {
145 _first = false;
146 pdf_->wireAllCaches();
147 }
148
150 lastSection_ = bins;
151 return result;
152}
153
154} // namespace TestStatistics
155} // namespace RooFit
#define e(i)
Definition RSha256.hxx:103
#define oocoutI(o, a)
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
T Sum() const
Definition Util.h:240
T Carry() const
Definition Util.h:250
RooFit::OwningPtr< RooArgSet > getParameters(const RooAbsData *data, bool stripDisconnected=true) const
Create a list of leaf nodes in the arg tree starting with ourself as top node that don't match any of...
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
void setAttribute(const Text_t *name, bool value=true)
Set (default) or clear a named boolean attribute of this object.
Abstract base class for binned and unbinned datasets.
Definition RooAbsData.h:57
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
virtual double getMax(const char *name=nullptr) const
Get maximum of currently defined range.
virtual double getMin(const char *name=nullptr) const
Get minimum of currently defined range.
virtual std::list< double > * binBoundaries(RooAbsRealLValue &obs, double xlo, double xhi) const
Retrieve bin boundaries if this distribution is binned in obs.
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
Convenience wrapper class used to distinguish between pdf/data owning and non-owning constructors.
Definition RooAbsL.h:39
std::shared_ptr< RooAbsData > data_
Definition RooAbsL.h:143
std::shared_ptr< RooAbsPdf > pdf_
Definition RooAbsL.h:142
ROOT::Math::KahanSum< double > evaluatePartition(Section bins, std::size_t components_begin, std::size_t components_end) override
Calculate and return likelihood on subset of data from firstEvent to lastEvent processed with a step ...
ROOT::Math::KahanSum< double > cachedResult_
Definition RooBinnedL.h:45
std::unique_ptr< RooChangeTracker > paramTracker_
Definition RooBinnedL.h:43
RooBinnedL(RooAbsPdf *pdf, RooAbsData *data)
std::vector< double > _binw
!
Definition RooBinnedL.h:42
Implements a PDF constructed from a sum of functions:
Variable that can be changed from the outside.
Definition RooRealVar.h:37
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition JSONIO.h:26
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition TMath.cxx:509
A part of some range delimited by two fractional points between 0 and 1 (inclusive).
Definition RooAbsL.h:65
std::size_t begin(std::size_t N_total) const
Definition RooAbsL.h:73
std::size_t end(std::size_t N_total) const
Definition RooAbsL.h:75