Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVar.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * @(#)root/roofitcore:$Id$
5 * Authors: *
6 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * *
9 * Copyright (c) 2000-2005, Regents of the University of California *
10 * and Stanford University. All rights reserved. *
11 * *
12 * Redistribution and use in source and binary forms, *
13 * with or without modification, are permitted according to the terms *
14 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15 *****************************************************************************/
16
17/**
18\file RooNLLVar.cxx
19\class RooNLLVar
20\ingroup Roofitcore
21
22Implements a -log(likelihood) calculation from a dataset
23and a PDF. The NLL is calculated as
24\f[
25 \sum_\mathrm{data} -\log( \mathrm{pdf}(x_\mathrm{data}))
26\f]
27In extended mode, a
28\f$ N_\mathrm{expect} - N_\mathrm{observed}*log(N_\mathrm{expect}) \f$ term is added.
29**/
30
31#include "RooNLLVar.h"
32
33#include <RooAbsData.h>
34#include <RooAbsDataStore.h>
35#include <RooAbsPdf.h>
36#include <RooCmdConfig.h>
37#include <RooDataHist.h>
38#include <RooHistPdf.h>
39#include <RooMsgService.h>
40#include <RooNaNPacker.h>
41#include <RooProdPdf.h>
42#include "RooRealMPFE.h"
43#include <RooRealSumPdf.h>
44#include <RooRealVar.h>
45
46#include "TMath.h"
47#include "Math/Util.h"
48
49#include <algorithm>
50
52
53
54////////////////////////////////////////////////////////////////////////////////
55/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
56/// For internal use.
57
58RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
59 bool extended, RooAbsTestStatistic::Configuration const& cfg) :
60 RooNLLVar{name, title, pdf, indata, RooArgSet(), extended, cfg} {}
61
62
63////////////////////////////////////////////////////////////////////////////////
64/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
65/// For internal use.
66
67RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf &pdf, RooAbsData &indata, const RooArgSet &projDeps,
68 bool extended, RooAbsTestStatistic::Configuration const &cfg)
69 : RooAbsOptTestStatistic(name, title, pdf, indata, projDeps, cfg),
70 _extended(extended),
71 _binnedPdf(cfg.binnedL ? static_cast<RooRealSumPdf *>(_funcClone) : nullptr)
72{
73 // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
74 // for a binned likelihood calculation
75
76 // Retrieve and cache bin widths needed to convert un-normalized binnedPdf values back to yields
77 if (_binnedPdf) {
78
79 // The Active label will disable pdf integral calculations
80 _binnedPdf->setAttribute("BinnedLikelihoodActive") ;
81
82 RooArgSet obs;
84 if (obs.size()!=1) {
85 _binnedPdf = nullptr;
86 } else {
87 auto* var = static_cast<RooRealVar*>(obs.first());
88 std::unique_ptr<std::list<double>> boundaries{_binnedPdf->binBoundaries(*var,var->getMin(),var->getMax())};
89 auto biter = boundaries->begin() ;
90 _binw.reserve(boundaries->size()-1) ;
91 double lastBound = (*biter) ;
92 ++biter ;
93 while (biter!=boundaries->end()) {
94 _binw.push_back((*biter) - lastBound);
95 lastBound = (*biter) ;
96 ++biter ;
97 }
98 }
99
100 _skipZeroWeights = false;
101 } else {
102 _skipZeroWeights = true;
103 }
104}
105
106
107
108////////////////////////////////////////////////////////////////////////////////
109/// Copy constructor
110
111RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
113 _extended(other._extended),
114 _weightSq(other._weightSq),
115 _offsetSaveW2(other._offsetSaveW2),
116 _binw(other._binw),
117 _binnedPdf{other._binnedPdf}
118{
119}
120
121
122////////////////////////////////////////////////////////////////////////////////
123/// Create a test statistic using several properties of the current instance. This is used to duplicate
124/// the test statistic in multi-processing scenarios.
125RooAbsTestStatistic* RooNLLVar::create(const char *name, const char *title, RooAbsReal& pdf, RooAbsData& adata,
126 const RooArgSet& projDeps, RooAbsTestStatistic::Configuration const& cfg) {
127 RooAbsPdf & thePdf = dynamic_cast<RooAbsPdf&>(pdf);
128 // check if pdf can be extended
129 bool extendedPdf = _extended && thePdf.canBeExtended();
130
131 auto testStat = new RooNLLVar(name, title, thePdf, adata, projDeps, extendedPdf, cfg);
132 return testStat;
133}
134
135
136////////////////////////////////////////////////////////////////////////////////
137
139{
140 if (_gofOpMode==Slave) {
141 if (flag != _weightSq) {
142 _weightSq = flag;
143 std::swap(_offset, _offsetSaveW2);
144 }
146 } else if ( _gofOpMode==MPMaster) {
147 for (int i=0 ; i<_nCPU ; i++)
148 _mpfeArray[i]->applyNLLWeightSquared(flag);
149 } else if ( _gofOpMode==SimMaster) {
150 for(auto& gof : _gofArray)
151 static_cast<RooNLLVar&>(*gof).applyWeightSquared(flag);
152 }
153}
154
155
156////////////////////////////////////////////////////////////////////////////////
157/// Calculate and return likelihood on subset of data.
158/// \param[in] firstEvent First event to be processed.
159/// \param[in] lastEvent First event not to be processed, any more.
160/// \param[in] stepSize Steps between events.
161/// \note For batch computations, the step size **must** be one.
162///
163/// If this an extended likelihood, the extended term is added to the return likelihood
164/// in the batch that encounters the event with index 0.
165
166double RooNLLVar::evaluatePartition(std::size_t firstEvent, std::size_t lastEvent, std::size_t stepSize) const
167{
168 // Throughout the calculation, we use Kahan's algorithm for summing to
169 // prevent loss of precision - this is a factor four more expensive than
170 // straight addition, but since evaluating the PDF is usually much more
171 // expensive than that, we tolerate the additional cost...
173 double sumWeight{0.0};
174
175 auto * pdfClone = static_cast<RooAbsPdf*>(_funcClone);
176
177
178 // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
179 if (_binnedPdf) {
180 ROOT::Math::KahanSum<double> sumWeightKahanSum{0.0};
181 for (auto i=firstEvent ; i<lastEvent ; i+=stepSize) {
182
183 _dataClone->get(i) ;
184
185 double eventWeight = _dataClone->weight();
186
187
188 // Calculate log(Poisson(N|mu) for this bin
189 double N = eventWeight ;
190 double mu = _binnedPdf->getVal()*_binw[i] ;
191 //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ;
192
193 if (mu<=0 && N>0) {
194
195 // Catch error condition: data present where zero events are predicted
196 logEvalError(Form("Observed %f events in bin %lu with zero event yield",N,(unsigned long)i)) ;
197
198 } else if (std::abs(mu)<1e-10 && std::abs(N)<1e-10) {
199
200 // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
201 // since log(mu)=0. No update of result is required since term=0.
202
203 } else {
204
205 double term = 0.0;
206 if(_doBinOffset) {
207 term -= -mu + N + N * (std::log(mu) - std::log(N));
208 } else {
209 term -= -mu + N * std::log(mu) - TMath::LnGamma(N+1);
210 }
211 result += term;
212 sumWeightKahanSum += eventWeight;
213
214 }
215 }
216
217 sumWeight = sumWeightKahanSum.Sum();
218
219 } else { //unbinned PDF
220
221 std::tie(result, sumWeight) = computeScalar(stepSize, firstEvent, lastEvent);
222
223 // include the extended maximum likelihood term, if requested
224 if(_extended && _setNum==_extSet) {
225 result += pdfClone->extendedTerm(*_dataClone, _weightSq, _doBinOffset);
226 }
227 } //unbinned PDF
228
229
230 // If part of simultaneous PDF normalize probability over
231 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
232 // If we do bin-by bin offsetting, we don't do this because it cancels out
233 if (!_doBinOffset && _simCount>1) {
234 result += sumWeight * std::log(static_cast<double>(_simCount));
235 }
236
237
238 // At the end of the first full calculation, wire the caches
239 if (_first) {
240 _first = false ;
242 }
243
244
245 // Check if value offset flag is set.
246 if (_doOffset) {
247
248 // If no offset is stored enable this feature now
249 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
250 coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result.Sum() << std::endl ;
251 _offset = result ;
252 }
253
254 // Subtract offset
255 result -= _offset;
256 }
257
258 _evalCarry = result.Carry();
259 return result.Sum() ;
260}
261
262RooNLLVar::ComputeResult RooNLLVar::computeScalar(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const {
263 auto pdfClone = static_cast<const RooAbsPdf*>(_funcClone);
264 return computeScalarFunc(pdfClone, _dataClone, _normSet, _weightSq, stepSize, firstEvent, lastEvent, _offsetPdf.get());
265}
266
268 RooArgSet *normSet, bool weightSq, std::size_t stepSize,
269 std::size_t firstEvent, std::size_t lastEvent, RooAbsPdf const* offsetPdf)
270{
273 RooNaNPacker packedNaN(0.f);
274
275 for (auto i=firstEvent; i<lastEvent; i+=stepSize) {
276 dataClone->get(i) ;
277
278 double weight = dataClone->weight(); //FIXME
279
280 if (0. == weight * weight) continue ;
281 if (weightSq) weight = dataClone->weightSquared() ;
282
283 double logProba = pdfClone->getLogVal(normSet);
284
285 if(offsetPdf) {
286 logProba -= offsetPdf->getLogVal(normSet);
287 }
288
289 const double term = -weight * logProba;
290
291 kahanWeight.Add(weight);
292 kahanProb.Add(term);
293 packedNaN.accumulate(term);
294 }
295
296 if (packedNaN.getPayload() != 0.) {
297 // Some events with evaluation errors. Return "badness" of errors.
298 return {ROOT::Math::KahanSum<double>{packedNaN.getNaNWithPayload()}, kahanWeight.Sum()};
299 }
300
301 return {kahanProb, kahanWeight.Sum()};
302}
303
304bool RooNLLVar::setDataSlave(RooAbsData &indata, bool cloneData, bool ownNewData)
305{
306 bool ret = RooAbsOptTestStatistic::setDataSlave(indata, cloneData, ownNewData);
307 // To re-create the data template pdf if necessary
308 _offsetPdf.reset();
310 return ret;
311}
312
314{
315 if (!_init) {
316 initialize();
317 }
318
319 _doBinOffset = flag;
320
321 // If this is a "master" that delegates the actual work to "slaves", the
322 // _offsetPdf will not be reset.
323 bool needsResetting = true;
324
325 switch (operMode()) {
326 case Slave: break;
327 case SimMaster: {
328 for (auto &gof : _gofArray) {
329 static_cast<RooNLLVar &>(*gof).enableBinOffsetting(flag);
330 }
331 needsResetting = false;
332 break;
333 }
334 case MPMaster: {
335 for (int i = 0; i < _nCPU; ++i) {
336 static_cast<RooNLLVar &>(_mpfeArray[i]->arg()).enableBinOffsetting(flag);
337 }
338 needsResetting = false;
339 break;
340 }
341 }
342
343 if (!needsResetting)
344 return;
345
346 if (flag && !_offsetPdf) {
347 std::string name = std::string{GetName()} + "_offsetPdf";
348 std::unique_ptr<RooDataHist> dataTemplate;
349 if (auto dh = dynamic_cast<RooDataHist *>(_dataClone)) {
350 dataTemplate = std::make_unique<RooDataHist>(*dh);
351 } else {
352 dataTemplate = std::unique_ptr<RooDataHist>(static_cast<RooDataSet const &>(*_dataClone).binnedClone());
353 }
354 _offsetPdf = std::make_unique<RooHistPdf>(name.c_str(), name.c_str(), *_funcObsSet, std::move(dataTemplate));
355 _offsetPdf->setOperMode(ADirty);
356 }
358}
#define e(i)
Definition RSha256.hxx:103
#define coutI(a)
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
T Sum() const
Definition Util.h:240
T Carry() const
Definition Util.h:250
void Add(T x)
Single-element accumulation. Will not vectorise.
Definition Util.h:165
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
void wireAllCaches()
void setValueDirty()
Mark the element dirty. This forces a re-evaluation when a value is requested.
Definition RooAbsArg.h:431
void setAttribute(const Text_t *name, bool value=true)
Set (default) or clear a named boolean attribute of this object.
Storage_t::size_type size() const
RooAbsArg * first() const
Abstract base class for binned and unbinned datasets.
Definition RooAbsData.h:57
virtual double weight() const =0
virtual const RooArgSet * get() const
Definition RooAbsData.h:101
virtual double weightSquared() const =0
Abstract base class for test statistics objects that evaluate a function or PDF at each point of a gi...
bool setDataSlave(RooAbsData &data, bool cloneData=true, bool ownNewDataAnyway=false) override
Change dataset that is used to given one.
RooAbsReal * _funcClone
Pointer to internal clone of input function.
bool _skipZeroWeights
! Whether to skip entries with weight zero in the evaluation
RooArgSet * _funcObsSet
List of observables in the pdf expression.
RooArgSet * _normSet
Pointer to set with observables used for normalization.
RooAbsData * _dataClone
Pointer to internal clone if input data.
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
bool canBeExtended() const
If true, PDF can provide extended likelihood term.
Definition RooAbsPdf.h:218
virtual double getLogVal(const RooArgSet *set=nullptr) const
Return the log of the current value with given normalization An error message is printed if the argum...
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
virtual std::list< double > * binBoundaries(RooAbsRealLValue &obs, double xlo, double xhi) const
Retrieve bin boundaries if this distribution is binned in obs.
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
void logEvalError(const char *message, const char *serverValueString=nullptr) const
Log evaluation error message.
Abstract base class for all test statistics.
Int_t _setNum
Partition number of this instance in parallel calculation mode.
double _evalCarry
! carry of Kahan sum in evaluatePartition
GOFOpMode operMode() const
Int_t _nCPU
Number of processors to use in parallel calculation mode.
GOFOpMode _gofOpMode
Operation mode of test statistic instance.
bool _init
! Is object initialized
Int_t _simCount
Total number of component p.d.f.s in RooSimultaneous (if any)
ROOT::Math::KahanSum< double > _offset
! Offset as KahanSum to avoid loss of precision
Int_t _extSet
! Number of designated set to calculated extended term
std::vector< std::unique_ptr< RooAbsTestStatistic > > _gofArray
! Array of sub-contexts representing part of the combined test statistic
bool initialize()
One-time initialization of the test statistic.
pRooRealMPFE * _mpfeArray
! Array of parallel execution frond ends
bool _doOffset
Apply interval value offset to control numeric precision?
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:24
Container class to hold N-dimensional binned data.
Definition RooDataHist.h:40
Container class to hold unbinned data.
Definition RooDataSet.h:34
RooFit::OwningPtr< RooDataHist > binnedClone(const char *newName=nullptr, const char *newTitle=nullptr) const
Return binned clone of this dataset.
Implements a -log(likelihood) calculation from a dataset and a PDF.
Definition RooNLLVar.h:20
ComputeResult computeScalar(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const
std::unique_ptr< RooAbsPdf > _offsetPdf
! An optional per-bin likelihood offset
Definition RooNLLVar.h:72
static RooNLLVar::ComputeResult computeScalarFunc(const RooAbsPdf *pdfClone, RooAbsData *dataClone, RooArgSet *normSet, bool weightSq, std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent, RooAbsPdf const *offsetPdf=nullptr)
bool _doBinOffset
Definition RooNLLVar.h:65
ROOT::Math::KahanSum< double > _offsetSaveW2
!
Definition RooNLLVar.h:68
RooAbsPdf * _binnedPdf
!
Definition RooNLLVar.h:71
void applyWeightSquared(bool flag) override
Disables or enables the usage of squared weights.
std::vector< double > _binw
!
Definition RooNLLVar.h:70
std::pair< ROOT::Math::KahanSum< double >, double > ComputeResult
Definition RooNLLVar.h:46
bool _extended
Definition RooNLLVar.h:64
bool setDataSlave(RooAbsData &data, bool cloneData=true, bool ownNewDataAnyway=false) override
Change dataset that is used to given one.
RooNLLVar(const char *name, const char *title, RooAbsPdf &pdf, RooAbsData &data, bool extended, RooAbsTestStatistic::Configuration const &cfg=RooAbsTestStatistic::Configuration{})
Construct likelihood from given p.d.f and (binned or unbinned dataset) For internal use.
Definition RooNLLVar.cxx:58
RooAbsTestStatistic * create(const char *name, const char *title, RooAbsReal &pdf, RooAbsData &adata, const RooArgSet &projDeps, RooAbsTestStatistic::Configuration const &cfg) override
Create a test statistic using several properties of the current instance.
void enableBinOffsetting(bool on=true)
~RooNLLVar() override
Definition RooNLLVar.cxx:51
bool _first
!
Definition RooNLLVar.h:67
bool _weightSq
Apply weights squared?
Definition RooNLLVar.h:66
double evaluatePartition(std::size_t firstEvent, std::size_t lastEvent, std::size_t stepSize) const override
Calculate and return likelihood on subset of data.
RooAbsReal & arg() const
Definition RooRealMPFE.h:49
Implements a PDF constructed from a sum of functions:
Variable that can be changed from the outside.
Definition RooRealVar.h:37
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition TMath.cxx:509
Little struct that can pack a float into the unused bits of the mantissa of a NaN double.
float getPayload() const
Retrieve packed float.
double getNaNWithPayload() const
Retrieve a NaN with the current float payload packed into the mantissa.
void accumulate(double val)
Accumulate a packed float from another NaN into this.