Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVar.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * @(#)root/roofitcore:$Id$
5 * Authors: *
6 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * *
9 * Copyright (c) 2000-2005, Regents of the University of California *
10 * and Stanford University. All rights reserved. *
11 * *
12 * Redistribution and use in source and binary forms, *
13 * with or without modification, are permitted according to the terms *
14 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15 *****************************************************************************/
16
17/**
18\file RooNLLVar.cxx
19\class RooNLLVar
20\ingroup Roofitcore
21
22Implements a -log(likelihood) calculation from a dataset
23and a PDF. The NLL is calculated as
24\f[
25 \sum_\mathrm{data} -\log( \mathrm{pdf}(x_\mathrm{data}))
26\f]
27In extended mode, a
28\f$ N_\mathrm{expect} - N_\mathrm{observed} \cdot \log(N_\mathrm{expect}) \f$ term is added.
29**/
30
31#include <RooNLLVar.h>
32
33#include <RooAbsData.h>
34#include <RooAbsDataStore.h>
35#include <RooAbsPdf.h>
36#include <RooCmdConfig.h>
37#include <RooDataHist.h>
38#include <RooHistPdf.h>
39#include <RooMsgService.h>
40#include <RooNaNPacker.h>
41#include <RooProdPdf.h>
42#include <RooRealMPFE.h>
43#include <RooRealSumPdf.h>
44#include <RooRealVar.h>
45
46#include "TMath.h"
47#include "Math/Util.h"
48
49#include <algorithm>
50
52
54
55
56////////////////////////////////////////////////////////////////////////////////
57/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
58/// For internal use.
59
60RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
61 bool extended, RooAbsTestStatistic::Configuration const& cfg) :
62 RooNLLVar{name, title, pdf, indata, RooArgSet(), extended, cfg} {}
63
64
65////////////////////////////////////////////////////////////////////////////////
66/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
67/// For internal use.
///
/// \param[in] name, title, pdf, indata, projDeps, cfg Forwarded unchanged to the
///            RooAbsOptTestStatistic base-class constructor.
/// \param[in] extended Stored in `_extended`.
///
/// If `cfg.binnedL` is set, `_binnedPdf` is initialised from `_funcClone` (cast to
/// RooRealSumPdf) and the bin widths are cached in `_binw`; otherwise `_binnedPdf`
/// stays null. `_skipZeroWeights` is set to false in the binned case and true otherwise.
68
69RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf &pdf, RooAbsData &indata, const RooArgSet &projDeps,
70 bool extended, RooAbsTestStatistic::Configuration const &cfg)
71 : RooAbsOptTestStatistic(name, title, pdf, indata, projDeps, cfg),
72 _extended(extended),
73 _binnedPdf(cfg.binnedL ? static_cast<RooRealSumPdf *>(_funcClone) : nullptr)
74{
75 // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
76 // for a binned likelihood calculation
77
78 // Retrieve and cache bin widths needed to convert un-normalized binnedPdf values back to yields
79 if (_binnedPdf) {
80
81 // The Active label will disable pdf integral calculations
82 _binnedPdf->setAttribute("BinnedLikelihoodActive") ;
83
84 RooArgSet obs;
// NOTE(review): listing line 85 is absent from this view, so the statement that
// fills `obs` before its size is tested is not visible here.
// Exactly one observable is required; otherwise the binned path is switched off
// again by clearing _binnedPdf.
86 if (obs.size()!=1) {
87 _binnedPdf = nullptr;
88 } else {
89 auto* var = static_cast<RooRealVar*>(obs.first());
// NOTE(review): `boundaries` is dereferenced without a null or empty check, and
// `boundaries->size()-1` wraps around for an empty list — confirm that
// binBoundaries() always returns a non-empty list for this pdf.
90 std::unique_ptr<std::list<double>> boundaries{_binnedPdf->binBoundaries(*var,var->getMin(),var->getMax())};
91 auto biter = boundaries->begin() ;
92 _binw.reserve(boundaries->size()-1) ;
93 double lastBound = (*biter) ;
94 ++biter ;
// Each bin width is the difference between two consecutive boundaries.
95 while (biter!=boundaries->end()) {
96 _binw.push_back((*biter) - lastBound);
97 lastBound = (*biter) ;
98 ++biter ;
99 }
100 }
101
// Note that this branch also runs when _binnedPdf was just reset to nullptr above.
102 _skipZeroWeights = false;
103 } else {
104 _skipZeroWeights = true;
105 }
106}
107
108
109
110////////////////////////////////////////////////////////////////////////////////
111/// Copy constructor
///
/// Copies `_extended`, `_weightSq` and `_binw` from `other`.
/// NOTE(review): listing lines 114, 117 and 119 are absent from this view, so the
/// initializer list shown below is incomplete (the base-class initializer and any
/// further member initializers are not visible here).
112
113RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
115 _extended(other._extended),
116 _weightSq(other._weightSq),
118 _binw(other._binw),
120{
121}
122
123
124////////////////////////////////////////////////////////////////////////////////
125/// Create a test statistic using several properties of the current instance. This is used to duplicate
126/// the test statistic in multi-processing scenarios.
127RooAbsTestStatistic* RooNLLVar::create(const char *name, const char *title, RooAbsReal& pdf, RooAbsData& adata,
128 const RooArgSet& projDeps, RooAbsTestStatistic::Configuration const& cfg) {
129 RooAbsPdf & thePdf = dynamic_cast<RooAbsPdf&>(pdf);
130 // check if pdf can be extended
131 bool extendedPdf = _extended && thePdf.canBeExtended();
132
133 auto testStat = new RooNLLVar(name, title, thePdf, adata, projDeps, extendedPdf, cfg);
134 return testStat;
135}
136
137
138////////////////////////////////////////////////////////////////////////////////
/// Propagate a weight-squared flag according to the operation mode.
/// NOTE(review): the signature (listing line 140) is absent from this view; the
/// recursive call below suggests this is RooNLLVar::applyWeightSquared(bool flag).
///
/// - Slave: if `flag` differs from `_weightSq`, store it and swap `_offset` with
///   `_offsetSaveW2`.
/// - MPMaster: forward the flag to each of the `_nCPU` entries of `_mpfeArray`.
/// - SimMaster: forward the flag to every component in `_gofArray`.
139
141{
142 if (_gofOpMode==Slave) {
143 if (flag != _weightSq) {
144 _weightSq = flag;
// Exchange the active offset with the one saved for the other weighting mode
// (presumably so that each mode keeps its own likelihood offset).
145 std::swap(_offset, _offsetSaveW2);
146 }
// NOTE(review): listing line 147 is absent from this view.
148 } else if ( _gofOpMode==MPMaster) {
149 for (int i=0 ; i<_nCPU ; i++)
150 _mpfeArray[i]->applyNLLWeightSquared(flag);
151 } else if ( _gofOpMode==SimMaster) {
152 for(auto& gof : _gofArray)
153 static_cast<RooNLLVar&>(*gof).applyWeightSquared(flag);
154 }
155}
156
157
158////////////////////////////////////////////////////////////////////////////////
159/// Calculate and return likelihood on subset of data.
160/// \param[in] firstEvent First event to be processed.
161/// \param[in] lastEvent Index one past the last event to be processed (exclusive upper bound).
162/// \param[in] stepSize Steps between events.
163/// \note For batch computations, the step size **must** be one.
164///
165/// If this is an extended likelihood, the extended term is added to the returned likelihood
166/// in the batch that encounters the event with index 0.
///
/// \return The sum part of the accumulated result, after subtracting `_offset` if
/// `_doOffset` is set. The carry part is stored in `_evalCarry`.
167
168double RooNLLVar::evaluatePartition(std::size_t firstEvent, std::size_t lastEvent, std::size_t stepSize) const
169{
170 // Throughout the calculation, we use Kahan's algorithm for summing to
171 // prevent loss of precision - this is a factor four more expensive than
172 // straight addition, but since evaluating the PDF is usually much more
173 // expensive than that, we tolerate the additional cost...
// NOTE(review): listing line 174 is absent from this view; `result`, used below
// with Sum()/Carry(), is presumably declared there.
175 double sumWeight{0.0};
176
177 auto * pdfClone = static_cast<RooAbsPdf*>(_funcClone);
178
179
180 // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
181 if (_binnedPdf) {
182 ROOT::Math::KahanSum<double> sumWeightKahanSum{0.0};
183 for (auto i=firstEvent ; i<lastEvent ; i+=stepSize) {
184
185 _dataClone->get(i) ;
186
187 double eventWeight = _dataClone->weight();
188
189
190 // Calculate log(Poisson(N|mu)) for this bin
191 double N = eventWeight ;
// Expected yield: un-normalized pdf value times the cached width of bin i.
192 double mu = _binnedPdf->getVal()*_binw[i] ;
193 //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ;
194
195 if (mu<=0 && N>0) {
196
197 // Catch error condition: data present where zero events are predicted
// Only an evaluation error is logged; nothing is added to result or the weight sum.
198 logEvalError(Form("Observed %f events in bin %lu with zero event yield",N,(unsigned long)i)) ;
199
200 } else if (std::abs(mu)<1e-10 && std::abs(N)<1e-10) {
201
202 // Special handling of this case since log(Poisson(0,0))=0 but can't be calculated with usual log-formula
203 // since log(mu) diverges for mu=0. No update of result is required since term=0.
204
205 } else {
206
207 double term = 0.0;
// NOTE(review): with _doBinOffset set, N == 0 and mu > 0, std::log(N) is -inf and
// N * (log(mu) - log(N)) evaluates to 0 * inf = NaN — confirm whether zero-weight
// bins can reach this branch.
208 if(_doBinOffset) {
209 term -= -mu + N + N * (std::log(mu) - std::log(N));
210 } else {
211 term -= -mu + N * std::log(mu) - TMath::LnGamma(N+1);
212 }
213 result += term;
214 sumWeightKahanSum += eventWeight;
215
216 }
217 }
218
219 sumWeight = sumWeightKahanSum.Sum();
220
221 } else { //unbinned PDF
222
223 std::tie(result, sumWeight) = computeScalar(stepSize, firstEvent, lastEvent);
224
225 // include the extended maximum likelihood term, if requested
// The term is added only by the partition whose _setNum equals _extSet.
226 if(_extended && _setNum==_extSet) {
227 result += pdfClone->extendedTerm(*_dataClone, _weightSq, _doBinOffset);
228 }
229 } //unbinned PDF
230
231
232 // If part of simultaneous PDF normalize probability over
233 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
234 // If we do bin-by bin offsetting, we don't do this because it cancels out
235 if (!_doBinOffset && _simCount>1) {
236 result += sumWeight * std::log(static_cast<double>(_simCount));
237 }
238
239
240 // At the end of the first full calculation, wire the caches
// NOTE(review): listing line 243 is absent from this view; only the reset of
// _first is visible here.
241 if (_first) {
242 _first = false ;
244 }
245
246
247 // Check if value offset flag is set.
248 if (_doOffset) {
249
250 // If no offset is stored enable this feature now
// The first non-zero result becomes the offset (both sum and carry are checked).
251 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
252 coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result.Sum() << std::endl ;
253 _offset = result ;
254 }
255
256 // Subtract offset
257 result -= _offset;
258 }
259
// Keep the carry so that it is not lost when only the sum is returned.
260 _evalCarry = result.Carry();
261 return result.Sum() ;
262}
263
264RooNLLVar::ComputeResult RooNLLVar::computeScalar(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const {
265 auto pdfClone = static_cast<const RooAbsPdf*>(_funcClone);
266 return computeScalarFunc(pdfClone, _dataClone, _normSet, _weightSq, stepSize, firstEvent, lastEvent, _offsetPdf.get());
267}
268
270 RooArgSet *normSet, bool weightSq, std::size_t stepSize,
271 std::size_t firstEvent, std::size_t lastEvent, RooAbsPdf const* offsetPdf)
272{
// Sums -weight * log(pdf) over the events firstEvent, firstEvent+stepSize, ...
// below lastEvent, and returns that sum together with the sum of the weights used.
// If any term carried a NaN payload, a NaN with the accumulated payload is
// returned in place of the likelihood sum.
// NOTE(review): the first line of the signature (listing line 269) and listing
// lines 273-274 are absent from this view; `kahanWeight` and `kahanProb` used
// below are presumably declared there.
275 RooNaNPacker packedNaN(0.f);
276
277 for (auto i=firstEvent; i<lastEvent; i+=stepSize) {
// Load event i so that weight() and the pdf evaluation refer to it.
278 dataClone->get(i) ;
279
280 double weight = dataClone->weight(); //FIXME
281
// Skip events whose squared weight is exactly zero.
282 if (0. == weight * weight) continue ;
283 if (weightSq) weight = dataClone->weightSquared() ;
284
285 double logProba = pdfClone->getLogVal(normSet);
286
// With an offset pdf, its log-value is subtracted event by event.
287 if(offsetPdf) {
288 logProba -= offsetPdf->getLogVal(normSet);
289 }
290
291 const double term = -weight * logProba;
292
293 kahanWeight.Add(weight);
294 kahanProb.Add(term);
// Collect any float payload packed into a NaN term.
295 packedNaN.accumulate(term);
296 }
297
298 if (packedNaN.getPayload() != 0.) {
299 // Some events with evaluation errors. Return "badness" of errors.
300 return {ROOT::Math::KahanSum<double>{packedNaN.getNaNWithPayload()}, kahanWeight.Sum()};
301 }
302
303 return {kahanProb, kahanWeight.Sum()};
304}
305
/// Replace the dataset by delegating to RooAbsOptTestStatistic::setDataSlave() and
/// discard the cached `_offsetPdf`.
/// \return The value returned by the base-class implementation.
306bool RooNLLVar::setDataSlave(RooAbsData &indata, bool cloneData, bool ownNewData)
307{
308 bool ret = RooAbsOptTestStatistic::setDataSlave(indata, cloneData, ownNewData);
309 // To re-create the data template pdf if necessary
310 _offsetPdf.reset();
// NOTE(review): listing line 311 is absent from this view; the statement that
// re-creates the offset pdf, if any, is not visible here.
312 return ret;
313}
314
316{
// Stores `flag` in _doBinOffset and, depending on the operation mode, either
// forwards the call to the sub-instances or creates the offset pdf locally.
// NOTE(review): the signature (listing line 315) is absent from this view; the
// recursive calls below suggest this is RooNLLVar::enableBinOffsetting(bool flag).
317 if (!_init) {
318 initialize();
319 }
320
321 _doBinOffset = flag;
322
323 // If this is a "master" that delegates the actual work to "slaves", the
324 // _offsetPdf will not be reset.
325 bool needsResetting = true;
326
327 switch (operMode()) {
328 case Slave: break;
329 case SimMaster: {
// Forward to every component test statistic.
330 for (auto &gof : _gofArray) {
331 static_cast<RooNLLVar &>(*gof).enableBinOffsetting(flag);
332 }
333 needsResetting = false;
334 break;
335 }
336 case MPMaster: {
// Forward to the test statistic behind each of the _nCPU front ends.
337 for (int i = 0; i < _nCPU; ++i) {
338 static_cast<RooNLLVar &>(_mpfeArray[i]->arg()).enableBinOffsetting(flag);
339 }
340 needsResetting = false;
341 break;
342 }
343 }
344
345 if (!needsResetting)
346 return;
347
// Build the offset pdf only once, and only when offsetting is being enabled.
348 if (flag && !_offsetPdf) {
349 std::string name = std::string{GetName()} + "_offsetPdf";
350 std::unique_ptr<RooDataHist> dataTemplate;
// Binned data is copied directly; anything else is converted with binnedClone().
// NOTE(review): the static_cast assumes _dataClone is a RooDataSet whenever it
// is not a RooDataHist — this is not checked here.
351 if (auto dh = dynamic_cast<RooDataHist *>(_dataClone)) {
352 dataTemplate = std::make_unique<RooDataHist>(*dh);
353 } else {
354 dataTemplate = std::unique_ptr<RooDataHist>(static_cast<RooDataSet const &>(*_dataClone).binnedClone());
355 }
356 _offsetPdf = std::make_unique<RooHistPdf>(name.c_str(), name.c_str(), *_funcObsSet, std::move(dataTemplate));
357 _offsetPdf->setOperMode(ADirty);
358 }
// NOTE(review): listing line 359 is absent from this view.
360}
#define e(i)
Definition RSha256.hxx:103
RooAbsReal * _funcClone
Pointer to internal clone of input function.
#define coutI(a)
bool _weightSq
Apply weights squared?
Definition RooNLLVar.h:45
RooAbsPdf * _binnedPdf
!
Definition RooNLLVar.h:50
std::vector< double > _binw
!
Definition RooNLLVar.h:49
bool _extended
Definition RooNLLVar.h:43
ROOT::Math::KahanSum< double > _offsetSaveW2
!
Definition RooNLLVar.h:47
#define ClassImp(name)
Definition Rtypes.h:377
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
T Sum() const
Definition Util.h:240
T Carry() const
Definition Util.h:250
void Add(T x)
Single-element accumulation. Will not vectorise.
Definition Util.h:165
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
void wireAllCaches()
void setValueDirty()
Mark the element dirty. This forces a re-evaluation when a value is requested.
Definition RooAbsArg.h:488
void setAttribute(const Text_t *name, bool value=true)
Set (default) or clear a named boolean attribute of this object.
Storage_t::size_type size() const
RooAbsArg * first() const
Abstract base class for binned and unbinned datasets.
Definition RooAbsData.h:57
virtual double weight() const =0
virtual const RooArgSet * get() const
Definition RooAbsData.h:101
virtual double weightSquared() const =0
Abstract base class for test statistics objects that evaluate a function or PDF at each point of a gi...
bool setDataSlave(RooAbsData &data, bool cloneData=true, bool ownNewDataAnyway=false) override
Change dataset that is used to given one.
RooAbsReal * _funcClone
Pointer to internal clone of input function.
bool _skipZeroWeights
! Whether to skip entries with weight zero in the evaluation
RooArgSet * _funcObsSet
List of observables in the pdf expression.
RooArgSet * _normSet
Pointer to set with observables used for normalization.
RooAbsData * _dataClone
Pointer to internal clone of input data.
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
bool canBeExtended() const
If true, PDF can provide extended likelihood term.
Definition RooAbsPdf.h:219
virtual double getLogVal(const RooArgSet *set=nullptr) const
Return the log of the current value with given normalization An error message is printed if the argum...
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
virtual std::list< double > * binBoundaries(RooAbsRealLValue &obs, double xlo, double xhi) const
Retrieve bin boundaries if this distribution is binned in obs.
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
void logEvalError(const char *message, const char *serverValueString=nullptr) const
Log evaluation error message.
Abstract base class for all test statistics.
Int_t _setNum
Partition number of this instance in parallel calculation mode.
double _evalCarry
! carry of Kahan sum in evaluatePartition
GOFOpMode operMode() const
Int_t _nCPU
Number of processors to use in parallel calculation mode.
GOFOpMode _gofOpMode
Operation mode of test statistic instance.
bool _init
! Is object initialized
Int_t _simCount
Total number of component p.d.f.s in RooSimultaneous (if any)
ROOT::Math::KahanSum< double > _offset
! Offset as KahanSum to avoid loss of precision
Int_t _extSet
! Number of designated set to calculate extended term
std::vector< std::unique_ptr< RooAbsTestStatistic > > _gofArray
! Array of sub-contexts representing part of the combined test statistic
bool initialize()
One-time initialization of the test statistic.
pRooRealMPFE * _mpfeArray
! Array of parallel execution front ends
bool _doOffset
Apply interval value offset to control numeric precision?
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
Container class to hold N-dimensional binned data.
Definition RooDataHist.h:39
Container class to hold unbinned data.
Definition RooDataSet.h:57
RooFit::OwningPtr< RooDataHist > binnedClone(const char *newName=nullptr, const char *newTitle=nullptr) const
Return binned clone of this dataset.
Implements a -log(likelihood) calculation from a dataset and a PDF.
Definition RooNLLVar.h:50
ComputeResult computeScalar(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const
std::unique_ptr< RooAbsPdf > _offsetPdf
! An optional per-bin likelihood offset
Definition RooNLLVar.h:102
static RooNLLVar::ComputeResult computeScalarFunc(const RooAbsPdf *pdfClone, RooAbsData *dataClone, RooArgSet *normSet, bool weightSq, std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent, RooAbsPdf const *offsetPdf=nullptr)
bool _doBinOffset
Definition RooNLLVar.h:95
ROOT::Math::KahanSum< double > _offsetSaveW2
!
Definition RooNLLVar.h:98
RooAbsPdf * _binnedPdf
!
Definition RooNLLVar.h:101
void applyWeightSquared(bool flag) override
Disables or enables the usage of squared weights.
std::vector< double > _binw
!
Definition RooNLLVar.h:100
std::pair< ROOT::Math::KahanSum< double >, double > ComputeResult
Definition RooNLLVar.h:76
bool _extended
Definition RooNLLVar.h:94
bool setDataSlave(RooAbsData &data, bool cloneData=true, bool ownNewDataAnyway=false) override
Change dataset that is used to given one.
RooNLLVar(const char *name, const char *title, RooAbsPdf &pdf, RooAbsData &data, bool extended, RooAbsTestStatistic::Configuration const &cfg=RooAbsTestStatistic::Configuration{})
Construct likelihood from given p.d.f and (binned or unbinned dataset) For internal use.
Definition RooNLLVar.cxx:60
RooAbsTestStatistic * create(const char *name, const char *title, RooAbsReal &pdf, RooAbsData &adata, const RooArgSet &projDeps, RooAbsTestStatistic::Configuration const &cfg) override
Create a test statistic using several properties of the current instance.
void enableBinOffsetting(bool on=true)
bool _first
!
Definition RooNLLVar.h:97
bool _weightSq
Apply weights squared?
Definition RooNLLVar.h:96
double evaluatePartition(std::size_t firstEvent, std::size_t lastEvent, std::size_t stepSize) const override
Calculate and return likelihood on subset of data.
RooAbsReal & arg() const
Definition RooRealMPFE.h:49
Implements a PDF constructed from a sum of functions:
Variable that can be changed from the outside.
Definition RooRealVar.h:37
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition TMath.cxx:509
Little struct that can pack a float into the unused bits of the mantissa of a NaN double.
float getPayload() const
Retrieve packed float.
double getNaNWithPayload() const
Retrieve a NaN with the current float payload packed into the mantissa.
void accumulate(double val)
Accumulate a packed float from another NaN into this.