Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVar.cxx
Go to the documentation of this file.
1/// \cond ROOFIT_INTERNAL
2
3/*****************************************************************************
4 * Project: RooFit *
5 * Package: RooFitCore *
6 * @(#)root/roofitcore:$Id$
7 * Authors: *
8 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
10 * *
11 * Copyright (c) 2000-2005, Regents of the University of California *
12 * and Stanford University. All rights reserved. *
13 * *
14 * Redistribution and use in source and binary forms, *
15 * with or without modification, are permitted according to the terms *
16 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
17 *****************************************************************************/
18
19/**
20\file RooNLLVar.cxx
21\class RooNLLVar
22\ingroup Roofitcore
23
24Implements a -log(likelihood) calculation from a dataset
25and a PDF. The NLL is calculated as
26\f[
27 \sum_\mathrm{data} -\log( \mathrm{pdf}(x_\mathrm{data}))
28\f]
29In extended mode, a
30\f$ N_\mathrm{expect} - N_\mathrm{observed}*log(N_\mathrm{expect}) \f$ term is added.
31**/
32
33#include "RooNLLVar.h"
34
35#include <RooAbsData.h>
36#include <RooAbsDataStore.h>
37#include <RooAbsPdf.h>
38#include <RooCmdConfig.h>
39#include <RooDataHist.h>
40#include <RooHistPdf.h>
41#include <RooMsgService.h>
42#include <RooNaNPacker.h>
43#include <RooProdPdf.h>
44#include "RooRealMPFE.h"
45#include <RooRealSumPdf.h>
46#include <RooRealVar.h>
47
48#include "TMath.h"
49#include "Math/Util.h"
50
51#include <algorithm>
52
53RooNLLVar::~RooNLLVar() {}
54
55
56////////////////////////////////////////////////////////////////////////////////
57/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
58/// For internal use.
59
60RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
61 bool extended, RooAbsTestStatistic::Configuration const& cfg) :
62 RooNLLVar{name, title, pdf, indata, RooArgSet(), extended, cfg} {}
63
64
65////////////////////////////////////////////////////////////////////////////////
66/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
67/// For internal use.
68
69RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf &pdf, RooAbsData &indata, const RooArgSet &projDeps,
70 bool extended, RooAbsTestStatistic::Configuration const &cfg)
71 : RooAbsOptTestStatistic(name, title, pdf, indata, projDeps, cfg),
72 _extended(extended),
74{
75 // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
76 // for a binned likelihood calculation
77
78 // Retrieve and cache bin widths needed to convert un-normalized binnedPdf values back to yields
79 if (_binnedPdf) {
80
81 // The Active label will disable pdf integral calculations
82 _binnedPdf->setAttribute("BinnedLikelihoodActive") ;
83
84 RooArgSet obs;
85 _funcClone->getObservables(_dataClone->get(), obs);
86 if (obs.size()!=1) {
87 _binnedPdf = nullptr;
88 } else {
89 auto* var = static_cast<RooRealVar*>(obs.first());
90 std::unique_ptr<std::list<double>> boundaries{_binnedPdf->binBoundaries(*var,var->getMin(),var->getMax())};
91 auto biter = boundaries->begin() ;
92 _binw.reserve(boundaries->size()-1) ;
93 double lastBound = (*biter) ;
94 ++biter ;
95 while (biter!=boundaries->end()) {
96 _binw.push_back((*biter) - lastBound);
97 lastBound = (*biter) ;
98 ++biter ;
99 }
100 }
101
102 _skipZeroWeights = false;
103 } else {
104 _skipZeroWeights = true;
105 }
106}
107
108
109
110////////////////////////////////////////////////////////////////////////////////
111/// Copy constructor
112
113RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
114 RooAbsOptTestStatistic(other,name),
115 _extended(other._extended),
118 _binw(other._binw),
120{
121}
122
123
124////////////////////////////////////////////////////////////////////////////////
125/// Create a test statistic using several properties of the current instance. This is used to duplicate
126/// the test statistic in multi-processing scenarios.
127RooAbsTestStatistic* RooNLLVar::create(const char *name, const char *title, RooAbsReal& pdf, RooAbsData& adata,
128 const RooArgSet& projDeps, RooAbsTestStatistic::Configuration const& cfg) {
129 RooAbsPdf & thePdf = dynamic_cast<RooAbsPdf&>(pdf);
130 // check if pdf can be extended
131 bool extendedPdf = _extended && thePdf.canBeExtended();
132
133 auto testStat = new RooNLLVar(name, title, thePdf, adata, projDeps, extendedPdf, cfg);
134 return testStat;
135}
136
137
138////////////////////////////////////////////////////////////////////////////////
139
140void RooNLLVar::applyWeightSquared(bool flag)
141{
142 if (_gofOpMode==Slave) {
143 if (flag != _weightSq) {
144 _weightSq = flag;
145 std::swap(_offset, _offsetSaveW2);
146 }
147 setValueDirty();
148 } else if ( _gofOpMode==MPMaster) {
149 for (int i=0 ; i<_nCPU ; i++)
151 } else if ( _gofOpMode==SimMaster) {
152 for(auto& gof : _gofArray)
153 static_cast<RooNLLVar&>(*gof).applyWeightSquared(flag);
154 }
155}
156
157
158////////////////////////////////////////////////////////////////////////////////
159/// Calculate and return likelihood on subset of data.
160/// \param[in] firstEvent First event to be processed.
161/// \param[in] lastEvent First event not to be processed, any more.
162/// \param[in] stepSize Steps between events.
163/// \note For batch computations, the step size **must** be one.
164///
165/// If this an extended likelihood, the extended term is added to the return likelihood
166/// in the batch that encounters the event with index 0.
167
168double RooNLLVar::evaluatePartition(std::size_t firstEvent, std::size_t lastEvent, std::size_t stepSize) const
169{
170 // Throughout the calculation, we use Kahan's algorithm for summing to
171 // prevent loss of precision - this is a factor four more expensive than
172 // straight addition, but since evaluating the PDF is usually much more
173 // expensive than that, we tolerate the additional cost...
175 double sumWeight{0.0};
176
177 auto * pdfClone = static_cast<RooAbsPdf*>(_funcClone);
178
179
180 // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
181 if (_binnedPdf) {
183 for (auto i=firstEvent ; i<lastEvent ; i+=stepSize) {
184
185 _dataClone->get(i) ;
186
187 double eventWeight = _dataClone->weight();
188
189
190 // Calculate log(Poisson(N|mu) for this bin
191 double N = eventWeight ;
192 double mu = _binnedPdf->getVal()*_binw[i] ;
193 //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << std::endl ;
194
195 if (mu<=0 && N>0) {
196
197 // Catch error condition: data present where zero events are predicted
198 logEvalError(Form("Observed %f events in bin %lu with zero event yield",N,(unsigned long)i)) ;
199
200 } else if (std::abs(mu)<1e-10 && std::abs(N)<1e-10) {
201
202 // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
203 // since log(mu)=0. No update of result is required since term=0.
204
205 } else {
206
207 double term = 0.0;
208 if(_doBinOffset) {
209 term -= -mu + N + N * (std::log(mu) - std::log(N));
210 } else {
211 term -= -mu + N * std::log(mu) - TMath::LnGamma(N+1);
212 }
213 result += term;
214 sumWeightKahanSum += eventWeight;
215
216 }
217 }
218
220
221 } else { //unbinned PDF
222
224
225 // include the extended maximum likelihood term, if requested
226 if(_extended && _setNum==_extSet) {
227 result += pdfClone->extendedTerm(*_dataClone, _weightSq, _doBinOffset);
228 }
229 } //unbinned PDF
230
231
232 // If part of simultaneous PDF normalize probability over
233 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
234 // If we do bin-by bin offsetting, we don't do this because it cancels out
235 if (!_doBinOffset && _simCount>1) {
236 result += sumWeight * std::log(static_cast<double>(_simCount));
237 }
238
239
240 // At the end of the first full calculation, wire the caches
241 if (_first) {
242 _first = false ;
243 _funcClone->wireAllCaches() ;
244 }
245
246
247 // Check if value offset flag is set.
248 if (_doOffset) {
249
250 // If no offset is stored enable this feature now
251 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
252 coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result.Sum() << std::endl ;
253 _offset = result ;
254 }
255
256 // Subtract offset
257 result -= _offset;
258 }
259
260 _evalCarry = result.Carry();
261 return result.Sum() ;
262}
263
264RooNLLVar::ComputeResult RooNLLVar::computeScalar(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const {
265 auto pdfClone = static_cast<const RooAbsPdf*>(_funcClone);
267}
268
269RooNLLVar::ComputeResult RooNLLVar::computeScalarFunc(const RooAbsPdf *pdfClone, RooAbsData *dataClone,
270 RooArgSet *normSet, bool weightSq, std::size_t stepSize,
271 std::size_t firstEvent, std::size_t lastEvent, RooAbsPdf const* offsetPdf)
272{
276
277 for (auto i=firstEvent; i<lastEvent; i+=stepSize) {
278 dataClone->get(i) ;
279
280 double weight = dataClone->weight(); //FIXME
281
282 if (0. == weight * weight) continue ;
283 if (weightSq) weight = dataClone->weightSquared() ;
284
285 double logProba = pdfClone->getLogVal(normSet);
286
287 if(offsetPdf) {
288 logProba -= offsetPdf->getLogVal(normSet);
289 }
290
291 const double term = -weight * logProba;
292
293 kahanWeight.Add(weight);
294 kahanProb.Add(term);
295 packedNaN.accumulate(term);
296 }
297
298 if (packedNaN.getPayload() != 0.) {
299 // Some events with evaluation errors. Return "badness" of errors.
300 return {ROOT::Math::KahanSum<double>{packedNaN.getNaNWithPayload()}, kahanWeight.Sum()};
301 }
302
303 return {kahanProb, kahanWeight.Sum()};
304}
305
306bool RooNLLVar::setDataSlave(RooAbsData &indata, bool cloneData, bool ownNewData)
307{
308 bool ret = RooAbsOptTestStatistic::setDataSlave(indata, cloneData, ownNewData);
309 // To re-create the data template pdf if necessary
310 _offsetPdf.reset();
312 return ret;
313}
314
315void RooNLLVar::enableBinOffsetting(bool flag)
316{
317 if (!_init) {
318 initialize();
319 }
320
322
323 // If this is a "master" that delegates the actual work to "slaves", the
324 // _offsetPdf will not be reset.
325 bool needsResetting = true;
326
327 switch (operMode()) {
328 case Slave: break;
329 case SimMaster: {
330 for (auto &gof : _gofArray) {
331 static_cast<RooNLLVar &>(*gof).enableBinOffsetting(flag);
332 }
333 needsResetting = false;
334 break;
335 }
336 case MPMaster: {
337 for (int i = 0; i < _nCPU; ++i) {
338 static_cast<RooNLLVar &>(_mpfeArray[i]->arg()).enableBinOffsetting(flag);
339 }
340 needsResetting = false;
341 break;
342 }
343 }
344
345 if (!needsResetting)
346 return;
347
348 if (flag && !_offsetPdf) {
349 std::string name = std::string{GetName()} + "_offsetPdf";
350 std::unique_ptr<RooDataHist> dataTemplate;
351 if (auto dh = dynamic_cast<RooDataHist *>(_dataClone)) {
352 dataTemplate = std::make_unique<RooDataHist>(*dh);
353 } else {
354 dataTemplate = std::unique_ptr<RooDataHist>(static_cast<RooDataSet const &>(*_dataClone).binnedClone());
355 }
356 _offsetPdf = std::make_unique<RooHistPdf>(name.c_str(), name.c_str(), *_funcObsSet, std::move(dataTemplate));
357 _offsetPdf->setOperMode(ADirty);
358 }
359 setValueDirty();
360}
361
362/// \endcond
#define e(i)
Definition RSha256.hxx:103
#define coutI(a)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
const_iterator begin() const
const_iterator end() const
Storage_t::size_type size() const
RooAbsArg * first() const
Abstract base class for binned and unbinned datasets.
Definition RooAbsData.h:57
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:24
Container class to hold N-dimensional binned data.
Definition RooDataHist.h:40
Container class to hold unbinned data.
Definition RooDataSet.h:34
RooFit::OwningPtr< RooDataHist > binnedClone(const char *newName=nullptr, const char *newTitle=nullptr) const
Return binned clone of this dataset.
Implements a PDF constructed from a sum of functions:
Variable that can be changed from the outside.
Definition RooRealVar.h:37
void initialize(typename Architecture_t::Matrix_t &A, EInitialization m)
Definition Functions.h:282
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition TMath.cxx:509
Little struct that can pack a float into the unused bits of the mantissa of a NaN double.