Logo ROOT   6.18/05
Reference Guide
RooNLLVar.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * @(#)root/roofitcore:$Id$
5 * Authors: *
6 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * *
9 * Copyright (c) 2000-2005, Regents of the University of California *
10 * and Stanford University. All rights reserved. *
11 * *
12 * Redistribution and use in source and binary forms, *
13 * with or without modification, are permitted according to the terms *
14 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15 *****************************************************************************/
16
17/**
18\file RooNLLVar.cxx
19\class RooNLLVar
20\ingroup Roofitcore
21
22Class RooNLLVar implements a a -log(likelihood) calculation from a dataset
23and a PDF. The NLL is calculated as
24<pre>
25 Sum[data] -log( pdf(x_data) )
26</pre>
27In extended mode, a (Nexpect - Nobserved*log(NExpected) term is added
28**/
29
30#include <algorithm>
31
32#include "RooFit.h"
33#include "Riostream.h"
34#include "TMath.h"
35
36#include "RooNLLVar.h"
37#include "RooAbsData.h"
38#include "RooAbsPdf.h"
39#include "RooCmdConfig.h"
40#include "RooMsgService.h"
41#include "RooAbsDataStore.h"
42#include "RooRealMPFE.h"
43#include "RooRealSumPdf.h"
44#include "RooRealVar.h"
45#include "RooProdPdf.h"
46
48;
49
51
52
53////////////////////////////////////////////////////////////////////////////////
54/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
55///
56/// Argument | Description
57/// -------------------------|------------
58/// Extended() | Include extended term in calculation
59/// NumCPU() | Activate parallel processing feature
60/// Range() | Fit only selected region
61/// SumCoefRange() | Set the range in which to interpret the coefficients of RooAddPdf components
62/// SplitRange() | Fit range is split by index catory of simultaneous PDF
63/// ConditionalObservables() | Define conditional observables
64/// Verbose() | Verbose output of GOF framework classes
65/// CloneData() | Clone input dataset for internal use (default is kTRUE)
66
67RooNLLVar::RooNLLVar(const char *name, const char* title, RooAbsPdf& pdf, RooAbsData& indata,
68 const RooCmdArg& arg1, const RooCmdArg& arg2,const RooCmdArg& arg3,
69 const RooCmdArg& arg4, const RooCmdArg& arg5,const RooCmdArg& arg6,
70 const RooCmdArg& arg7, const RooCmdArg& arg8,const RooCmdArg& arg9) :
71 RooAbsOptTestStatistic(name,title,pdf,indata,
72 *(const RooArgSet*)RooCmdConfig::decodeObjOnTheFly("RooNLLVar::RooNLLVar","ProjectedObservables",0,&_emptySet
73 ,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
74 RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","RangeWithName",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
75 RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","AddCoefRange",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
76 RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","NumCPU",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
78 RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","Verbose",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
79 RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","SplitRange",0,0,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
80 RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","CloneData",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
81{
82 RooCmdConfig pc("RooNLLVar::RooNLLVar") ;
83 pc.allowUndefined() ;
84 pc.defineInt("extended","Extended",0,kFALSE) ;
85
86 pc.process(arg1) ; pc.process(arg2) ; pc.process(arg3) ;
87 pc.process(arg4) ; pc.process(arg5) ; pc.process(arg6) ;
88 pc.process(arg7) ; pc.process(arg8) ; pc.process(arg9) ;
89
90 _extended = pc.getInt("extended") ;
92 _first = kTRUE ;
93 _offset = 0.;
94 _offsetCarry = 0.;
95 _offsetSaveW2 = 0.;
97
98 _binnedPdf = 0 ;
99}
100
101
102
103////////////////////////////////////////////////////////////////////////////////
104/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
105/// For internal use.
106
107RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
108 Bool_t extended, const char* rangeName, const char* addCoefRangeName,
109 Int_t nCPU, RooFit::MPSplit interleave, Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
110 RooAbsOptTestStatistic(name,title,pdf,indata,RooArgSet(),rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
111 _extended(extended),
112 _weightSq(kFALSE),
113 _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
114{
115 // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
116 // for a binned likelihood calculation
117 _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
118
119 // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
120 if (_binnedPdf) {
121
122 // The Active label will disable pdf integral calculations
123 _binnedPdf->setAttribute("BinnedLikelihoodActive") ;
124
126 if (obs->getSize()!=1) {
127 _binnedPdf = 0 ;
128 } else {
129 RooRealVar* var = (RooRealVar*) obs->first() ;
130 std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
131 std::list<Double_t>::iterator biter = boundaries->begin() ;
132 _binw.resize(boundaries->size()-1) ;
133 Double_t lastBound = (*biter) ;
134 ++biter ;
135 int ibin=0 ;
136 while (biter!=boundaries->end()) {
137 _binw[ibin] = (*biter) - lastBound ;
138 lastBound = (*biter) ;
139 ibin++ ;
140 ++biter ;
141 }
142 }
143 }
144}
145
146
147
148////////////////////////////////////////////////////////////////////////////////
149/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
150/// For internal use.
151
152RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
153 const RooArgSet& projDeps, Bool_t extended, const char* rangeName,const char* addCoefRangeName,
154 Int_t nCPU,RooFit::MPSplit interleave,Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
155 RooAbsOptTestStatistic(name,title,pdf,indata,projDeps,rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
156 _extended(extended),
157 _weightSq(kFALSE),
158 _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
159{
160 // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
161 // for a binned likelihood calculation
162 _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
163
164 // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
165 if (_binnedPdf) {
166
168 if (obs->getSize()!=1) {
169 _binnedPdf = 0 ;
170 } else {
171 RooRealVar* var = (RooRealVar*) obs->first() ;
172 std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
173 std::list<Double_t>::iterator biter = boundaries->begin() ;
174 _binw.resize(boundaries->size()-1) ;
175 Double_t lastBound = (*biter) ;
176 ++biter ;
177 int ibin=0 ;
178 while (biter!=boundaries->end()) {
179 _binw[ibin] = (*biter) - lastBound ;
180 lastBound = (*biter) ;
181 ibin++ ;
182 ++biter ;
183 }
184 }
185 }
186}
187
188
189
190////////////////////////////////////////////////////////////////////////////////
191/// Copy constructor
192
193RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
195 _extended(other._extended),
196 _weightSq(other._weightSq),
197 _first(kTRUE), _offsetSaveW2(other._offsetSaveW2),
198 _offsetCarrySaveW2(other._offsetCarrySaveW2),
199 _binw(other._binw) {
201}
202
203
204
205
206////////////////////////////////////////////////////////////////////////////////
207/// Destructor
208
210{
211}
212
213
214
215
216////////////////////////////////////////////////////////////////////////////////
217
219{
220 if (_gofOpMode==Slave) {
221 if (flag != _weightSq) {
222 _weightSq = flag;
225 }
227 } else if ( _gofOpMode==MPMaster) {
228 for (Int_t i=0 ; i<_nCPU ; i++)
229 _mpfeArray[i]->applyNLLWeightSquared(flag);
230 } else if ( _gofOpMode==SimMaster) {
231 for (Int_t i=0 ; i<_nGof ; i++)
232 ((RooNLLVar*)_gofArray[i])->applyWeightSquared(flag);
233 }
234}
235
236
237
238////////////////////////////////////////////////////////////////////////////////
239/// Calculate and return likelihood on subset of data.
240/// \param[in] firstEvent First event to be processed.
241/// \param[in] lastEvent First event not to be processed, any more.
242/// \param[in] stepSize Steps between events.
243/// \note For efficient batch computations, the step size **must** be one.
244///
245/// If this an extended likelihood, the extended term is added to the return likelihood
246/// in the batch that encounters the event with index 0.
247
248Double_t RooNLLVar::evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const
249{
250 // Throughout the calculation, we use Kahan's algorithm for summing to
251 // prevent loss of precision - this is a factor four more expensive than
252 // straight addition, but since evaluating the PDF is usually much more
253 // expensive than that, we tolerate the additional cost...
254 Int_t i ;
255 Double_t result(0), carry(0);
256
257 RooAbsPdf* pdfClone = (RooAbsPdf*) _funcClone ;
258
259 // cout << "RooNLLVar::evaluatePartition(" << GetName() << ") projDeps = " << (_projDeps?*_projDeps:RooArgSet()) << endl ;
260
261 _dataClone->store()->recalculateCache( _projDeps, firstEvent, lastEvent, stepSize,(_binnedPdf?kFALSE:kTRUE) ) ;
262
263 Double_t sumWeight(0), sumWeightCarry(0);
264
265 // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
266 if (_binnedPdf) {
267
268 for (i=firstEvent ; i<lastEvent ; i+=stepSize) {
269
270 _dataClone->get(i) ;
271
272 if (!_dataClone->valid()) continue;
273
274 Double_t eventWeight = _dataClone->weight();
275
276
277 // Calculate log(Poisson(N|mu) for this bin
278 Double_t N = eventWeight ;
279 Double_t mu = _binnedPdf->getVal()*_binw[i] ;
280 //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ;
281
282 if (mu<=0 && N>0) {
283
284 // Catch error condition: data present where zero events are predicted
285 logEvalError(Form("Observed %f events in bin %d with zero event yield",N,i)) ;
286
287 } else if (fabs(mu)<1e-10 && fabs(N)<1e-10) {
288
289 // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
290 // since log(mu)=0. No update of result is required since term=0.
291
292 } else {
293
294 Double_t term = -1*(-mu + N*log(mu) - TMath::LnGamma(N+1)) ;
295
296 // Kahan summation of sumWeight
297 Double_t y = eventWeight - sumWeightCarry;
298 Double_t t = sumWeight + y;
299 sumWeightCarry = (t - sumWeight) - y;
300 sumWeight = t;
301
302 // Kahan summation of result
303 y = term - carry;
304 t = result + y;
305 carry = (t - result) - y;
306 result = t;
307 }
308 }
309
310
311 } else {
312
313 for (i=firstEvent ; i<lastEvent ; i+=stepSize) {
314
315 _dataClone->get(i) ;
316
317 if (!_dataClone->valid()) continue;
318
319 Double_t eventWeight = _dataClone->weight();
320 if (0. == eventWeight * eventWeight) continue ;
321 if (_weightSq) eventWeight = _dataClone->weightSquared() ;
322
323 Double_t term = -eventWeight * pdfClone->getLogVal(_normSet);
324
325
326 Double_t y = eventWeight - sumWeightCarry;
327 Double_t t = sumWeight + y;
328 sumWeightCarry = (t - sumWeight) - y;
329 sumWeight = t;
330
331 y = term - carry;
332 t = result + y;
333 carry = (t - result) - y;
334 result = t;
335 }
336
337 // include the extended maximum likelihood term, if requested
338 if(_extended && _setNum==_extSet) {
339 if (_weightSq) {
340
341 // Calculate sum of weights-squared here for extended term
342 Double_t sumW2(0), sumW2carry(0);
343 for (i=0 ; i<_dataClone->numEntries() ; i++) {
344 _dataClone->get(i);
345 Double_t y = _dataClone->weightSquared() - sumW2carry;
346 Double_t t = sumW2 + y;
347 sumW2carry = (t - sumW2) - y;
348 sumW2 = t;
349 }
350
351 Double_t expected= pdfClone->expectedEvents(_dataClone->get());
352
353 // Adjust calculation of extended term with W^2 weighting: adjust poisson such that
354 // estimate of Nexpected stays at the same value, but has a different variance, rescale
355 // both the observed and expected count of the Poisson with a factor sum[w] / sum[w^2] which is
356 // the effective weight of the Poisson term.
357 // i.e. change Poisson(Nobs = sum[w]| Nexp ) --> Poisson( sum[w] * sum[w] / sum[w^2] | Nexp * sum[w] / sum[w^2] )
358 // weighted by the effective weight sum[w^2]/ sum[w] in the likelihood.
359 // Since here we compute the likelihood with the weight square we need to multiply by the
360 // square of the effective weight
361 // expectedW = expected * sum[w] / sum[w^2] : effective expected entries
362 // observedW = sum[w] * sum[w] / sum[w^2] : effective observed entries
363 // The extended term for the likelihood weighted by the square of the weight will be then:
364 // (sum[w^2]/ sum[w] )^2 * expectedW - (sum[w^2]/ sum[w] )^2 * observedW * log (expectedW) and this is
365 // using the previous expressions for expectedW and observedW
366 // sum[w^2] / sum[w] * expected - sum[w^2] * log (expectedW)
367 // and since the weights are constants in the likelihood we can use log(expected) instead of log(expectedW)
368
369 Double_t expectedW2 = expected * sumW2 / _dataClone->sumEntries() ;
370 Double_t extra= expectedW2 - sumW2*log(expected );
371
372 // Double_t y = pdfClone->extendedTerm(sumW2, _dataClone->get()) - carry;
373
374 Double_t y = extra - carry ;
375
376 Double_t t = result + y;
377 carry = (t - result) - y;
378 result = t;
379 } else {
380 Double_t y = pdfClone->extendedTerm(_dataClone->sumEntries(), _dataClone->get()) - carry;
381 Double_t t = result + y;
382 carry = (t - result) - y;
383 result = t;
384 }
385 }
386 }
387
388
389 // If part of simultaneous PDF normalize probability over
390 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
391 if (_simCount>1) {
392 Double_t y = sumWeight*log(1.0*_simCount) - carry;
393 Double_t t = result + y;
394 carry = (t - result) - y;
395 result = t;
396 }
397
398 //timer.Stop() ;
399 //cout << "RooNLLVar::evalPart(" << GetName() << ") SET=" << _setNum << " first=" << firstEvent << ", last=" << lastEvent << ", step=" << stepSize << ") result = " << result << " CPU = " << timer.CpuTime() << endl ;
400
401 // At the end of the first full calculation, wire the caches
402 if (_first) {
403 _first = kFALSE ;
405 }
406
407
408 // Check if value offset flag is set.
409 if (_doOffset) {
410
411 // If no offset is stored enable this feature now
412 if (_offset==0 && result !=0 ) {
413 coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result << std::endl ;
414 _offset = result ;
415 _offsetCarry = carry;
416 }
417
418 // Substract offset
419 Double_t y = -_offset - (carry + _offsetCarry);
420 Double_t t = result + y;
421 carry = (t - result) - y;
422 result = t;
423 }
424
425
426 _evalCarry = carry;
427 return result ;
428}
429
430
431
432
#define e(i)
Definition: RSha256.hxx:103
#define coutI(a)
Definition: RooMsgService.h:31
int Int_t
Definition: RtypesCore.h:41
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
#define N
char name[80]
Definition: TGX11.cxx:109
double log(double)
char * Form(const char *fmt,...)
RooArgSet * getObservables(const RooArgSet &set, Bool_t valueOnly=kTRUE) const
Return the observables of this pdf given a set of observables.
Definition: RooAbsArg.h:240
void setAttribute(const Text_t *name, Bool_t value=kTRUE)
Set (default) or clear a named boolean attribute of this object.
Definition: RooAbsArg.cxx:256
void wireAllCaches()
Definition: RooAbsArg.cxx:2323
void setValueDirty() const
Definition: RooAbsArg.h:486
Int_t getSize() const
RooAbsArg * first() const
virtual void recalculateCache(const RooArgSet *, Int_t, Int_t, Int_t, Bool_t)
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
virtual const RooArgSet * get() const
Definition: RooAbsData.h:80
RooAbsDataStore * store()
Definition: RooAbsData.h:56
virtual Double_t sumEntries() const =0
virtual Bool_t valid() const
Definition: RooAbsData.h:86
virtual Double_t weight() const =0
virtual Double_t weightSquared() const =0
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:306
RooAbsOptTestStatistic is the abstract base class for test statistics objects that evaluate a functio...
virtual Double_t extendedTerm(Double_t observedEvents, const RooArgSet *nset=0) const
Returned the extended likelihood term (Nexpect - Nobserved*log(NExpected) of this PDF for the given n...
Definition: RooAbsPdf.cxx:661
virtual Double_t getLogVal(const RooArgSet *set=0) const
Return the log of the current value with given normalization An error message is printed if the argum...
Definition: RooAbsPdf.cxx:621
virtual Double_t expectedEvents(const RooArgSet *nset) const
Return expected number of events from this p.d.f for use in extended likelihood calculations.
Definition: RooAbsPdf.cxx:2920
virtual Double_t getMax(const char *name=0) const
Get maximum of currently defined range.
virtual Double_t getMin(const char *name=0) const
Get miniminum of currently defined range.
Double_t getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition: RooAbsReal.h:81
void logEvalError(const char *message, const char *serverValueString=0) const
Log evaluation error message.
Int_t _nGof
Number of designated set to calculated extended term.
Int_t _nCPU
GOF MP Split mode specified by component (when Auto is active)
pRooAbsTestStatistic * _gofArray
GOFOpMode _gofOpMode
Is object initialized
Double_t _evalCarry
avoids loss of precision
Double_t _offsetCarry
Offset.
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:28
RooCmdArg is a named container for two doubles, two integers two object points and three string point...
Definition: RooCmdArg.h:27
Class RooCmdConfig is a configurable parser for RooCmdArg named arguments.
Definition: RooCmdConfig.h:27
Class RooNLLVar implements a a -log(likelihood) calculation from a dataset and a PDF.
Definition: RooNLLVar.h:26
void applyWeightSquared(Bool_t flag)
Definition: RooNLLVar.cxx:218
RooNLLVar()
Definition: RooNLLVar.h:30
RooRealSumPdf * _binnedPdf
Definition: RooNLLVar.h:75
Bool_t _extended
Definition: RooNLLVar.h:67
virtual Double_t evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const
Calculate and return likelihood on subset of data.
Definition: RooNLLVar.cxx:248
static RooArgSet _emptySet
Definition: RooNLLVar.h:65
virtual ~RooNLLVar()
Destructor.
Definition: RooNLLVar.cxx:209
Bool_t _first
Definition: RooNLLVar.h:70
std::vector< Double_t > _binw
Definition: RooNLLVar.h:74
Double_t _offsetCarrySaveW2
Definition: RooNLLVar.h:72
Double_t _offsetSaveW2
Definition: RooNLLVar.h:71
Bool_t _weightSq
Definition: RooNLLVar.h:69
The class RooRealSumPdf implements a PDF constructed from a sum of functions:
Definition: RooRealSumPdf.h:24
virtual std::list< Double_t > * binBoundaries(RooAbsRealLValue &, Double_t, Double_t) const
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:36
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Double_t y[n]
Definition: legend1.C:17
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Template specialisation used in RooAbsArg:
@ BulkPartition
Definition: RooGlobalFunc.h:60
@ Minimization
Definition: RooGlobalFunc.h:57
static constexpr double pc
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition: TMath.cxx:486