Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVar.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * @(#)root/roofitcore:$Id$
5 * Authors: *
6 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * *
9 * Copyright (c) 2000-2005, Regents of the University of California *
10 * and Stanford University. All rights reserved. *
11 * *
12 * Redistribution and use in source and binary forms, *
13 * with or without modification, are permitted according to the terms *
14 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15 *****************************************************************************/
16
17/**
18\file RooNLLVar.cxx
19\class RooNLLVar
20\ingroup Roofitcore
21
22Class RooNLLVar implements a -log(likelihood) calculation from a dataset
23and a PDF. The NLL is calculated as
24\f[
25 \sum_\mathrm{data} -\log( \mathrm{pdf}(x_\mathrm{data}))
26\f]
27In extended mode, a
28\f$ N_\mathrm{expect} - N_\mathrm{observed}*log(N_\mathrm{expect}) \f$ term is added.
29**/
30
31#include "RooNLLVar.h"
32
33#include "RooAbsData.h"
34#include "RooAbsPdf.h"
35#include "RooCmdConfig.h"
36#include "RooMsgService.h"
37#include "RooAbsDataStore.h"
38#include "RooRealMPFE.h"
39#include "RooRealSumPdf.h"
40#include "RooRealVar.h"
41#include "RooProdPdf.h"
42#include "RooNaNPacker.h"
43#include "RooDataHist.h"
44
45#ifdef ROOFIT_CHECK_CACHED_VALUES
46#include <iomanip>
47#endif
48
49#include "TMath.h"
50#include "Math/Util.h"
51
52#include <algorithm>
53
54namespace {
55 template<class ...Args>
56 RooAbsTestStatistic::Configuration makeRooAbsTestStatisticCfg(Args const& ... args) {
58 cfg.rangeName = RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","RangeWithName",0,"",args...);
59 cfg.addCoefRangeName = RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","AddCoefRange",0,"",args...);
60 cfg.nCPU = RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","NumCPU",0,1,args...);
62 cfg.verbose = static_cast<bool>(RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","Verbose",0,1,args...));
63 cfg.splitCutRange = static_cast<bool>(RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","SplitRange",0,0,args...));
64 cfg.cloneInputData = static_cast<bool>(RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","CloneData",0,1,args...));
65 cfg.integrateOverBinsPrecision = RooCmdConfig::decodeDoubleOnTheFly("RooNLLVar::RooNLLVar", "IntegrateBins", 0, -1., {args...});
66 return cfg;
67 }
68}
69
71
73
75
76////////////////////////////////////////////////////////////////////////////////
77/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
78///
79/// Argument | Description
80/// -------------------------|------------
81/// Extended() | Include extended term in calculation
82/// NumCPU() | Activate parallel processing feature
83/// Range() | Fit only selected region
84/// SumCoefRange() | Set the range in which to interpret the coefficients of RooAddPdf components
85/// SplitRange() | Fit range is split by index category of simultaneous PDF
86/// ConditionalObservables() | Define conditional observables
87/// Verbose() | Verbose output of GOF framework classes
88/// CloneData() | Clone input dataset for internal use (default is true)
89/// BatchMode() | Evaluate batches of data events (faster if PDFs support it)
90/// IntegrateBins() | Integrate PDF within each bin. This sets the desired precision. Only useful for binned fits.
91RooNLLVar::RooNLLVar(const char *name, const char* title, RooAbsPdf& pdf, RooAbsData& indata,
92 const RooCmdArg& arg1, const RooCmdArg& arg2,const RooCmdArg& arg3,
93 const RooCmdArg& arg4, const RooCmdArg& arg5,const RooCmdArg& arg6,
94 const RooCmdArg& arg7, const RooCmdArg& arg8,const RooCmdArg& arg9) :
95 RooAbsOptTestStatistic(name,title,pdf,indata,
96 *RooCmdConfig::decodeSetOnTheFly(
97 "RooNLLVar::RooNLLVar","ProjectedObservables",0,&_emptySet,
98 arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
99 makeRooAbsTestStatisticCfg(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
100{
101 RooCmdConfig pc("RooNLLVar::RooNLLVar") ;
102 pc.allowUndefined() ;
103 pc.defineInt("extended","Extended",0,false) ;
104 pc.defineInt("BatchMode", "BatchMode", 0, false);
105
106 pc.process(arg1) ; pc.process(arg2) ; pc.process(arg3) ;
107 pc.process(arg4) ; pc.process(arg5) ; pc.process(arg6) ;
108 pc.process(arg7) ; pc.process(arg8) ; pc.process(arg9) ;
109
110 _extended = pc.getInt("extended") ;
111 _skipZeroWeights = true;
112}
113
114
115////////////////////////////////////////////////////////////////////////////////
116/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
117/// For internal use.
118
119RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
120 bool extended, RooAbsTestStatistic::Configuration const& cfg) :
121 RooNLLVar{name, title, pdf, indata, RooArgSet(), extended, cfg} {}
122
123
124////////////////////////////////////////////////////////////////////////////////
125/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
126/// For internal use.
127
128RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
129 const RooArgSet& projDeps,
130 bool extended, RooAbsTestStatistic::Configuration const& cfg) :
131 RooAbsOptTestStatistic(name,title,pdf,indata,projDeps, cfg),
132 _extended(extended)
133{
134 // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
135 // for a binned likelihood calculation
136 _binnedPdf = cfg.binnedL ? static_cast<RooRealSumPdf*>(_funcClone) : nullptr ;
137
138 // Retrieve and cache bin widths needed to convert un-normalized binnedPdf values back to yields
139 if (_binnedPdf) {
140
141 // The Active label will disable pdf integral calculations
142 _binnedPdf->setAttribute("BinnedLikelihoodActive") ;
143
144 RooArgSet obs;
146 if (obs.size()!=1) {
147 _binnedPdf = nullptr;
148 } else {
149 auto* var = static_cast<RooRealVar*>(obs.first());
150 std::unique_ptr<std::list<double>> boundaries{_binnedPdf->binBoundaries(*var,var->getMin(),var->getMax())};
151 auto biter = boundaries->begin() ;
152 _binw.reserve(boundaries->size()-1) ;
153 double lastBound = (*biter) ;
154 ++biter ;
155 while (biter!=boundaries->end()) {
156 _binw.push_back((*biter) - lastBound);
157 lastBound = (*biter) ;
158 ++biter ;
159 }
160 }
161
162 _skipZeroWeights = false;
163 } else {
164 _skipZeroWeights = true;
165 }
166}
167
168
169
170////////////////////////////////////////////////////////////////////////////////
171/// Copy constructor
172
173RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
175 _extended(other._extended),
176 _weightSq(other._weightSq),
177 _offsetSaveW2(other._offsetSaveW2),
178 _binw(other._binw),
179 _binnedPdf{other._binnedPdf}
180{
181}
182
183
184////////////////////////////////////////////////////////////////////////////////
185/// Create a test statistic using several properties of the current instance. This is used to duplicate
186/// the test statistic in multi-processing scenarios.
187RooAbsTestStatistic* RooNLLVar::create(const char *name, const char *title, RooAbsReal& pdf, RooAbsData& adata,
188 const RooArgSet& projDeps, RooAbsTestStatistic::Configuration const& cfg) {
189 RooAbsPdf & thePdf = dynamic_cast<RooAbsPdf&>(pdf);
190 // check if pdf can be extended
191 bool extendedPdf = _extended && thePdf.canBeExtended();
192
193 auto testStat = new RooNLLVar(name, title, thePdf, adata, projDeps, extendedPdf, cfg);
194 return testStat;
195}
196
197
198////////////////////////////////////////////////////////////////////////////////
199
201{
202 if (_gofOpMode==Slave) {
203 if (flag != _weightSq) {
204 _weightSq = flag;
205 std::swap(_offset, _offsetSaveW2);
206 }
208 } else if ( _gofOpMode==MPMaster) {
209 for (int i=0 ; i<_nCPU ; i++)
210 _mpfeArray[i]->applyNLLWeightSquared(flag);
211 } else if ( _gofOpMode==SimMaster) {
212 for(auto& gof : _gofArray)
213 static_cast<RooNLLVar&>(*gof).applyWeightSquared(flag);
214 }
215}
216
217
218////////////////////////////////////////////////////////////////////////////////
219/// Calculate and return likelihood on subset of data.
220/// \param[in] firstEvent First event to be processed.
221/// \param[in] lastEvent First event not to be processed, any more.
222/// \param[in] stepSize Steps between events.
223/// \note For batch computations, the step size **must** be one.
224///
225/// If this an extended likelihood, the extended term is added to the return likelihood
226/// in the batch that encounters the event with index 0.
227
228double RooNLLVar::evaluatePartition(std::size_t firstEvent, std::size_t lastEvent, std::size_t stepSize) const
229{
230 // Throughout the calculation, we use Kahan's algorithm for summing to
231 // prevent loss of precision - this is a factor four more expensive than
232 // straight addition, but since evaluating the PDF is usually much more
233 // expensive than that, we tolerate the additional cost...
235 double sumWeight{0.0};
236
237 auto * pdfClone = static_cast<RooAbsPdf*>(_funcClone);
238
239
240 // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
241 if (_binnedPdf) {
242 ROOT::Math::KahanSum<double> sumWeightKahanSum{0.0};
243 for (auto i=firstEvent ; i<lastEvent ; i+=stepSize) {
244
245 _dataClone->get(i) ;
246
247 double eventWeight = _dataClone->weight();
248
249
250 // Calculate log(Poisson(N|mu) for this bin
251 double N = eventWeight ;
252 double mu = _binnedPdf->getVal()*_binw[i] ;
253 //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ;
254
255 if (mu<=0 && N>0) {
256
257 // Catch error condition: data present where zero events are predicted
258 logEvalError(Form("Observed %f events in bin %lu with zero event yield",N,(unsigned long)i)) ;
259
260 } else if (std::abs(mu)<1e-10 && std::abs(N)<1e-10) {
261
262 // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
263 // since log(mu)=0. No update of result is required since term=0.
264
265 } else {
266
267 result += -1*(-mu + N * std::log(mu) - TMath::LnGamma(N+1));
268 sumWeightKahanSum += eventWeight;
269
270 }
271 }
272
273 sumWeight = sumWeightKahanSum.Sum();
274
275 } else { //unbinned PDF
276
277 std::tie(result, sumWeight) = computeScalar(stepSize, firstEvent, lastEvent);
278
279 // include the extended maximum likelihood term, if requested
280 if(_extended && _setNum==_extSet) {
281 result += pdfClone->extendedTerm(*_dataClone, _weightSq, _doBinOffset);
282 }
283 } //unbinned PDF
284
285
286 // If part of simultaneous PDF normalize probability over
287 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
288 if (_simCount>1) {
289 result += sumWeight * std::log(static_cast<double>(_simCount));
290 }
291
292
293 // At the end of the first full calculation, wire the caches
294 if (_first) {
295 _first = false ;
297 }
298
299
300 // Check if value offset flag is set.
301 if (_doOffset) {
302
303 // If no offset is stored enable this feature now
304 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
305 coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result.Sum() << std::endl ;
306 _offset = result ;
307 }
308
309 // Subtract offset
310 result -= _offset;
311 }
312
313 _evalCarry = result.Carry();
314 return result.Sum() ;
315}
316
317RooNLLVar::ComputeResult RooNLLVar::computeScalar(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const {
318 auto pdfClone = static_cast<const RooAbsPdf*>(_funcClone);
319 return computeScalarFunc(pdfClone, _dataClone, _normSet, _weightSq, stepSize, firstEvent, lastEvent, _doBinOffset);
320}
321
322// static function, also used from TestStatistics::RooUnbinnedL
324 RooArgSet *normSet, bool weightSq, std::size_t stepSize,
325 std::size_t firstEvent, std::size_t lastEvent, bool doBinOffset)
326{
329 RooNaNPacker packedNaN(0.f);
330 const double logSumW = std::log(dataClone->sumEntries());
331
332 auto* dataHist = doBinOffset ? static_cast<RooDataHist*>(dataClone) : nullptr;
333
334 for (auto i=firstEvent; i<lastEvent; i+=stepSize) {
335 dataClone->get(i) ;
336
337 double weight = dataClone->weight(); //FIXME
338 const double ni = weight;
339
340 if (0. == weight * weight) continue ;
341 if (weightSq) weight = dataClone->weightSquared() ;
342
343 double logProba = pdfClone->getLogVal(normSet);
344
345 if(doBinOffset) {
346 logProba -= std::log(ni) - std::log(dataHist->binVolume(i)) - logSumW;
347 }
348
349 const double term = -weight * logProba;
350
351 kahanWeight.Add(weight);
352 kahanProb.Add(term);
353 packedNaN.accumulate(term);
354 }
355
356 if (packedNaN.getPayload() != 0.) {
357 // Some events with evaluation errors. Return "badness" of errors.
358 return {ROOT::Math::KahanSum<double>{packedNaN.getNaNWithPayload()}, kahanWeight.Sum()};
359 }
360
361 return {kahanProb, kahanWeight.Sum()};
362}
#define e(i)
Definition RSha256.hxx:103
#define coutI(a)
#define ClassImp(name)
Definition Rtypes.h:377
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2468
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
T Sum() const
Definition Util.h:240
T Carry() const
Definition Util.h:250
void Add(T x)
Single-element accumulation. Will not vectorise.
Definition Util.h:165
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
void wireAllCaches()
void setValueDirty()
Mark the element dirty. This forces a re-evaluation when a value is requested.
Definition RooAbsArg.h:491
void setAttribute(const Text_t *name, bool value=true)
Set (default) or clear a named boolean attribute of this object.
Storage_t::size_type size() const
RooAbsArg * first() const
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition RooAbsData.h:57
virtual double weight() const =0
virtual double sumEntries() const =0
Return effective number of entries in dataset, i.e., sum all weights.
virtual const RooArgSet * get() const
Definition RooAbsData.h:101
virtual double weightSquared() const =0
RooAbsOptTestStatistic is the abstract base class for test statistics objects that evaluate a functio...
RooAbsReal * _funcClone
Pointer to internal clone of input function.
bool _skipZeroWeights
! Whether to skip entries with weight zero in the evaluation
RooArgSet * _normSet
Pointer to set with observables used for normalization.
RooAbsData * _dataClone
Pointer to internal clone if input data.
bool canBeExtended() const
If true, PDF can provide extended likelihood term.
Definition RooAbsPdf.h:256
virtual double getLogVal(const RooArgSet *set=nullptr) const
Return the log of the current value with given normalization An error message is printed if the argum...
RooAbsReal is the common abstract base class for objects that represent a real value and implements f...
Definition RooAbsReal.h:59
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
void logEvalError(const char *message, const char *serverValueString=nullptr) const
Log evaluation error message.
RooAbsTestStatistic is the abstract base class for all test statistics.
Int_t _setNum
Partition number of this instance in parallel calculation mode.
double _evalCarry
! carry of Kahan sum in evaluatePartition
Int_t _nCPU
Number of processors to use in parallel calculation mode.
GOFOpMode _gofOpMode
Operation mode of test statistic instance.
Int_t _simCount
Total number of component p.d.f.s in RooSimultaneous (if any)
ROOT::Math::KahanSum< double > _offset
! Offset as KahanSum to avoid loss of precision
Int_t _extSet
! Number of designated set to calculated extended term
std::vector< std::unique_ptr< RooAbsTestStatistic > > _gofArray
! Array of sub-contexts representing part of the combined test statistic
pRooRealMPFE * _mpfeArray
! Array of parallel execution frond ends
bool _doOffset
Apply interval value offset to control numeric precision?
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
RooCmdArg is a named container for two doubles, two integers two object points and three string point...
Definition RooCmdArg.h:26
Class RooCmdConfig is a configurable parser for RooCmdArg named arguments.
bool process(const RooCmdArg &arg)
Process given RooCmdArg.
Int_t getInt(const char *name, Int_t defaultValue=0)
Return integer property registered with name 'name'.
bool defineInt(const char *name, const char *argName, Int_t intNum, Int_t defValue=0)
Define integer property name 'name' mapped to integer in slot 'intNum' in RooCmdArg with name argName...
static double decodeDoubleOnTheFly(const char *callerID, const char *cmdArgName, int idx, double defVal, std::initializer_list< std::reference_wrapper< const RooCmdArg > > args)
Find a given double in a list of RooCmdArg.
void allowUndefined(bool flag=true)
If flag is true the processing of unrecognized RooCmdArgs is not considered an error.
static std::string decodeStringOnTheFly(const char *callerID, const char *cmdArgName, Int_t intIdx, const char *defVal, Args_t &&...args)
Static decoder function allows to retrieve string property from set of RooCmdArgs For use in base mem...
static Int_t decodeIntOnTheFly(const char *callerID, const char *cmdArgName, Int_t intIdx, Int_t defVal, Args_t &&...args)
Static decoder function allows to retrieve integer property from set of RooCmdArgs For use in base me...
The RooDataHist is a container class to hold N-dimensional binned data.
Definition RooDataHist.h:39
Class RooNLLVar implements a -log(likelihood) calculation from a dataset and a PDF.
Definition RooNLLVar.h:27
ComputeResult computeScalar(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const
static RooNLLVar::ComputeResult computeScalarFunc(const RooAbsPdf *pdfClone, RooAbsData *dataClone, RooArgSet *normSet, bool weightSq, std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent, bool doBinOffset=false)
RooRealSumPdf * _binnedPdf
!
Definition RooNLLVar.h:83
bool _doBinOffset
Definition RooNLLVar.h:77
ROOT::Math::KahanSum< double > _offsetSaveW2
!
Definition RooNLLVar.h:80
static RooArgSet _emptySet
Definition RooNLLVar.h:71
void applyWeightSquared(bool flag) override
Disables or enables the usage of squared weights.
std::vector< double > _binw
!
Definition RooNLLVar.h:82
std::pair< ROOT::Math::KahanSum< double >, double > ComputeResult
Definition RooNLLVar.h:60
bool _extended
Definition RooNLLVar.h:76
RooNLLVar(const char *name, const char *title, RooAbsPdf &pdf, RooAbsData &data, const RooCmdArg &arg1={}, const RooCmdArg &arg2={}, const RooCmdArg &arg3={}, const RooCmdArg &arg4={}, const RooCmdArg &arg5={}, const RooCmdArg &arg6={}, const RooCmdArg &arg7={}, const RooCmdArg &arg8={}, const RooCmdArg &arg9={})
Construct likelihood from given p.d.f and (binned or unbinned dataset)
Definition RooNLLVar.cxx:91
RooAbsTestStatistic * create(const char *name, const char *title, RooAbsReal &pdf, RooAbsData &adata, const RooArgSet &projDeps, RooAbsTestStatistic::Configuration const &cfg) override
Create a test statistic using several properties of the current instance.
bool _first
!
Definition RooNLLVar.h:79
bool _weightSq
Apply weights squared?
Definition RooNLLVar.h:78
double evaluatePartition(std::size_t firstEvent, std::size_t lastEvent, std::size_t stepSize) const override
Calculate and return likelihood on subset of data.
The class RooRealSumPdf implements a PDF constructed from a sum of functions:
std::list< double > * binBoundaries(RooAbsRealLValue &, double, double) const override
Retrieve bin boundaries if this distribution is binned in obs.
RooRealVar represents a variable that can be changed from the outside.
Definition RooRealVar.h:37
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
@ BulkPartition
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition TMath.cxx:509
std::string rangeName
Stores the configuration parameters for RooAbsTestStatistic.
Little struct that can pack a float into the unused bits of the mantissa of a NaN double.
float getPayload() const
Retrieve packed float.
double getNaNWithPayload() const
Retrieve a NaN with the current float payload packed into the mantissa.
void accumulate(double val)
Accumulate a packed float from another NaN into this.