Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVarNew.cxx
Go to the documentation of this file.
1/// \cond ROOFIT_INTERNAL
2
3/*
4 * Project: RooFit
5 * Authors:
6 * Jonas Rembser, CERN 2021
7 * Emmanouil Michalainas, CERN 2021
8 *
9 * Copyright (c) 2021, CERN
10 *
11 * Redistribution and use in source and binary forms,
12 * with or without modification, are permitted according to the terms
13 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
14 */
15
16/**
17\file RooNLLVarNew.cxx
18\class RooNLLVarNew
19\ingroup Roofitcore
20
21This is a simple class designed to produce the nll values needed by the fitter.
22This class calls functions from `RooBatchCompute` library to provide faster
23computation times.
24**/
25
26#include "RooNLLVarNew.h"
27
28#include <RooHistPdf.h>
29#include <RooBatchCompute.h>
30#include <RooDataHist.h>
31#include <RooNaNPacker.h>
32#include <RooConstVar.h>
33#include <RooRealVar.h>
34#include <RooSetProxy.h>
36
37#include "RooFitImplHelpers.h"
38
39#include <ROOT/StringUtils.hxx>
40
41#include <TClass.h>
42#include <TMath.h>
43#include <Math/Util.h>
44
45#include <numeric>
46#include <stdexcept>
47#include <vector>
48
49// Declare constexpr static members to make them available if odr-used in C++14.
50constexpr const char *RooNLLVarNew::weightVarName;
51constexpr const char *RooNLLVarNew::weightVarNameSumW2;
52
53namespace {
54
55// Use RooConstVar for dummies such that they don't get included in getParameters().
56RooConstVar *dummyVar(const char *name)
57{
58 return new RooConstVar(name, name, 1.0);
59}
60
61// Helper class to represent a template pdf based on the fit dataset.
62class RooOffsetPdf : public RooAbsPdf {
63public:
64 RooOffsetPdf(const char *name, const char *title, RooArgSet const &observables, RooAbsReal &weightVar)
65 : RooAbsPdf(name, title),
66 _observables("!observables", "List of observables", this),
67 _weightVar{"!weightVar", "weightVar", this, weightVar, true, false}
68 {
69 for (RooAbsArg *obs : observables) {
70 _observables.add(*obs);
71 }
72 }
73 RooOffsetPdf(const RooOffsetPdf &other, const char *name = nullptr)
74 : RooAbsPdf(other, name),
75 _observables("!servers", this, other._observables),
76 _weightVar{"!weightVar", this, other._weightVar}
77 {
78 }
79 TObject *clone(const char *newname) const override { return new RooOffsetPdf(*this, newname); }
80
81 void computeBatch(double *output, size_t nEvents, RooFit::Detail::DataMap const &dataMap) const override
82 {
83 std::span<const double> weights = dataMap.at(_weightVar);
84
85 // Create the template histogram from the data. This operation is very
86 // expensive, but since the offset only depends on the observables it
87 // only has to be done once.
88
89 RooDataHist dataHist{"data", "data", _observables};
90 // Loop over events to fill the histogram
91 for (std::size_t i = 0; i < nEvents; ++i) {
92 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
93 var->setVal(dataMap.at(var)[i]);
94 }
95 dataHist.add(_observables, weights[weights.size() == 1 ? 0 : i]);
96 }
97
98 // Lookup bin weights via RooHistPdf
99 RooHistPdf pdf{"offsetPdf", "offsetPdf", _observables, dataHist};
100 for (std::size_t i = 0; i < nEvents; ++i) {
101 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
102 var->setVal(dataMap.at(var)[i]);
103 }
104 output[i] = pdf.getVal(_observables);
105 }
106 }
107
108private:
109 double evaluate() const override { return 0.0; } // should never be called
110
111 RooSetProxy _observables;
113};
114
115} // namespace
116
117/** Construct a RooNLLVarNew
118\param name the name
119\param title the title
120\param pdf The pdf for which the nll is computed for
121\param observables The observabes of the pdf
122\param isExtended Set to true if this is an extended fit
123**/
124RooNLLVarNew::RooNLLVarNew(const char *name, const char *title, RooAbsPdf &pdf, RooArgSet const &observables,
125 bool isExtended, RooFit::OffsetMode offsetMode)
126 : RooAbsReal(name, title),
127 _pdf{"pdf", "pdf", this, pdf},
128 _weightVar{"weightVar", "weightVar", this, *dummyVar(weightVarName), true, false, true},
129 _weightSquaredVar{weightVarNameSumW2, weightVarNameSumW2, this, *dummyVar("weightSquardVar"), true, false, true},
130 _binnedL{pdf.getAttribute("BinnedLikelihoodActive")}
131{
132 RooArgSet obs;
133 pdf.getObservables(&observables, obs);
134
135 // In the "BinnedLikelihoodActiveYields" mode, the pdf values can directly
136 // be interpreted as yields and don't need to be multiplied by the bin
137 // widths. That's why we don't need to even fill them in this case.
138 if (_binnedL && !pdf.getAttribute("BinnedLikelihoodActiveYields")) {
139 fillBinWidthsFromPdfBoundaries(pdf, obs);
140 }
141
142 if (isExtended && !_binnedL) {
143 std::unique_ptr<RooAbsReal> expectedEvents = pdf.createExpectedEventsFunc(&obs);
144 if (expectedEvents) {
145 _expectedEvents =
146 std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", "expectedEvents", this, *expectedEvents);
147 addOwnedComponents(std::move(expectedEvents));
148 }
149 }
150
151 resetWeightVarNames();
154
155 // In the binned likelihood code path, we directly use that data weights for
156 // the offsetting.
157 if (!_binnedL && _doBinOffset) {
158 auto offsetPdf = std::make_unique<RooOffsetPdf>("_offset_pdf", "_offset_pdf", obs, *_weightVar);
159 _offsetPdf = std::make_unique<RooTemplateProxy<RooAbsPdf>>("offsetPdf", "offsetPdf", this, *offsetPdf);
160 addOwnedComponents(std::move(offsetPdf));
161 }
162}
163
164RooNLLVarNew::RooNLLVarNew(const RooNLLVarNew &other, const char *name)
165 : RooAbsReal(other, name),
166 _pdf{"pdf", this, other._pdf},
167 _weightVar{"weightVar", this, other._weightVar},
168 _weightSquaredVar{"weightSquaredVar", this, other._weightSquaredVar},
169 _weightSquared{other._weightSquared},
170 _binnedL{other._binnedL},
171 _doOffset{other._doOffset},
172 _simCount{other._simCount},
173 _prefix{other._prefix},
174 _binw{other._binw}
175{
176 if (other._expectedEvents) {
177 _expectedEvents = std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", this, *other._expectedEvents);
178 }
179}
180
181void RooNLLVarNew::fillBinWidthsFromPdfBoundaries(RooAbsReal const &pdf, RooArgSet const &observables)
182{
183 // Check if the bin widths were already filled
184 if (!_binw.empty()) {
185 return;
186 }
187
188 if (observables.size() != 1) {
189 throw std::runtime_error("BinnedPdf optimization only works with a 1D pdf.");
190 } else {
191 auto *var = static_cast<RooRealVar *>(observables.first());
192 std::list<double> *boundaries = pdf.binBoundaries(*var, var->getMin(), var->getMax());
193 std::list<double>::iterator biter = boundaries->begin();
194 _binw.resize(boundaries->size() - 1);
195 double lastBound = (*biter);
196 ++biter;
197 int ibin = 0;
198 while (biter != boundaries->end()) {
199 _binw[ibin] = (*biter) - lastBound;
200 lastBound = (*biter);
201 ibin++;
202 ++biter;
203 }
204 }
205}
206
207double RooNLLVarNew::computeBatchBinnedL(std::span<const double> preds, std::span<const double> weights) const
208{
210 ROOT::Math::KahanSum<double> sumWeightKahanSum{0.0};
211
212 const bool predsAreYields = _binw.empty();
213
214 for (std::size_t i = 0; i < preds.size(); ++i) {
215
216 double eventWeight = weights[i];
217
218 // Calculate log(Poisson(N|mu) for this bin
219 double N = eventWeight;
220 double mu = preds[i];
221 if (!predsAreYields) {
222 mu *= _binw[i];
223 }
224
225 if (mu <= 0 && N > 0) {
226
227 // Catch error condition: data present where zero events are predicted
228 logEvalError(Form("Observed %f events in bin %lu with zero event yield", N, (unsigned long)i));
229
230 } else if (std::abs(mu) < 1e-10 && std::abs(N) < 1e-10) {
231
232 // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
233 // since log(mu)=0. No update of result is required since term=0.
234
235 } else {
236
237 double term = 0.0;
238 if (_doBinOffset) {
239 term -= -mu + N + N * (std::log(mu) - std::log(N));
240 } else {
241 term -= -mu + N * std::log(mu) - TMath::LnGamma(N + 1);
242 }
243 result += term;
244 sumWeightKahanSum += eventWeight;
245 }
246 }
247
248 return finalizeResult(result, sumWeightKahanSum.Sum());
249}
250
251/** Compute multiple negative logs of probabilities.
252
253\param output An array of doubles where the computation results will be stored
254\param nOut not used
255\note nEvents is the number of events to be processed (the dataMap size)
256\param dataMap A map containing spans with the input data for the computation
257**/
258void RooNLLVarNew::computeBatch(double *output, size_t /*nOut*/, RooFit::Detail::DataMap const &dataMap) const
259{
260 std::span<const double> weights = dataMap.at(_weightVar);
261 std::span<const double> weightsSumW2 = dataMap.at(_weightSquaredVar);
262
263 if (_binnedL) {
264 output[0] = computeBatchBinnedL(dataMap.at(&*_pdf), _weightSquared ? weightsSumW2 : weights);
265 return;
266 }
267
268 auto config = dataMap.config(this);
269
270 auto probas = dataMap.at(_pdf);
271
272 _sumWeight = weights.size() == 1 ? weights[0] * probas.size()
273 : RooBatchCompute::reduceSum(config, weights.data(), weights.size());
274 if (_expectedEvents && _weightSquared && _sumWeight2 == 0.0) {
275 _sumWeight2 = weights.size() == 1 ? weightsSumW2[0] * probas.size()
276 : RooBatchCompute::reduceSum(config, weightsSumW2.data(), weightsSumW2.size());
277 }
278
279 auto nllOut = RooBatchCompute::reduceNLL(config, probas, _weightSquared ? weightsSumW2 : weights,
280 _doBinOffset ? dataMap.at(*_offsetPdf) : std::span<const double>{});
281
282 if (nllOut.nLargeValues > 0) {
283 oocoutW(&*_pdf, Eval) << "RooAbsPdf::getLogVal(" << _pdf->GetName()
284 << ") WARNING: top-level pdf has unexpectedly large values" << std::endl;
285 }
286 for (std::size_t i = 0; i < nllOut.nNonPositiveValues; ++i) {
287 _pdf->logEvalError("getLogVal() top-level p.d.f not greater than zero");
288 }
289 for (std::size_t i = 0; i < nllOut.nNaNValues; ++i) {
290 _pdf->logEvalError("getLogVal() top-level p.d.f evaluates to NaN");
291 }
292
293 if (_expectedEvents) {
294 std::span<const double> expected = dataMap.at(*_expectedEvents);
295 nllOut.nllSum += _pdf->extendedTerm(_sumWeight, expected[0], _weightSquared ? _sumWeight2 : 0.0, _doBinOffset);
296 }
297
298 output[0] = finalizeResult({nllOut.nllSum, nllOut.nllSumCarry}, _sumWeight);
299}
300
301void RooNLLVarNew::getParametersHook(const RooArgSet * /*nset*/, RooArgSet *params, bool /*stripDisconnected*/) const
302{
303 // strip away the special variables
304 params->remove(RooArgList{*_weightVar, *_weightSquaredVar}, true, true);
305}
306
307////////////////////////////////////////////////////////////////////////////////
308/// Sets the prefix for the special variables of this NLL, like weights or bin
309/// volumes.
310/// \param[in] prefix The prefix to add to the observables and weight names.
311void RooNLLVarNew::setPrefix(std::string const &prefix)
312{
313 _prefix = prefix;
314
315 resetWeightVarNames();
316}
317
318void RooNLLVarNew::resetWeightVarNames()
319{
320 _weightVar->SetName((_prefix + weightVarName).c_str());
321 _weightSquaredVar->SetName((_prefix + weightVarNameSumW2).c_str());
322 if (_offsetPdf) {
323 (*_offsetPdf)->SetName((_prefix + "_offset_pdf").c_str());
324 }
325}
326
327////////////////////////////////////////////////////////////////////////////////
328/// Toggles the weight square correction.
329void RooNLLVarNew::applyWeightSquared(bool flag)
330{
331 _weightSquared = flag;
332}
333
334void RooNLLVarNew::enableOffsetting(bool flag)
335{
336 _doOffset = flag;
338}
339
340double RooNLLVarNew::finalizeResult(ROOT::Math::KahanSum<double> result, double weightSum) const
341{
342 // If part of simultaneous PDF normalize probability over
343 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
344 // If we do bin-by bin offsetting, we don't do this because it cancels out
345 if (!_doBinOffset && _simCount > 1) {
346 result += weightSum * std::log(static_cast<double>(_simCount));
347 }
348
349 // Check if value offset flag is set.
350 if (_doOffset) {
351
352 // If no offset is stored enable this feature now
353 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
354 _offset = result;
355 }
356
357 // Subtract offset
358 if (!RooAbsReal::hideOffset()) {
359 result -= _offset;
360 }
361 }
362 return result.Sum();
363}
364
365void RooNLLVarNew::translate(RooFit::Detail::CodeSquashContext &ctx) const
366{
367 std::string weightSumName = RooFit::Detail::makeValidVarName(GetName()) + "WeightSum";
368 std::string resName = RooFit::Detail::makeValidVarName(GetName()) + "Result";
369 ctx.addResult(this, resName);
370 ctx.addToGlobalScope("double " + weightSumName + " = 0.0;\n");
371 ctx.addToGlobalScope("double " + resName + " = 0.0;\n");
372
373 const bool needWeightSum = _expectedEvents || _simCount > 1;
374
375 if (needWeightSum) {
376 auto scope = ctx.beginLoop(this);
377 ctx.addToCodeBody(weightSumName + " += " + ctx.getResult(*_weightVar) + ";\n");
378 }
379 if (_simCount > 1) {
380 std::string simCountStr = std::to_string(static_cast<double>(_simCount));
381 ctx.addToCodeBody(resName + " += " + weightSumName + " * std::log(" + simCountStr + ");\n");
382 }
383
384 // Begin loop scope for the observables and weight variable. If the weight
385 // is a scalar, the context will ignore it for the loop scope. The closing
386 // brackets of the loop is written at the end of the scopes lifetime.
387 {
388 auto scope = ctx.beginLoop(this);
389 std::string const &weight = ctx.getResult(_weightVar.arg());
390 std::string const &pdfName = ctx.getResult(_pdf.arg());
391
392 if (_binnedL) {
393 // Since we only support uniform binning, bin width is the same for all.
394 if (!_pdf->getAttribute("BinnedLikelihoodActiveYields")) {
395 std::stringstream errorMsg;
396 errorMsg << "RooNLLVarNew::translate(): binned likelihood optimization is only supported when raw pdf "
397 "values can be interpreted as yields."
398 << " This is not the case for HistFactory models written with ROOT versions before 6.26.00";
399 coutE(InputArguments) << errorMsg.str() << std::endl;
400 throw std::runtime_error(errorMsg.str());
401 }
402 std::string muName = pdfName;
403 ctx.addToCodeBody(this, resName + " += -1 * (-" + muName + " + " + weight + " * std::log(" + muName +
404 ") - TMath::LnGamma(" + weight + "+ 1));\n");
405 } else {
406 ctx.addToCodeBody(this, resName + " -= " + weight + " * std::log(" + pdfName + ");\n");
407 }
408 }
409 if (_expectedEvents) {
410 std::string expected = ctx.getResult(**_expectedEvents);
411 ctx.addToCodeBody(resName + " += " + expected + " - " + weightSumName + " * std::log(" + expected + ");\n");
412 }
413}
414
415/// \endcond
#define e(i)
Definition RSha256.hxx:103
ROOT::RRangeCast< T, false, Range_t > static_range_cast(Range_t &&coll)
bool _doOffset
Apply interval value offset to control numeric precision?
void enableOffsetting(bool flag) override
ROOT::Math::KahanSum< double > _offset
! Offset as KahanSum to avoid loss of precision
Int_t _simCount
Total number of component p.d.f.s in RooSimultaneous (if any)
double evaluate() const override
TObject * clone(const char *newname) const override
Definition RooChi2Var.h:9
#define oocoutW(o, a)
#define coutE(a)
bool _doBinOffset
Definition RooNLLVar.h:44
std::vector< double > _binw
!
Definition RooNLLVar.h:49
std::unique_ptr< RooAbsPdf > _offsetPdf
! An optional per-bin likelihood offset
Definition RooNLLVar.h:51
void enableBinOffsetting(bool on=true)
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
T Sum() const
Definition Util.h:240
T Carry() const
Definition Util.h:250
Common abstract base class for objects that represent a value and a "shape" in RooFit.
Definition RooAbsArg.h:77
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
bool getAttribute(const Text_t *name) const
Check if a named attribute is set. By default, all attributes are unset.
virtual bool remove(const RooAbsArg &var, bool silent=false, bool matchByNameOnly=false)
Remove the specified argument from our list.
Storage_t::size_type size() const
RooAbsArg * first() const
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
virtual std::unique_ptr< RooAbsReal > createExpectedEventsFunc(const RooArgSet *nset) const
Returns an object that represents the expected number of events for a given normalization set,...
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
virtual std::list< double > * binBoundaries(RooAbsRealLValue &obs, double xlo, double xhi) const
Retrieve bin boundaries if this distribution is binned in obs.
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
static bool hideOffset()
RooArgList is a container object that can hold multiple RooAbsArg objects.
Definition RooArgList.h:22
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
Represents a constant real-valued object.
Definition RooConstVar.h:23
Container class to hold N-dimensional binned data.
Definition RooDataHist.h:39
void add(const RooArgSet &row, double wgt=1.0) override
Add wgt to the bin content enclosed by the coordinates passed in row.
Definition RooDataHist.h:66
A class to maintain the context for squashing of RooFit models into code.
void addResult(RooAbsArg const *key, std::string const &value)
A function to save an expression that includes/depends on the result of the input node.
void addToCodeBody(RooAbsArg const *klass, std::string const &in)
Adds the input string to the squashed code body.
std::string const & getResult(RooAbsArg const &arg)
Gets the result for the given node using the node name.
void addToGlobalScope(std::string const &str)
Adds the given string to the string block that will be emitted at the top of the squashed function.
std::unique_ptr< LoopScope > beginLoop(RooAbsArg const *in)
Create a RAII scope for iterating over vector observables.
RooBatchCompute::Config config(RooAbsArg const *arg) const
Definition DataMap.cxx:40
std::span< const double > at(RooAbsArg const *arg, RooAbsArg const *caller=nullptr)
Definition DataMap.cxx:22
A propability density function sampled from a multidimensional histogram.
Definition RooHistPdf.h:30
Variable that can be changed from the outside.
Definition RooRealVar.h:37
Mother of all ROOT objects.
Definition TObject.h:41
double reduceSum(Config cfg, InputArr input, size_t n)
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
std::string makeValidVarName(std::string const &in)
OffsetMode
For setting the offset mode with the Offset() command argument to RooAbsPdf::fitTo()
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition TMath.cxx:509
static void output()