Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVarNew.cxx
Go to the documentation of this file.
1/// \cond ROOFIT_INTERNAL
2
3/*
4 * Project: RooFit
5 * Authors:
6 * Jonas Rembser, CERN 2021
7 * Emmanouil Michalainas, CERN 2021
8 *
9 * Copyright (c) 2021, CERN
10 *
11 * Redistribution and use in source and binary forms,
12 * with or without modification, are permitted according to the terms
13 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
14 */
15
16/**
17\file RooNLLVarNew.cxx
18\class RooNLLVarNew
19\ingroup Roofitcore
20
21This is a simple class designed to produce the nll values needed by the fitter.
22This class calls functions from `RooBatchCompute` library to provide faster
23computation times.
24**/
25
27
28#include <RooHistPdf.h>
29#include <RooBatchCompute.h>
30#include <RooDataHist.h>
31#include <RooNaNPacker.h>
32#include <RooConstVar.h>
33#include <RooRealVar.h>
34#include <RooSetProxy.h>
36
37#include "RooFitImplHelpers.h"
38
39#include <ROOT/StringUtils.hxx>
40
41#include <TClass.h>
42#include <TMath.h>
43#include <Math/Util.h>
44
45#include <numeric>
46#include <stdexcept>
47#include <vector>
48
49namespace RooFit::Detail {
50
51// Declare constexpr static members to make them available if odr-used in C++14.
52constexpr const char *RooNLLVarNew::weightVarName;
53constexpr const char *RooNLLVarNew::weightVarNameSumW2;
54constexpr const char *RooNLLVarNew::binVolumeVarName;
55constexpr const char *RooNLLVarNew::weightErrorLoVarName;
56constexpr const char *RooNLLVarNew::weightErrorHiVarName;
57
58namespace {
59
60// Use RooConstVar for dummies such that they don't get included in getParameters().
61std::unique_ptr<RooConstVar> dummyVar(const char *name)
62{
63 return std::make_unique<RooConstVar>(name, name, 1.0);
64}
65
66// Helper class to represent a template pdf based on the fit dataset.
67class RooOffsetPdf : public RooAbsPdf {
68public:
69 RooOffsetPdf(const char *name, const char *title, RooArgSet const &observables, RooAbsReal &weightVar)
70 : RooAbsPdf(name, title),
71 _observables("!observables", "List of observables", this),
72 _weightVar{"!weightVar", "weightVar", this, weightVar, true, false}
73 {
74 for (RooAbsArg *obs : observables) {
75 _observables.add(*obs);
76 }
77 }
78 RooOffsetPdf(const RooOffsetPdf &other, const char *name = nullptr)
80 _observables("!servers", this, other._observables),
81 _weightVar{"!weightVar", this, other._weightVar}
82 {
83 }
84 TObject *clone(const char *newname) const override { return new RooOffsetPdf(*this, newname); }
85
86 void doEval(RooFit::EvalContext &ctx) const override
87 {
88 std::span<double> output = ctx.output();
89 std::size_t nEvents = output.size();
90
91 std::span<const double> weights = ctx.at(_weightVar);
92
93 // Create the template histogram from the data. This operation is very
94 // expensive, but since the offset only depends on the observables it
95 // only has to be done once.
96
97 RooDataHist dataHist{"data", "data", _observables};
98 // Loop over events to fill the histogram
99 for (std::size_t i = 0; i < nEvents; ++i) {
100 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
101 var->setVal(ctx.at(var)[i]);
102 }
103 dataHist.add(_observables, weights[weights.size() == 1 ? 0 : i]);
104 }
105
106 // Lookup bin weights via RooHistPdf
107 RooHistPdf pdf{"offsetPdf", "offsetPdf", _observables, dataHist};
108 for (std::size_t i = 0; i < nEvents; ++i) {
109 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
110 var->setVal(ctx.at(var)[i]);
111 }
112 output[i] = pdf.getVal(_observables);
113 }
114 }
115
116private:
117 double evaluate() const override { return 0.0; } // should never be called
118
119 RooSetProxy _observables;
121};
122
123} // namespace
124
125/// Construct either an NLL or a chi-squared test statistic.
126/// \param func The pdf or function to evaluate. For `Statistic::NLL` a
127/// RooAbsPdf is required, and for `Statistic::Chi2` any RooAbsReal is accepted.
128RooNLLVarNew::RooNLLVarNew(const char *name, const char *title, RooAbsReal &func, RooArgSet const &observables,
129 Config const &cfg)
130 : RooAbsReal(name, title),
131 _func{"func", "func", this, func},
132 _weightVar{"weightVar", "weightVar", this, dummyVar(weightVarName)},
136{
137 auto *pdf = dynamic_cast<RooAbsPdf *>(&func);
138
139 if (_statistic == Statistic::Chi2) {
140 // Signal to RooEvaluatorWrapper::setData that zero-weight bins must be
141 // retained (chi2 needs every bin's prediction, even where the data is
142 // empty).
143 setAttribute("Chi2EvaluationActive");
144 _funcMode = !pdf ? FuncMode::Function : (cfg.extended ? FuncMode::ExtendedPdf : FuncMode::Pdf);
145 } else {
146 _binnedL = pdf && pdf->getAttribute("BinnedLikelihoodActive");
147 }
148
149 RooArgSet obs;
150 func.getObservables(&observables, obs);
151
152 // Extended mode needs an expected-events function for both NLL and chi2
153 // (NLL adds it as an extra additive term, chi2 uses it as the predicted
154 // yield normalisation). Skip it for binned NLL (where the yields come
155 // directly from the pdf) and for chi2 Function mode (where the function
156 // values are directly the predicted yields).
157 const bool wantsExpectedEvents = pdf && ((_statistic == Statistic::NLL && cfg.extended && !_binnedL) ||
158 (_statistic == Statistic::Chi2 && _funcMode == FuncMode::ExtendedPdf));
160 std::unique_ptr<RooAbsReal> expectedEvents = pdf->createExpectedEventsFunc(&obs);
161 if (expectedEvents) {
162 _expectedEvents =
163 std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", "expectedEvents", this, *expectedEvents);
164 addOwnedComponents(std::move(expectedEvents));
165 }
166 }
167
168 if (_statistic == Statistic::NLL) {
169 // In the "BinnedLikelihoodActiveYields" mode, the pdf values can
170 // directly be interpreted as yields and don't need to be multiplied by
171 // the bin widths. That's why we don't need to even fill them in this
172 // case.
173 if (_binnedL && !pdf->getAttribute("BinnedLikelihoodActiveYields")) {
175 }
176
177 enableOffsetting(cfg.offsetMode == RooFit::OffsetMode::Initial);
179
180 // In the binned likelihood code path, we directly use that data weights
181 // for the offsetting.
182 if (!_binnedL && _doBinOffset) {
183 auto offsetPdf = std::make_unique<RooOffsetPdf>("_offset_func", "_offset_func", obs, *_weightVar);
184 _offsetPdf = std::make_unique<RooTemplateProxy<RooAbsPdf>>("offsetPdf", "offsetPdf", this, *offsetPdf);
185 addOwnedComponents(std::move(offsetPdf));
186 }
187 } else {
188 // Chi2-only proxies: per-bin volumes, plus per-bin asymmetric errors
189 // when Poisson error mode is requested.
190 auto binVolumeDummy = std::make_unique<RooConstVar>(binVolumeVarName, binVolumeVarName, 1.0);
191 _binVolumes = std::make_unique<RooTemplateProxy<RooAbsReal>>(binVolumeVarName, binVolumeVarName, this,
192 *binVolumeDummy, true, false);
193 addOwnedComponents(std::move(binVolumeDummy));
194
196 auto errLoDummy = std::make_unique<RooConstVar>(weightErrorLoVarName, weightErrorLoVarName, 1.0);
197 auto errHiDummy = std::make_unique<RooConstVar>(weightErrorHiVarName, weightErrorHiVarName, 1.0);
198 _weightErrLo = std::make_unique<RooTemplateProxy<RooAbsReal>>(weightErrorLoVarName, weightErrorLoVarName, this,
199 *errLoDummy, true, false);
200 _weightErrHi = std::make_unique<RooTemplateProxy<RooAbsReal>>(weightErrorHiVarName, weightErrorHiVarName, this,
201 *errHiDummy, true, false);
202 addOwnedComponents(std::move(errLoDummy));
203 addOwnedComponents(std::move(errHiDummy));
204 }
205 }
206
208}
209
210RooNLLVarNew::RooNLLVarNew(const RooNLLVarNew &other, const char *name)
212 _func{"func", this, other._func},
213 _weightVar{"weightVar", this, other._weightVar},
214 _weightSquaredVar{"weightSquaredVar", this, other._weightSquaredVar},
223 _prefix{other._prefix},
224 _binw{other._binw}
225{
226 if (other._expectedEvents) {
227 _expectedEvents = std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", this, *other._expectedEvents);
228 }
229 if (other._binVolumes) {
230 _binVolumes = std::make_unique<RooTemplateProxy<RooAbsReal>>(binVolumeVarName, this, *other._binVolumes);
231 }
232 if (other._weightErrLo) {
233 _weightErrLo = std::make_unique<RooTemplateProxy<RooAbsReal>>(weightErrorLoVarName, this, *other._weightErrLo);
234 }
235 if (other._weightErrHi) {
236 _weightErrHi = std::make_unique<RooTemplateProxy<RooAbsReal>>(weightErrorHiVarName, this, *other._weightErrHi);
237 }
238}
239
240void RooNLLVarNew::fillBinWidthsFromPdfBoundaries(RooAbsReal const &pdf, RooArgSet const &observables)
241{
242 // Check if the bin widths were already filled
243 if (!_binw.empty()) {
244 return;
245 }
246
247 if (observables.size() != 1) {
248 throw std::runtime_error("BinnedPdf optimization only works with a 1D pdf.");
249 } else {
250 auto *var = static_cast<RooRealVar *>(observables.first());
251 std::list<double> *boundaries = pdf.binBoundaries(*var, var->getMin(), var->getMax());
252 std::list<double>::iterator biter = boundaries->begin();
253 _binw.resize(boundaries->size() - 1);
254 double lastBound = (*biter);
255 ++biter;
256 int ibin = 0;
257 while (biter != boundaries->end()) {
258 _binw[ibin] = (*biter) - lastBound;
259 lastBound = (*biter);
260 ibin++;
261 ++biter;
262 }
263 }
264}
265
266void RooNLLVarNew::doEvalBinnedL(RooFit::EvalContext &ctx, std::span<const double> preds,
267 std::span<const double> weights) const
268{
271
272 const bool predsAreYields = _binw.empty();
273
274 for (std::size_t i = 0; i < preds.size(); ++i) {
275
276 // Calculate log(Poisson(N|mu) for this bin
277 double N = weights[i];
278 double mu = preds[i];
279 if (!predsAreYields) {
280 mu *= _binw[i];
281 }
282
283 if (mu <= 0 && N > 0) {
284 // Catch error condition: data present where zero events are predicted
285 logEvalError(Form("Observed %f events in bin %lu with zero event yield", N, (unsigned long)i));
286 } else {
289 }
290 }
291
293}
294
295void RooNLLVarNew::doEvalChi2(RooFit::EvalContext &ctx, std::span<const double> preds, std::span<const double> weights,
296 std::span<const double> weightsSumW2) const
297{
298 // Error type None implies zero sigma for every bin: the chi2 is undefined
299 // everywhere but empty bins. Match the legacy behaviour of returning zero.
302 return;
303 }
304
305 auto config = ctx.config(this);
306 std::span<const double> binVol = ctx.at(*_binVolumes);
307 std::span<const double> errLo = _weightErrLo ? ctx.at(*_weightErrLo) : std::span<const double>{};
308 std::span<const double> errHi = _weightErrHi ? ctx.at(*_weightErrHi) : std::span<const double>{};
309
310 const double sumWeight = RooBatchCompute::reduceSum(config, weights.data(), weights.size());
311
312 double normFactor = 1.0;
313 switch (_funcMode) {
314 case FuncMode::Pdf: normFactor = sumWeight; break;
315 case FuncMode::ExtendedPdf: normFactor = ctx.at(*_expectedEvents)[0]; break;
316 case FuncMode::Function: normFactor = 1.0; break;
317 }
318
321
322 for (std::size_t i = 0; i < preds.size(); ++i) {
323 const double N = weights[i];
324 const double mu = preds[i] * normFactor * binVol[i];
325 const double diff = mu - N;
326
327 double sigma2;
328 switch (_chi2ErrorType) {
329 case RooDataHist::SumW2: sigma2 = weightsSumW2[i]; break;
331 // Poisson errors are asymmetric: choose the side facing the prediction.
332 const double err = diff > 0 ? errHi[i] : errLo[i];
333 sigma2 = err * err;
334 break;
335 }
336 default: sigma2 = mu; break; // Expected
337 }
338
339 // Skip bins where data, prediction and error are all zero (matches legacy RooChi2Var).
340 if (sigma2 == 0.0 && N == 0.0 && mu == 0.0) {
341 continue;
342 }
343 if (sigma2 <= 0.0) {
344 logEvalError(Form("chi2 bin %lu has non-positive error; term replaced with NaN", (unsigned long)i));
345 result += std::numeric_limits<double>::quiet_NaN();
346 continue;
347 }
348
349 result += diff * diff / sigma2;
351 }
352
354}
355
356void RooNLLVarNew::doEval(RooFit::EvalContext &ctx) const
357{
358 std::span<const double> weights = ctx.at(_weightVar);
359 std::span<const double> weightsSumW2 = ctx.at(_weightSquaredVar);
360
361 if (_statistic == Statistic::Chi2) {
362 return doEvalChi2(ctx, ctx.at(&*_func), weights, weightsSumW2);
363 }
364
365 if (_binnedL) {
366 return doEvalBinnedL(ctx, ctx.at(&*_func), _weightSquared ? weightsSumW2 : weights);
367 }
368
369 auto config = ctx.config(this);
370
371 auto probas = ctx.at(_func);
372
373 double sumWeight = RooBatchCompute::reduceSum(config, weights.data(), weights.size());
374 double sumWeight2 = 0.;
375 if (_expectedEvents && _weightSquared) {
377 }
378
379 auto nllOut = RooBatchCompute::reduceNLL(config, probas, _weightSquared ? weightsSumW2 : weights,
380 _doBinOffset ? ctx.at(*_offsetPdf) : std::span<const double>{});
381
382 if (nllOut.nInfiniteValues > 0) {
383 oocoutW(&*_func, Eval) << "RooAbsPdf::getLogVal(" << _func->GetName()
384 << ") WARNING: top-level pdf has some infinite values" << std::endl;
385 }
386 for (std::size_t i = 0; i < nllOut.nNonPositiveValues; ++i) {
387 _func->logEvalError("getLogVal() top-level p.d.f not greater than zero");
388 }
389 for (std::size_t i = 0; i < nllOut.nNaNValues; ++i) {
390 _func->logEvalError("getLogVal() top-level p.d.f evaluates to NaN");
391 }
392
393 if (_expectedEvents) {
394 // The unbinned NLL path is only reached for pdf inputs, so the cast is safe.
395 auto &pdf = static_cast<RooAbsPdf &>(const_cast<RooAbsReal &>(*_func));
396 std::span<const double> expected = ctx.at(*_expectedEvents);
397 nllOut.nllSum += pdf.extendedTerm(sumWeight, expected[0], _weightSquared ? sumWeight2 : 0.0, _doBinOffset);
398 }
399
400 finalizeResult(ctx, {nllOut.nllSum, nllOut.nllSumCarry}, sumWeight);
401}
402
403////////////////////////////////////////////////////////////////////////////////
404/// Sets the prefix for the special variables of this NLL, like weights or bin
405/// volumes.
406/// \param[in] prefix The prefix to add to the observables and weight names.
407void RooNLLVarNew::setPrefix(std::string const &prefix)
408{
409 _prefix = prefix;
410
412}
413
414void RooNLLVarNew::resetWeightVarNames()
415{
416 _weightVar->SetName((_prefix + weightVarName).c_str());
417 _weightSquaredVar->SetName((_prefix + weightVarNameSumW2).c_str());
418 if (_offsetPdf) {
419 (*_offsetPdf)->SetName((_prefix + "_offset_func").c_str());
420 }
421 if (_binVolumes) {
422 (*_binVolumes)->SetName((_prefix + binVolumeVarName).c_str());
423 }
424 if (_weightErrLo) {
425 (*_weightErrLo)->SetName((_prefix + weightErrorLoVarName).c_str());
426 }
427 if (_weightErrHi) {
428 (*_weightErrHi)->SetName((_prefix + weightErrorHiVarName).c_str());
429 }
430}
431
432////////////////////////////////////////////////////////////////////////////////
433/// Toggles the weight square correction.
434void RooNLLVarNew::applyWeightSquared(bool flag)
435{
436 if (_statistic == Statistic::Chi2) {
437 if (flag) {
438 coutW(Fitting) << "RooNLLVarNew::applyWeightSquared(" << GetName()
439 << ") has no effect on a chi-squared evaluator; ignoring." << std::endl;
440 }
441 return;
442 }
444}
445
446void RooNLLVarNew::enableOffsetting(bool flag)
447{
448 _doOffset = flag;
450}
451
452void RooNLLVarNew::finalizeResult(RooFit::EvalContext &ctx, ROOT::Math::KahanSum<double> result, double weightSum) const
453{
454 // If part of simultaneous PDF normalize probability over
455 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
456 // If we do bin-by bin offsetting, we don't do this because it cancels out.
457 // The correction is specific to NLL; it has no meaning for chi2.
458 if (_statistic == Statistic::NLL && !_doBinOffset && _simCount > 1) {
459 result += weightSum * std::log(static_cast<double>(_simCount));
460 }
461
462 // Check if value offset flag is set.
463 if (_doOffset) {
464
465 // If no offset is stored enable this feature now
466 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
467 _offset = result;
468 }
469 }
470 ctx.setOutputWithOffset(this, result, _offset);
471}
472
473} // namespace RooFit::Detail
474
475/// \endcond
ROOT::RRangeCast< T, false, Range_t > static_range_cast(Range_t &&coll)
#define oocoutW(o, a)
#define coutW(a)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:148
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2495
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:141
Common abstract base class for objects that represent a value and a "shape" in RooFit.
Definition RooAbsArg.h:76
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
Storage_t::size_type size() const
RooAbsArg * first() const
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:32
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:63
virtual std::list< double > * binBoundaries(RooAbsRealLValue &obs, double xlo, double xhi) const
Retrieve bin boundaries if this distribution is binned in obs.
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:24
Container class to hold N-dimensional binned data.
Definition RooDataHist.h:40
void add(const RooArgSet &row, double wgt=1.0) override
Add wgt to the bin content enclosed by the coordinates passed in row.
Definition RooDataHist.h:72
std::span< const double > at(RooAbsArg const *arg, RooAbsArg const *caller=nullptr)
std::span< double > output()
RooBatchCompute::Config config(RooAbsArg const *arg) const
void setOutputWithOffset(RooAbsArg const *arg, ROOT::Math::KahanSum< double > val, ROOT::Math::KahanSum< double > const &offset)
Sets the output value with an offset.
A probability density function sampled from a multidimensional histogram.
Definition RooHistPdf.h:30
Variable that can be changed from the outside.
Definition RooRealVar.h:37
Mother of all ROOT objects.
Definition TObject.h:42
double reduceSum(Config cfg, InputArr input, size_t n)
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
double nll(double pdf, double weight, int binnedL, int doBinOffset)
Definition MathFuncs.h:452
void evaluate(typename Architecture_t::Tensor_t &A, EActivationFunction f)
Apply the given activation function to each value in the given tensor A.
Definition Functions.h:98
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)