Logo ROOT  
Reference Guide
RooKeysPdf.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitModels *
4 * @(#)root/roofit:$Id$
5 * Authors: *
6 * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9 * *
10 * Copyright (c) 2000-2005, Regents of the University of California *
11 * and Stanford University. All rights reserved. *
12 * *
13 * Redistribution and use in source and binary forms, *
14 * with or without modification, are permitted according to the terms *
15 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16 *****************************************************************************/
17
18/** \class RooKeysPdf
19 \ingroup Roofit
20
21Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
22of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
23each contributing 1/N to the total integral of the p.d.f..
24If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
25local density of events, i.e. narrow for regions with high event density to preserve details and
26wide for regions with low event density to promote smoothness. The details of the general algorithm
27are described in the following paper:
28
29Cranmer KS, Kernel Estimation in High-Energy Physics.
30 Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
31**/
32
33#include "RooFit.h"
34
35#include <limits>
36#include <algorithm>
37#include <cmath>
38#include <iostream>
39#include "TMath.h"
40#include "snprintf.h"
41#include "RooKeysPdf.h"
42#include "RooAbsReal.h"
43#include "RooRealVar.h"
44#include "RooRandom.h"
45#include "RooDataSet.h"
46#include "RooTrace.h"
47
48#include "TError.h"
49
50using namespace std;
51
53
56
57////////////////////////////////////////////////////////////////////////////////
58/// coverity[UNINIT_CTOR]
59
60 RooKeysPdf::RooKeysPdf() : _nEvents(0), _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
61 _mirrorLeft(kFALSE), _mirrorRight(kFALSE),
62 _asymLeft(kFALSE), _asymRight(kFALSE)
63{
65}
66
67////////////////////////////////////////////////////////////////////////////////
68/// cache stuff about x
69
70RooKeysPdf::RooKeysPdf(const char *name, const char *title,
71 RooAbsReal& x, RooDataSet& data,
72 Mirror mirror, Double_t rho) :
73 RooAbsPdf(name,title),
74 _x("x","observable",this,x),
75 _nEvents(0),
76 _dataPts(0),
77 _dataWgts(0),
78 _weights(0),
79 _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
80 _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
81 _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
82 _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
83 _rho(rho)
84{
85 snprintf(_varName, 128,"%s", x.GetName());
86 RooAbsRealLValue& real= (RooRealVar&)(_x.arg());
87 _lo = real.getMin();
88 _hi = real.getMax();
89 _binWidth = (_hi-_lo)/(_nPoints-1);
90
91 // form the lookup table
92 LoadDataSet(data);
94}
95
96////////////////////////////////////////////////////////////////////////////////
97/// cache stuff about x
98
99RooKeysPdf::RooKeysPdf(const char *name, const char *title,
100 RooAbsReal& xpdf, RooRealVar& xdata, RooDataSet& data,
101 Mirror mirror, Double_t rho) :
102 RooAbsPdf(name,title),
103 _x("x","Observable",this,xpdf),
104 _nEvents(0),
105 _dataPts(0),
106 _dataWgts(0),
107 _weights(0),
108 _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
109 _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
110 _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
111 _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
112 _rho(rho)
113{
114 snprintf(_varName, 128,"%s", xdata.GetName());
115 RooAbsRealLValue& real= (RooRealVar&)(xdata);
116 _lo = real.getMin();
117 _hi = real.getMax();
118 _binWidth = (_hi-_lo)/(_nPoints-1);
119
120 // form the lookup table
121 LoadDataSet(data);
123}
124
125////////////////////////////////////////////////////////////////////////////////
126
127RooKeysPdf::RooKeysPdf(const RooKeysPdf& other, const char* name):
128 RooAbsPdf(other,name), _x("x",this,other._x), _nEvents(other._nEvents),
129 _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
130 _mirrorLeft( other._mirrorLeft ), _mirrorRight( other._mirrorRight ),
131 _asymLeft(other._asymLeft), _asymRight(other._asymRight),
132 _rho( other._rho ) {
133 // cache stuff about x
134 snprintf(_varName, 128, "%s", other._varName );
135 _lo = other._lo;
136 _hi = other._hi;
137 _binWidth = other._binWidth;
138
139 // copy over data and weights... not necessary, commented out for speed
140// _dataPts = new Double_t[_nEvents];
141// _weights = new Double_t[_nEvents];
142// for (Int_t i= 0; i<_nEvents; i++) {
143// _dataPts[i]= other._dataPts[i];
144// _weights[i]= other._weights[i];
145// }
146
147 // copy over the lookup table
148 for (Int_t i= 0; i<_nPoints+1; i++)
149 _lookupTable[i]= other._lookupTable[i];
150
152}
153
154////////////////////////////////////////////////////////////////////////////////
155
157 delete[] _dataPts;
158 delete[] _dataWgts;
159 delete[] _weights;
160
162}
163
164////////////////////////////////////////////////////////////////////////////////
165/// small helper structure
166
167namespace {
168 struct Data {
169 Double_t x;
170 Double_t w;
171 };
172 // helper to order two Data structures
173 struct cmp {
174 inline bool operator()(const struct Data& a, const struct Data& b) const
175 { return a.x < b.x; }
176 };
177}
179 delete[] _dataPts;
180 delete[] _dataWgts;
181 delete[] _weights;
182
183 std::vector<Data> tmp;
184 tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
185 Double_t x0 = 0., x1 = 0., x2 = 0.;
186 _sumWgt = 0.;
187 // read the data set into tmp and accumulate some statistics
188 RooRealVar& real = (RooRealVar&)(data.get()->operator[](_varName));
189 for (Int_t i = 0; i < data.numEntries(); ++i) {
190 data.get(i);
191 const Double_t x = real.getVal();
192 const Double_t w = data.weight();
193 x0 += w;
194 x1 += w * x;
195 x2 += w * x * x;
197
198 Data p;
199 p.x = x, p.w = w;
200 tmp.push_back(p);
201 if (_mirrorLeft) {
202 p.x = 2. * _lo - x;
203 tmp.push_back(p);
204 }
205 if (_mirrorRight) {
206 p.x = 2. * _hi - x;
207 tmp.push_back(p);
208 }
209 }
210 // sort the entire data set so that values of x are increasing
211 std::sort(tmp.begin(), tmp.end(), cmp());
212
213 // copy the sorted data set to its final destination
214 _nEvents = tmp.size();
217 for (unsigned i = 0; i < tmp.size(); ++i) {
218 _dataPts[i] = tmp[i].x;
219 _dataWgts[i] = tmp[i].w;
220 }
221 {
222 // free tmp
223 std::vector<Data> tmp2;
224 tmp2.swap(tmp);
225 }
226
227 Double_t meanv=x1/x0;
228 Double_t sigmav=std::sqrt(x2/x0-meanv*meanv);
229 Double_t h=std::pow(Double_t(4)/Double_t(3),0.2)*std::pow(_nEvents,-0.2)*_rho;
230 Double_t hmin=h*sigmav*std::sqrt(2.)/10;
231 Double_t norm=h*std::sqrt(sigmav)/(2.0*std::sqrt(3.0));
232
234 for(Int_t j=0;j<_nEvents;++j) {
235 _weights[j]=norm/std::sqrt(g(_dataPts[j],h*sigmav));
236 if (_weights[j]<hmin) _weights[j]=hmin;
237 }
238
239 // The idea below is that beyond nSigma sigma, the value of the exponential
240 // in the Gaussian is well below the machine precision of a double, so it
241 // does not contribute any more. That way, we can limit how many bins of the
242 // binned approximation in _lookupTable we have to touch when filling it.
243 for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
244 for(Int_t j=0;j<_nEvents;++j) {
245 const Double_t xlo = std::min(_hi,
246 std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
247 const Double_t xhi = std::max(_lo,
248 std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
249 if (xlo >= xhi) continue;
250 const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
251 const Double_t weightratio = _dataWgts[j] / _weights[j];
252 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
253 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
254 const Double_t x = (Double_t(_nPoints - binlo) * _lo +
255 Double_t(binlo) * _hi) / Double_t(_nPoints);
256 Double_t chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
257 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
258 _lookupTable[k] += weightratio * std::exp(- chi * chi);
259 }
260 }
261 if (_asymLeft) {
262 for(Int_t j=0;j<_nEvents;++j) {
263 const Double_t xlo = std::min(_hi,
264 std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
265 const Double_t xhi = std::max(_lo,
266 std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
267 if (xlo >= xhi) continue;
268 const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
269 const Double_t weightratio = _dataWgts[j] / _weights[j];
270 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
271 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
272 const Double_t x = (Double_t(_nPoints - binlo) * _lo +
273 Double_t(binlo) * _hi) / Double_t(_nPoints);
274 Double_t chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
275 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
276 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
277 }
278 }
279 }
280 if (_asymRight) {
281 for(Int_t j=0;j<_nEvents;++j) {
282 const Double_t xlo = std::min(_hi,
283 std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
284 const Double_t xhi = std::max(_lo,
285 std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
286 if (xlo >= xhi) continue;
287 const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
288 const Double_t weightratio = _dataWgts[j] / _weights[j];
289 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
290 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
291 const Double_t x = (Double_t(_nPoints - binlo) * _lo +
292 Double_t(binlo) * _hi) / Double_t(_nPoints);
293 Double_t chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
294 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
295 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
296 }
297 }
298 }
299 static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi()));
300 for (Int_t i=0;i<_nPoints+1;++i)
301 _lookupTable[i] /= sqrt2pi * _sumWgt;
302}
303
304////////////////////////////////////////////////////////////////////////////////
305
307 Int_t i = (Int_t)floor((Double_t(_x)-_lo)/_binWidth);
308 if (i<0) {
309// cerr << "got point below lower bound:"
310// << Double_t(_x) << " < " << _lo
311// << " -- performing linear extrapolation..." << endl;
312 i=0;
313 }
314 if (i>_nPoints-1) {
315// cerr << "got point above upper bound:"
316// << Double_t(_x) << " > " << _hi
317// << " -- performing linear extrapolation..." << endl;
318 i=_nPoints-1;
319 }
321
322 // for now do simple linear interpolation.
323 // one day replace by splines...
324 Double_t ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
325 if (ret<0) ret=0 ;
326 return ret ;
327}
328
330 RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
331{
332 if (matchArgs(allVars, analVars, _x)) return 1;
333 return 0;
334}
335
336Double_t RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
337{
338 R__ASSERT(1 == code);
339 // this code is based on _lookupTable and uses linear interpolation, just as
340 // evaluate(); integration is done using the trapez rule
341 const Double_t xmin = std::max(_lo, _x.min(rangeName));
342 const Double_t xmax = std::min(_hi, _x.max(rangeName));
343 const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
344 const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
345 _nPoints - 1);
346 Double_t sum = 0.;
347 // sum up complete bins in middle
348 if (imin + 1 < imax)
349 sum += _lookupTable[imin + 1] + _lookupTable[imax];
350 for (Int_t i = imin + 2; i < imax; ++i)
351 sum += 2. * _lookupTable[i];
352 sum *= _binWidth * 0.5;
353 // treat incomplete bins
354 const Double_t dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
355 const Double_t dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
356 if (imin < imax) {
357 // first bin
358 sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
359 _lookupTable[imin] + dxmin *
360 (_lookupTable[imin + 1] - _lookupTable[imin]));
361 // last bin
362 sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
363 _lookupTable[imax] + dxmax *
364 (_lookupTable[imax + 1] - _lookupTable[imax]));
365 } else if (imin == imax) {
366 // first bin == last bin
367 sum += _binWidth * (dxmax - dxmin) * 0.5 * (
368 _lookupTable[imin] + dxmin *
369 (_lookupTable[imin + 1] - _lookupTable[imin]) +
370 _lookupTable[imax] + dxmax *
371 (_lookupTable[imax + 1] - _lookupTable[imax]));
372 }
373 return sum;
374}
375
377{
378 if (vars.contains(*_x.absArg())) return 1;
379 return 0;
380}
381
383{
384 R__ASSERT(1 == code);
385 Double_t max = -std::numeric_limits<Double_t>::max();
386 for (Int_t i = 0; i <= _nPoints; ++i)
387 if (max < _lookupTable[i]) max = _lookupTable[i];
388 return max;
389}
390
391////////////////////////////////////////////////////////////////////////////////
392
394 Double_t y=0;
395 // since data is sorted, we can be a little faster because we know which data
396 // points contribute
397 Double_t* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
398 x - _nSigma * sigmav);
399 if (it >= (_dataPts + _nEvents)) return 0.;
400 Double_t* iend = std::upper_bound(it, _dataPts + _nEvents,
401 x + _nSigma * sigmav);
402 for ( ; it < iend; ++it) {
403 const Double_t r = (x - *it) / sigmav;
404 y += std::exp(-0.5 * r * r);
405 }
406
407 static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi()));
408 return y/(sigmav*sqrt2pi*_nEvents);
409}
ROOT::R::TRInterface & r
Definition: Object.C:4
#define b(i)
Definition: RSha256.hxx:100
#define h(i)
Definition: RSha256.hxx:106
static const double x2[5]
static const double x1[5]
#define TRACE_DESTROY
Definition: RooTrace.h:24
#define TRACE_CREATE
Definition: RooTrace.h:23
int Int_t
Definition: RtypesCore.h:45
const Bool_t kFALSE
Definition: RtypesCore.h:101
double Double_t
Definition: RtypesCore.h:59
#define ClassImp(name)
Definition: Rtypes.h:364
#define R__ASSERT(e)
Definition: TError.h:118
char name[80]
Definition: TGX11.cxx:110
float xmin
Definition: THbookFile.cxx:95
float xmax
Definition: THbookFile.cxx:95
TRObject operator()(const T1 &t1) const
#define snprintf
Definition: civetweb.c:1540
Bool_t contains(const RooAbsArg &var) const
Check if collection contains an argument with the same name as var.
virtual Int_t numEntries() const
Return number of entries in dataset, i.e., count unweighted entries.
Definition: RooAbsData.cxx:376
RooAbsRealLValue is the common abstract base class for objects that represent a real value that may a...
virtual Double_t getMax(const char *name=0) const
Get maximum of currently defined range.
virtual Double_t getMin(const char *name=0) const
Get miniminum of currently defined range.
RooAbsReal is the common abstract base class for objects that represent a real value and implements f...
Definition: RooAbsReal.h:63
Bool_t matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
Double_t getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition: RooAbsReal.h:93
RooAbsArg * absArg() const
Definition: RooArgProxy.h:37
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:35
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:36
virtual const RooArgSet * get(Int_t index) const override
Return RooArgSet with coordinates of event 'index'.
virtual Double_t weight() const override
Return event weight of current event.
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition: RooKeysPdf.h:25
Bool_t _asymLeft
Definition: RooKeysPdf.h:72
RooKeysPdf()
coverity[UNINIT_CTOR]
Definition: RooKeysPdf.cxx:60
Bool_t _mirrorRight
Definition: RooKeysPdf.h:71
virtual Double_t analyticalIntegral(Int_t code, const char *rangeName=0) const
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral.
Definition: RooKeysPdf.cxx:336
virtual Double_t maxVal(Int_t code) const
Return maximum value for set of observables identified by code assigned in getMaxVal.
Definition: RooKeysPdf.cxx:382
Char_t _varName[128]
Definition: RooKeysPdf.h:75
Int_t _nEvents
Definition: RooKeysPdf.h:60
void LoadDataSet(RooDataSet &data)
Definition: RooKeysPdf.cxx:178
RooRealProxy _x
Definition: RooKeysPdf.h:52
Bool_t _asymRight
Definition: RooKeysPdf.h:72
Double_t _hi
Definition: RooKeysPdf.h:76
virtual Int_t getMaxVal(const RooArgSet &vars) const
Advertise capability to determine maximum value of function for given set of observables.
Definition: RooKeysPdf.cxx:376
Double_t _lo
Definition: RooKeysPdf.h:76
Double_t _sumWgt
Definition: RooKeysPdf.h:64
virtual ~RooKeysPdf()
Definition: RooKeysPdf.cxx:156
Double_t * _weights
Definition: RooKeysPdf.h:63
Double_t g(Double_t x, Double_t sigma) const
Definition: RooKeysPdf.cxx:393
Bool_t _mirrorLeft
Definition: RooKeysPdf.h:71
static const Double_t _nSigma
Definition: RooKeysPdf.h:58
Double_t _binWidth
Definition: RooKeysPdf.h:76
Double_t * _dataPts
Definition: RooKeysPdf.h:61
Double_t _rho
Definition: RooKeysPdf.h:77
Double_t * _dataWgts
Definition: RooKeysPdf.h:62
Double_t _lookupTable[_nPoints+1]
Definition: RooKeysPdf.h:67
Double_t evaluate() const
Evaluate this PDF / function / constant. Needs to be overridden by all derived classes.
Definition: RooKeysPdf.cxx:306
virtual Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=0) const
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported.
Definition: RooKeysPdf.cxx:329
RooRealVar represents a variable that can be changed from the outside.
Definition: RooRealVar.h:39
double min(const char *rname=0) const
Query lower limit of range. This requires the payload to be RooAbsRealLValue or derived.
double max(const char *rname=0) const
Query upper limit of range. This requires the payload to be RooAbsRealLValue or derived.
const T & arg() const
Return reference to object held in proxy.
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
VecExpr< UnaryOp< Sqrt< T >, VecExpr< A, T, D >, T >, T, D > sqrt(const VecExpr< A, T, D > &rhs)
constexpr Double_t Pi()
Definition: TMath.h:37
auto * a
Definition: textangle.C:12
static uint64_t sum(uint64_t i)
Definition: Factory.cxx:2345
REAL epsilon
Definition: triangle.c:618