Logo ROOT  
Reference Guide
RooKeysPdf.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitModels *
4 * @(#)root/roofit:$Id$
5 * Authors: *
6 * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9 * *
10 * Copyright (c) 2000-2005, Regents of the University of California *
11 * and Stanford University. All rights reserved. *
12 * *
13 * Redistribution and use in source and binary forms, *
14 * with or without modification, are permitted according to the terms *
15 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16 *****************************************************************************/
17
18/** \class RooKeysPdf
19 \ingroup Roofit
20
21Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
22of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
23each contributing 1/N to the total integral of the pdf.
24If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
25local density of events, i.e. narrow for regions with high event density to preserve details and
26wide for regions with low event density to promote smoothness. The details of the general algorithm
27are described in the following paper:
28
29Cranmer KS, Kernel Estimation in High-Energy Physics.
30 Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
31**/
32
33#include <limits>
34#include <algorithm>
35#include <cmath>
36#include <iostream>
37#include "TMath.h"
38#include "snprintf.h"
39#include "RooKeysPdf.h"
40#include "RooRealVar.h"
41#include "RooRandom.h"
42#include "RooDataSet.h"
43#include "RooTrace.h"
44
45#include "TError.h"
46
47using namespace std;
48
50
51const double RooKeysPdf::_nSigma = std::sqrt(-2. *
53
54////////////////////////////////////////////////////////////////////////////////
55/// coverity[UNINIT_CTOR]
56
57 RooKeysPdf::RooKeysPdf() : _nEvents(0), _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
58 _mirrorLeft(false), _mirrorRight(false),
59 _asymLeft(false), _asymRight(false)
60{
62}
63
64////////////////////////////////////////////////////////////////////////////////
65/// cache stuff about x
66
67RooKeysPdf::RooKeysPdf(const char *name, const char *title,
69 Mirror mirror, double rho) :
70 RooAbsPdf(name,title),
71 _x("x","observable",this,x),
72 _nEvents(0),
73 _dataPts(0),
74 _dataWgts(0),
75 _weights(0),
76 _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
77 _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
78 _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
79 _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
80 _rho(rho)
81{
82 snprintf(_varName, 128,"%s", x.GetName());
83 RooAbsRealLValue& real= (RooRealVar&)(_x.arg());
84 _lo = real.getMin();
85 _hi = real.getMax();
86 _binWidth = (_hi-_lo)/(_nPoints-1);
87
88 // form the lookup table
91}
92
93////////////////////////////////////////////////////////////////////////////////
94/// cache stuff about x
95
96RooKeysPdf::RooKeysPdf(const char *name, const char *title,
97 RooAbsReal& xpdf, RooRealVar& xdata, RooDataSet& data,
98 Mirror mirror, double rho) :
99 RooAbsPdf(name,title),
100 _x("x","Observable",this,xpdf),
101 _nEvents(0),
102 _dataPts(0),
103 _dataWgts(0),
104 _weights(0),
105 _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
106 _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
107 _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
108 _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
109 _rho(rho)
110{
111 snprintf(_varName, 128,"%s", xdata.GetName());
112 RooAbsRealLValue& real= (RooRealVar&)(xdata);
113 _lo = real.getMin();
114 _hi = real.getMax();
115 _binWidth = (_hi-_lo)/(_nPoints-1);
116
117 // form the lookup table
120}
121
122////////////////////////////////////////////////////////////////////////////////
123
124RooKeysPdf::RooKeysPdf(const RooKeysPdf& other, const char* name):
125 RooAbsPdf(other,name), _x("x",this,other._x), _nEvents(other._nEvents),
126 _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
127 _mirrorLeft( other._mirrorLeft ), _mirrorRight( other._mirrorRight ),
128 _asymLeft(other._asymLeft), _asymRight(other._asymRight),
129 _rho( other._rho ) {
130 // cache stuff about x
131 snprintf(_varName, 128, "%s", other._varName );
132 _lo = other._lo;
133 _hi = other._hi;
134 _binWidth = other._binWidth;
135
136 // copy over data and weights... not necessary, commented out for speed
137// _dataPts = new double[_nEvents];
138// _weights = new double[_nEvents];
139// for (Int_t i= 0; i<_nEvents; i++) {
140// _dataPts[i]= other._dataPts[i];
141// _weights[i]= other._weights[i];
142// }
143
144 // copy over the lookup table
145 for (Int_t i= 0; i<_nPoints+1; i++)
146 _lookupTable[i]= other._lookupTable[i];
147
149}
150
151////////////////////////////////////////////////////////////////////////////////
152
154 delete[] _dataPts;
155 delete[] _dataWgts;
156 delete[] _weights;
157
159}
160
161////////////////////////////////////////////////////////////////////////////////
162/// small helper structure
163
164namespace {
165 struct Data {
166 double x;
167 double w;
168 };
169 // helper to order two Data structures
170 struct cmp {
171 inline bool operator()(const struct Data& a, const struct Data& b) const
172 { return a.x < b.x; }
173 };
174}
176 delete[] _dataPts;
177 delete[] _dataWgts;
178 delete[] _weights;
179
180 std::vector<Data> tmp;
181 tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
182 double x0 = 0., x1 = 0., x2 = 0.;
183 _sumWgt = 0.;
184 // read the data set into tmp and accumulate some statistics
185 RooRealVar& real = (RooRealVar&)(data.get()->operator[](_varName));
186 for (Int_t i = 0; i < data.numEntries(); ++i) {
187 data.get(i);
188 const double x = real.getVal();
189 const double w = data.weight();
190 x0 += w;
191 x1 += w * x;
192 x2 += w * x * x;
194
195 Data p;
196 p.x = x, p.w = w;
197 tmp.push_back(p);
198 if (_mirrorLeft) {
199 p.x = 2. * _lo - x;
200 tmp.push_back(p);
201 }
202 if (_mirrorRight) {
203 p.x = 2. * _hi - x;
204 tmp.push_back(p);
205 }
206 }
207 // sort the entire data set so that values of x are increasing
208 std::sort(tmp.begin(), tmp.end(), cmp());
209
210 // copy the sorted data set to its final destination
211 _nEvents = tmp.size();
212 _dataPts = new double[_nEvents];
213 _dataWgts = new double[_nEvents];
214 for (unsigned i = 0; i < tmp.size(); ++i) {
215 _dataPts[i] = tmp[i].x;
216 _dataWgts[i] = tmp[i].w;
217 }
218 {
219 // free tmp
220 std::vector<Data> tmp2;
221 tmp2.swap(tmp);
222 }
223
224 double meanv=x1/x0;
225 double sigmav=std::sqrt(x2/x0-meanv*meanv);
226 double h=std::pow(double(4)/double(3),0.2)*std::pow(_sumWgt,-0.2)*_rho;
227 double hmin=h*sigmav*std::sqrt(2.)/10;
228 double norm=h*std::sqrt(sigmav * _sumWgt)/(2.0*std::sqrt(3.0));
229
230 _weights=new double[_nEvents];
231 for(Int_t j=0;j<_nEvents;++j) {
232 _weights[j] = norm / std::sqrt(_dataWgts[j] * g(_dataPts[j],h*sigmav));
233 if (_weights[j]<hmin) _weights[j]=hmin;
234 }
235
236 // The idea below is that beyond nSigma sigma, the value of the exponential
237 // in the Gaussian is well below the machine precision of a double, so it
238 // does not contribute any more. That way, we can limit how many bins of the
239 // binned approximation in _lookupTable we have to touch when filling it.
240 for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
241 for(Int_t j=0;j<_nEvents;++j) {
242 const double xlo = std::min(_hi,
243 std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
244 const double xhi = std::max(_lo,
245 std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
246 if (xlo >= xhi) continue;
247 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
248 const double weightratio = _dataWgts[j] / _weights[j];
249 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
250 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
251 const double x = (double(_nPoints - binlo) * _lo +
252 double(binlo) * _hi) / double(_nPoints);
253 double chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
254 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
255 _lookupTable[k] += weightratio * std::exp(- chi * chi);
256 }
257 }
258 if (_asymLeft) {
259 for(Int_t j=0;j<_nEvents;++j) {
260 const double xlo = std::min(_hi,
261 std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
262 const double xhi = std::max(_lo,
263 std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
264 if (xlo >= xhi) continue;
265 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
266 const double weightratio = _dataWgts[j] / _weights[j];
267 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
268 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
269 const double x = (double(_nPoints - binlo) * _lo +
270 double(binlo) * _hi) / double(_nPoints);
271 double chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
272 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
273 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
274 }
275 }
276 }
277 if (_asymRight) {
278 for(Int_t j=0;j<_nEvents;++j) {
279 const double xlo = std::min(_hi,
280 std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
281 const double xhi = std::max(_lo,
282 std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
283 if (xlo >= xhi) continue;
284 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
285 const double weightratio = _dataWgts[j] / _weights[j];
286 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
287 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
288 const double x = (double(_nPoints - binlo) * _lo +
289 double(binlo) * _hi) / double(_nPoints);
290 double chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
291 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
292 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
293 }
294 }
295 }
296 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
297 for (Int_t i=0;i<_nPoints+1;++i)
298 _lookupTable[i] /= sqrt2pi * _sumWgt;
299}
300
301////////////////////////////////////////////////////////////////////////////////
302
303double RooKeysPdf::evaluate() const {
304 Int_t i = (Int_t)floor((double(_x)-_lo)/_binWidth);
305 if (i<0) {
306// cerr << "got point below lower bound:"
307// << double(_x) << " < " << _lo
308// << " -- performing linear extrapolation..." << endl;
309 i=0;
310 }
311 if (i>_nPoints-1) {
312// cerr << "got point above upper bound:"
313// << double(_x) << " > " << _hi
314// << " -- performing linear extrapolation..." << endl;
315 i=_nPoints-1;
316 }
317 double dx = (double(_x)-(_lo+i*_binWidth))/_binWidth;
318
319 // for now do simple linear interpolation.
320 // one day replace by splines...
321 double ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
322 if (ret<0) ret=0 ;
323 return ret ;
324}
325
327 RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
328{
329 if (matchArgs(allVars, analVars, _x)) return 1;
330 return 0;
331}
332
333double RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
334{
335 R__ASSERT(1 == code);
336 // this code is based on _lookupTable and uses linear interpolation, just as
337 // evaluate(); integration is done using the trapez rule
338 const double xmin = std::max(_lo, _x.min(rangeName));
339 const double xmax = std::min(_hi, _x.max(rangeName));
340 const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
341 const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
342 _nPoints - 1);
343 double sum = 0.;
344 // sum up complete bins in middle
345 if (imin + 1 < imax)
346 sum += _lookupTable[imin + 1] + _lookupTable[imax];
347 for (Int_t i = imin + 2; i < imax; ++i)
348 sum += 2. * _lookupTable[i];
349 sum *= _binWidth * 0.5;
350 // treat incomplete bins
351 const double dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
352 const double dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
353 if (imin < imax) {
354 // first bin
355 sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
356 _lookupTable[imin] + dxmin *
357 (_lookupTable[imin + 1] - _lookupTable[imin]));
358 // last bin
359 sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
360 _lookupTable[imax] + dxmax *
361 (_lookupTable[imax + 1] - _lookupTable[imax]));
362 } else if (imin == imax) {
363 // first bin == last bin
364 sum += _binWidth * (dxmax - dxmin) * 0.5 * (
365 _lookupTable[imin] + dxmin *
366 (_lookupTable[imin + 1] - _lookupTable[imin]) +
367 _lookupTable[imax] + dxmax *
368 (_lookupTable[imax + 1] - _lookupTable[imax]));
369 }
370 return sum;
371}
372
374{
375 if (vars.contains(*_x.absArg())) return 1;
376 return 0;
377}
378
379double RooKeysPdf::maxVal(Int_t code) const
380{
381 R__ASSERT(1 == code);
382 double max = -std::numeric_limits<double>::max();
383 for (Int_t i = 0; i <= _nPoints; ++i)
384 if (max < _lookupTable[i]) max = _lookupTable[i];
385 return max;
386}
387
388////////////////////////////////////////////////////////////////////////////////
389
390double RooKeysPdf::g(double x,double sigmav) const {
391 double y=0;
392 // since data is sorted, we can be a little faster because we know which data
393 // points contribute
394 double* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
395 x - _nSigma * sigmav);
396 if (it >= (_dataPts + _nEvents)) return 0.;
397 double* iend = std::upper_bound(it, _dataPts + _nEvents,
398 x + _nSigma * sigmav);
399 for ( ; it < iend; ++it) {
400 const double r = (x - *it) / sigmav;
401 y += std::exp(-0.5 * r * r);
402 }
403
404 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
405 return y/(sigmav*sqrt2pi);
406}
#define h(i)
Definition: RSha256.hxx:106
#define TRACE_DESTROY
Definition: RooTrace.h:24
#define TRACE_CREATE
Definition: RooTrace.h:23
int Int_t
Definition: RtypesCore.h:45
#define ClassImp(name)
Definition: Rtypes.h:375
#define R__ASSERT(e)
Definition: TError.h:118
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t hmin
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t b
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition: TGX11.cxx:110
float xmin
Definition: THbookFile.cxx:95
float xmax
Definition: THbookFile.cxx:95
TRObject operator()(const T1 &t1) const
#define snprintf
Definition: civetweb.c:1540
bool contains(const RooAbsArg &var) const
Check if collection contains an argument with the same name as var.
RooAbsRealLValue is the common abstract base class for objects that represent a real value that may a...
virtual double getMax(const char *name=nullptr) const
Get maximum of currently defined range.
virtual double getMin(const char *name=nullptr) const
Get minimum of currently defined range.
RooAbsReal is the common abstract base class for objects that represent a real value and implements f...
Definition: RooAbsReal.h:62
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition: RooAbsReal.h:91
bool matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
RooAbsArg * absArg() const
Return pointer to contained argument.
Definition: RooArgProxy.h:47
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:56
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:55
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition: RooKeysPdf.h:25
Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=0) const override
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported.
Definition: RooKeysPdf.cxx:326
double _binWidth
Definition: RooKeysPdf.h:76
double _sumWgt
Definition: RooKeysPdf.h:64
double * _dataWgts
Definition: RooKeysPdf.h:62
RooKeysPdf()
coverity[UNINIT_CTOR]
Definition: RooKeysPdf.cxx:57
double _lookupTable[_nPoints+1]
Definition: RooKeysPdf.h:67
double maxVal(Int_t code) const override
Return maximum value for set of observables identified by code assigned in getMaxVal.
Definition: RooKeysPdf.cxx:379
bool _mirrorRight
Definition: RooKeysPdf.h:71
double _rho
Definition: RooKeysPdf.h:77
double _hi
Definition: RooKeysPdf.h:76
Char_t _varName[128]
Definition: RooKeysPdf.h:75
double g(double x, double sigma) const
Definition: RooKeysPdf.cxx:390
Int_t _nEvents
Definition: RooKeysPdf.h:60
void LoadDataSet(RooDataSet &data)
Definition: RooKeysPdf.cxx:175
RooRealProxy _x
Definition: RooKeysPdf.h:52
double analyticalIntegral(Int_t code, const char *rangeName=0) const override
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral.
Definition: RooKeysPdf.cxx:333
double * _weights
Definition: RooKeysPdf.h:63
bool _mirrorLeft
Definition: RooKeysPdf.h:71
double evaluate() const override
Evaluate this PDF / function / constant. Needs to be overridden by all derived classes.
Definition: RooKeysPdf.cxx:303
~RooKeysPdf() override
Definition: RooKeysPdf.cxx:153
double _lo
Definition: RooKeysPdf.h:76
double * _dataPts
Definition: RooKeysPdf.h:61
bool _asymRight
Definition: RooKeysPdf.h:72
static const double _nSigma
Definition: RooKeysPdf.h:58
bool _asymLeft
Definition: RooKeysPdf.h:72
Int_t getMaxVal(const RooArgSet &vars) const override
Advertise capability to determine maximum value of function for given set of observables.
Definition: RooKeysPdf.cxx:373
RooRealVar represents a variable that can be changed from the outside.
Definition: RooRealVar.h:40
double max(const char *rname=nullptr) const
Query upper limit of range. This requires the payload to be RooAbsRealLValue or derived.
const T & arg() const
Return reference to object held in proxy.
double min(const char *rname=nullptr) const
Query lower limit of range. This requires the payload to be RooAbsRealLValue or derived.
const char * GetName() const override
Returns name of object.
Definition: TNamed.h:47
RVec< PromoteType< T > > floor(const RVec< T > &v)
Definition: RVec.hxx:1790
RVec< PromoteTypes< T0, T1 > > pow(const T0 &x, const RVec< T1 > &v)
Definition: RVec.hxx:1770
RVec< PromoteType< T > > log(const RVec< T > &v)
Definition: RVec.hxx:1765
RVec< PromoteType< T > > exp(const RVec< T > &v)
Definition: RVec.hxx:1761
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
VecExpr< UnaryOp< Sqrt< T >, VecExpr< A, T, D >, T >, T, D > sqrt(const VecExpr< A, T, D > &rhs)
constexpr Double_t Pi()
Definition: TMath.h:37
TArc a
Definition: textangle.C:12
static uint64_t sum(uint64_t i)
Definition: Factory.cxx:2345
double epsilon
Definition: triangle.c:618