Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooKeysPdf.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitModels *
4 * @(#)root/roofit:$Id$
5 * Authors: *
6 * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9 * *
10 * Copyright (c) 2000-2005, Regents of the University of California *
11 * and Stanford University. All rights reserved. *
12 * *
13 * Redistribution and use in source and binary forms, *
14 * with or without modification, are permitted according to the terms *
15 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16 *****************************************************************************/
17
18/** \class RooKeysPdf
19 \ingroup Roofit
20
21Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
22of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
23each contributing 1/N to the total integral of the pdf.
24If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
25local density of events, i.e. narrow for regions with high event density to preserve details and
26wide for regions with low event density to promote smoothness. The details of the general algorithm
27are described in the following paper:
28
29Cranmer KS, Kernel Estimation in High-Energy Physics.
30 Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
31**/
32
33#include <limits>
34#include <algorithm>
35#include <cmath>
36#include <iostream>
37#include "TMath.h"
38#include "snprintf.h"
39#include "RooKeysPdf.h"
40#include "RooRealVar.h"
41#include "RooRandom.h"
42#include "RooDataSet.h"
43#include "RooTrace.h"
44
45#include "TError.h"
46
47using namespace std;
48
50
51const double RooKeysPdf::_nSigma = std::sqrt(-2. *
52 std::log(std::numeric_limits<double>::epsilon()));
53
54////////////////////////////////////////////////////////////////////////////////
55/// coverity[UNINIT_CTOR]
56
58{
60}
61
62////////////////////////////////////////////////////////////////////////////////
63/// cache stuff about x
64
65RooKeysPdf::RooKeysPdf(const char *name, const char *title, RooAbsReal &x, RooDataSet &data, Mirror mirror, double rho)
66 : RooKeysPdf(name, title, x, static_cast<RooRealVar &>(x), data, mirror, rho)
67{
68}
69
70////////////////////////////////////////////////////////////////////////////////
71/// cache stuff about x
72
73RooKeysPdf::RooKeysPdf(const char *name, const char *title, RooAbsReal &xpdf, RooRealVar &xdata, RooDataSet &data,
74 Mirror mirror, double rho)
75 : RooAbsPdf(name, title),
76 _x("x", "Observable", this, xpdf),
77 _mirrorLeft(mirror == MirrorLeft || mirror == MirrorBoth || mirror == MirrorLeftAsymRight),
78 _mirrorRight(mirror == MirrorRight || mirror == MirrorBoth || mirror == MirrorAsymLeftRight),
79 _asymLeft(mirror == MirrorAsymLeft || mirror == MirrorAsymLeftRight || mirror == MirrorAsymBoth),
80 _asymRight(mirror == MirrorAsymRight || mirror == MirrorLeftAsymRight || mirror == MirrorAsymBoth),
81 _lo(xdata.getMin()),
82 _hi(xdata.getMax()),
83 _binWidth((_hi - _lo) / (_nPoints - 1)),
84 _rho(rho)
85{
86 snprintf(_varName, 128,"%s", xdata.GetName());
87
88 // form the lookup table
91}
92
93////////////////////////////////////////////////////////////////////////////////
94
95RooKeysPdf::RooKeysPdf(const RooKeysPdf &other, const char *name)
96 : RooAbsPdf(other, name),
97 _x("x", this, other._x),
98 _nEvents(other._nEvents),
99 _mirrorLeft(other._mirrorLeft),
100 _mirrorRight(other._mirrorRight),
101 _asymLeft(other._asymLeft),
102 _asymRight(other._asymRight),
103 _lo(other._lo),
104 _hi(other._hi),
105 _binWidth(other._binWidth),
106 _rho(other._rho)
107{
108 // cache stuff about x
109 snprintf(_varName, 128, "%s", other._varName );
110
111 // copy over data and weights... not necessary, commented out for speed
112// _dataPts = new double[_nEvents];
113// _weights = new double[_nEvents];
114// for (Int_t i= 0; i<_nEvents; i++) {
115// _dataPts[i]= other._dataPts[i];
116// _weights[i]= other._weights[i];
117// }
118
119 // copy over the lookup table
120 for (Int_t i= 0; i<_nPoints+1; i++)
121 _lookupTable[i]= other._lookupTable[i];
122
124}
125
126////////////////////////////////////////////////////////////////////////////////
127
129 delete[] _dataPts;
130 delete[] _dataWgts;
131 delete[] _weights;
132
134}
135
136////////////////////////////////////////////////////////////////////////////////
137/// small helper structure
138
139namespace {
140 struct Data {
141 double x;
142 double w;
143 };
144 // helper to order two Data structures
145 struct cmp {
146 inline bool operator()(const struct Data& a, const struct Data& b) const
147 { return a.x < b.x; }
148 };
149}
151 delete[] _dataPts;
152 delete[] _dataWgts;
153 delete[] _weights;
154
155 std::vector<Data> tmp;
156 tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
157 double x0 = 0.;
158 double x1 = 0.;
159 double x2 = 0.;
160 _sumWgt = 0.;
161 // read the data set into tmp and accumulate some statistics
162 RooRealVar& real = static_cast<RooRealVar&>(data.get()->operator[](_varName));
163 for (Int_t i = 0; i < data.numEntries(); ++i) {
164 data.get(i);
165 const double x = real.getVal();
166 const double w = data.weight();
167 x0 += w;
168 x1 += w * x;
169 x2 += w * x * x;
171
172 Data p;
173 p.x = x, p.w = w;
174 tmp.push_back(p);
175 if (_mirrorLeft) {
176 p.x = 2. * _lo - x;
177 tmp.push_back(p);
178 }
179 if (_mirrorRight) {
180 p.x = 2. * _hi - x;
181 tmp.push_back(p);
182 }
183 }
184 // sort the entire data set so that values of x are increasing
185 std::sort(tmp.begin(), tmp.end(), cmp());
186
187 // copy the sorted data set to its final destination
188 _nEvents = tmp.size();
189 _dataPts = new double[_nEvents];
190 _dataWgts = new double[_nEvents];
191 for (unsigned i = 0; i < tmp.size(); ++i) {
192 _dataPts[i] = tmp[i].x;
193 _dataWgts[i] = tmp[i].w;
194 }
195 {
196 // free tmp
197 std::vector<Data> tmp2;
198 tmp2.swap(tmp);
199 }
200
201 double meanv=x1/x0;
202 double sigmav=std::sqrt(x2/x0-meanv*meanv);
203 double h=std::pow(double(4)/double(3),0.2)*std::pow(_sumWgt,-0.2)*_rho;
204 double hmin=h*sigmav*std::sqrt(2.)/10;
205 double norm=h*std::sqrt(sigmav * _sumWgt)/(2.0*std::sqrt(3.0));
206
207 _weights=new double[_nEvents];
208 for(Int_t j=0;j<_nEvents;++j) {
209 _weights[j] = norm / std::sqrt(_dataWgts[j] * g(_dataPts[j],h*sigmav));
210 if (_weights[j]<hmin) _weights[j]=hmin;
211 }
212
213 // The idea below is that beyond nSigma sigma, the value of the exponential
214 // in the Gaussian is well below the machine precision of a double, so it
215 // does not contribute any more. That way, we can limit how many bins of the
216 // binned approximation in _lookupTable we have to touch when filling it.
217 for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
218 for(Int_t j=0;j<_nEvents;++j) {
219 const double xlo = std::min(_hi,
220 std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
221 const double xhi = std::max(_lo,
222 std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
223 if (xlo >= xhi) continue;
224 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
225 const double weightratio = _dataWgts[j] / _weights[j];
226 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
227 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
228 const double x = (double(_nPoints - binlo) * _lo +
229 double(binlo) * _hi) / double(_nPoints);
230 double chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
231 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
232 _lookupTable[k] += weightratio * std::exp(- chi * chi);
233 }
234 }
235 if (_asymLeft) {
236 for(Int_t j=0;j<_nEvents;++j) {
237 const double xlo = std::min(_hi,
238 std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
239 const double xhi = std::max(_lo,
240 std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
241 if (xlo >= xhi) continue;
242 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
243 const double weightratio = _dataWgts[j] / _weights[j];
244 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
245 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
246 const double x = (double(_nPoints - binlo) * _lo +
247 double(binlo) * _hi) / double(_nPoints);
248 double chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
249 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
250 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
251 }
252 }
253 }
254 if (_asymRight) {
255 for(Int_t j=0;j<_nEvents;++j) {
256 const double xlo = std::min(_hi,
257 std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
258 const double xhi = std::max(_lo,
259 std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
260 if (xlo >= xhi) continue;
261 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
262 const double weightratio = _dataWgts[j] / _weights[j];
263 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
264 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
265 const double x = (double(_nPoints - binlo) * _lo +
266 double(binlo) * _hi) / double(_nPoints);
267 double chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
268 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
269 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
270 }
271 }
272 }
273 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
274 for (Int_t i=0;i<_nPoints+1;++i)
275 _lookupTable[i] /= sqrt2pi * _sumWgt;
276}
277
278////////////////////////////////////////////////////////////////////////////////
279
280double RooKeysPdf::evaluate() const {
281 Int_t i = (Int_t)floor((double(_x)-_lo)/_binWidth);
282 if (i<0) {
283// cerr << "got point below lower bound:"
284// << double(_x) << " < " << _lo
285// << " -- performing linear extrapolation..." << endl;
286 i=0;
287 }
288 if (i>_nPoints-1) {
289// cerr << "got point above upper bound:"
290// << double(_x) << " > " << _hi
291// << " -- performing linear extrapolation..." << endl;
292 i=_nPoints-1;
293 }
294 double dx = (double(_x)-(_lo+i*_binWidth))/_binWidth;
295
296 // for now do simple linear interpolation.
297 // one day replace by splines...
298 double ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
299 if (ret<0) ret=0 ;
300 return ret ;
301}
302
304 RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
305{
306 if (matchArgs(allVars, analVars, _x)) return 1;
307 return 0;
308}
309
310double RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
311{
312 R__ASSERT(1 == code);
313 // this code is based on _lookupTable and uses linear interpolation, just as
314 // evaluate(); integration is done using the trapez rule
315 const double xmin = std::max(_lo, _x.min(rangeName));
316 const double xmax = std::min(_hi, _x.max(rangeName));
317 const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
318 const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
319 _nPoints - 1);
320 double sum = 0.;
321 // sum up complete bins in middle
322 if (imin + 1 < imax)
323 sum += _lookupTable[imin + 1] + _lookupTable[imax];
324 for (Int_t i = imin + 2; i < imax; ++i)
325 sum += 2. * _lookupTable[i];
326 sum *= _binWidth * 0.5;
327 // treat incomplete bins
328 const double dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
329 const double dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
330 if (imin < imax) {
331 // first bin
332 sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
333 _lookupTable[imin] + dxmin *
334 (_lookupTable[imin + 1] - _lookupTable[imin]));
335 // last bin
336 sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
337 _lookupTable[imax] + dxmax *
338 (_lookupTable[imax + 1] - _lookupTable[imax]));
339 } else if (imin == imax) {
340 // first bin == last bin
341 sum += _binWidth * (dxmax - dxmin) * 0.5 * (
342 _lookupTable[imin] + dxmin *
343 (_lookupTable[imin + 1] - _lookupTable[imin]) +
344 _lookupTable[imax] + dxmax *
345 (_lookupTable[imax + 1] - _lookupTable[imax]));
346 }
347 return sum;
348}
349
351{
352 if (vars.contains(*_x.absArg())) return 1;
353 return 0;
354}
355
356double RooKeysPdf::maxVal(Int_t code) const
357{
358 R__ASSERT(1 == code);
359 double max = -std::numeric_limits<double>::max();
360 for (Int_t i = 0; i <= _nPoints; ++i)
361 if (max < _lookupTable[i]) max = _lookupTable[i];
362 return max;
363}
364
365////////////////////////////////////////////////////////////////////////////////
366
367double RooKeysPdf::g(double x,double sigmav) const {
368 double y=0;
369 // since data is sorted, we can be a little faster because we know which data
370 // points contribute
371 double* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
372 x - _nSigma * sigmav);
373 if (it >= (_dataPts + _nEvents)) return 0.;
374 double* iend = std::upper_bound(it, _dataPts + _nEvents,
375 x + _nSigma * sigmav);
376 for ( ; it < iend; ++it) {
377 const double r = (x - *it) / sigmav;
378 y += std::exp(-0.5 * r * r);
379 }
380
381 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
382 return y/(sigmav*sqrt2pi);
383}
#define b(i)
Definition RSha256.hxx:100
#define g(i)
Definition RSha256.hxx:105
#define a(i)
Definition RSha256.hxx:99
#define h(i)
Definition RSha256.hxx:106
Int_t _nEvents
Total number of events in test statistic calculation.
#define TRACE_DESTROY
Definition RooTrace.h:24
#define TRACE_CREATE
Definition RooTrace.h:23
int Int_t
Definition RtypesCore.h:45
#define ClassImp(name)
Definition Rtypes.h:377
#define R__ASSERT(e)
Definition TError.h:118
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t hmin
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
TRObject operator()(const T1 &t1) const
#define snprintf
Definition civetweb.c:1540
bool contains(const RooAbsArg &var) const
Check if collection contains an argument with the same name as var.
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
bool matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
RooAbsArg * absArg() const
Return pointer to contained argument.
Definition RooArgProxy.h:46
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
Container class to hold unbinned data.
Definition RooDataSet.h:57
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition RooKeysPdf.h:24
double _binWidth
Definition RooKeysPdf.h:77
double _sumWgt
Definition RooKeysPdf.h:63
static constexpr int _nPoints
Definition RooKeysPdf.h:65
double * _dataWgts
Definition RooKeysPdf.h:61
RooKeysPdf()
coverity[UNINIT_CTOR]
double _lookupTable[_nPoints+1]
Definition RooKeysPdf.h:66
double maxVal(Int_t code) const override
Return maximum value for set of observables identified by code assigned in getMaxVal.
bool _mirrorRight
Definition RooKeysPdf.h:71
double _rho
Definition RooKeysPdf.h:78
double _hi
Definition RooKeysPdf.h:77
Char_t _varName[128]
Definition RooKeysPdf.h:76
double g(double x, double sigma) const
Int_t _nEvents
Definition RooKeysPdf.h:59
void LoadDataSet(RooDataSet &data)
Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=nullptr) const override
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported.
RooRealProxy _x
Definition RooKeysPdf.h:51
double * _weights
Definition RooKeysPdf.h:62
double analyticalIntegral(Int_t code, const char *rangeName=nullptr) const override
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral.
bool _mirrorLeft
Definition RooKeysPdf.h:70
double evaluate() const override
Evaluate this PDF / function / constant. Needs to be overridden by all derived classes.
~RooKeysPdf() override
double _lo
Definition RooKeysPdf.h:77
double * _dataPts
Definition RooKeysPdf.h:60
bool _asymRight
Definition RooKeysPdf.h:73
static const double _nSigma
Definition RooKeysPdf.h:57
bool _asymLeft
Definition RooKeysPdf.h:72
Int_t getMaxVal(const RooArgSet &vars) const override
Advertise capability to determine maximum value of function for given set of observables.
Variable that can be changed from the outside.
Definition RooRealVar.h:37
double max(const char *rname=nullptr) const
Query upper limit of range. This requires the payload to be RooAbsRealLValue or derived.
double min(const char *rname=nullptr) const
Query lower limit of range. This requires the payload to be RooAbsRealLValue or derived.
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
constexpr Double_t Pi()
Definition TMath.h:37
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345