ROOT  6.06/09
Reference Guide
RooKeysPdf.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitModels *
4  * @(#)root/roofit:$Id$
5  * Authors: *
6  * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9  * *
10  * Copyright (c) 2000-2005, Regents of the University of California *
11  * and Stanford University. All rights reserved. *
12  * *
13  * Redistribution and use in source and binary forms, *
14  * with or without modification, are permitted according to the terms *
15  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16  *****************************************************************************/
17 #include "RooFit.h"
18 
19 #include <limits>
20 #include <algorithm>
21 #include <cmath>
22 #include "Riostream.h"
23 #include "TMath.h"
24 
25 #include "RooKeysPdf.h"
26 #include "RooAbsReal.h"
27 #include "RooRealVar.h"
28 #include "RooRandom.h"
29 #include "RooDataSet.h"
30 #include "RooTrace.h"
31 
32 #include "TError.h"
33 
34 using namespace std;
35 
37 
38 
39 //////////////////////////////////////////////////////////////////////////////
40 //
41 // BEGIN_HTML
42 // Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
43 // of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
44 // each contributing 1/N to the total integral of the p.d.f.
45 // <p>
46 // If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
47 // local density of events, i.e. narrow for regions with high event density to preserve details and
48 // wide for regions with log event density to promote smoothness. The details of the general algorithm
49 // are described in the following paper:
50 // <p>
51 // Cranmer KS, Kernel Estimation in High-Energy Physics.
52 // Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
53 // <p>
54 // END_HTML
55 //
56 
57 const Double_t RooKeysPdf::_nSigma = std::sqrt(-2. *
58  std::log(std::numeric_limits<Double_t>::epsilon()));
59 
60 ////////////////////////////////////////////////////////////////////////////////
61 /// coverity[UNINIT_CTOR]
62 
63  RooKeysPdf::RooKeysPdf() : _nEvents(0), _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
64  _mirrorLeft(kFALSE), _mirrorRight(kFALSE),
65  _asymLeft(kFALSE), _asymRight(kFALSE)
66 {
68 }
69 
70 
71 ////////////////////////////////////////////////////////////////////////////////
72 /// cache stuff about x
73 
74 RooKeysPdf::RooKeysPdf(const char *name, const char *title,
75  RooAbsReal& x, RooDataSet& data,
76  Mirror mirror, Double_t rho) :
77  RooAbsPdf(name,title),
78  _x("x","observable",this,x),
79  _nEvents(0),
80  _dataPts(0),
81  _dataWgts(0),
82  _weights(0),
83  _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
84  _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
85  _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
86  _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
87  _rho(rho)
88 {
89  snprintf(_varName, 128,"%s", x.GetName());
90  RooAbsRealLValue& real= (RooRealVar&)(_x.arg());
91  _lo = real.getMin();
92  _hi = real.getMax();
93  _binWidth = (_hi-_lo)/(_nPoints-1);
94 
95  // form the lookup table
96  LoadDataSet(data);
98 }
99 
100 
101 
102 ////////////////////////////////////////////////////////////////////////////////
103 /// cache stuff about x
104 
105 RooKeysPdf::RooKeysPdf(const char *name, const char *title,
106  RooAbsReal& xpdf, RooRealVar& xdata, RooDataSet& data,
107  Mirror mirror, Double_t rho) :
108  RooAbsPdf(name,title),
109  _x("x","Observable",this,xpdf),
110  _nEvents(0),
111  _dataPts(0),
112  _dataWgts(0),
113  _weights(0),
114  _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
115  _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
116  _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
117  _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
118  _rho(rho)
119 {
120  snprintf(_varName, 128,"%s", xdata.GetName());
121  RooAbsRealLValue& real= (RooRealVar&)(xdata);
122  _lo = real.getMin();
123  _hi = real.getMax();
124  _binWidth = (_hi-_lo)/(_nPoints-1);
125 
126  // form the lookup table
127  LoadDataSet(data);
129 }
130 
131 
132 
133 ////////////////////////////////////////////////////////////////////////////////
134 
135 RooKeysPdf::RooKeysPdf(const RooKeysPdf& other, const char* name):
136  RooAbsPdf(other,name), _x("x",this,other._x), _nEvents(other._nEvents),
137  _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
138  _mirrorLeft( other._mirrorLeft ), _mirrorRight( other._mirrorRight ),
139  _asymLeft(other._asymLeft), _asymRight(other._asymRight),
140  _rho( other._rho ) {
141  // cache stuff about x
142  snprintf(_varName, 128, "%s", other._varName );
143  _lo = other._lo;
144  _hi = other._hi;
145  _binWidth = other._binWidth;
146 
147  // copy over data and weights... not necessary, commented out for speed
148 // _dataPts = new Double_t[_nEvents];
149 // _weights = new Double_t[_nEvents];
150 // for (Int_t i= 0; i<_nEvents; i++) {
151 // _dataPts[i]= other._dataPts[i];
152 // _weights[i]= other._weights[i];
153 // }
154 
155  // copy over the lookup table
156  for (Int_t i= 0; i<_nPoints+1; i++)
157  _lookupTable[i]= other._lookupTable[i];
158 
160 }
161 
162 
163 ////////////////////////////////////////////////////////////////////////////////
164 
166  delete[] _dataPts;
167  delete[] _dataWgts;
168  delete[] _weights;
169 
171 }
172 
173 
174 
175 ////////////////////////////////////////////////////////////////////////////////
176 /// small helper structure
177 
178 namespace {
179  struct Data {
180  Double_t x;
181  Double_t w;
182  };
183  // helper to order two Data structures
184  struct cmp {
185  inline bool operator()(const struct Data& a, const struct Data& b) const
186  { return a.x < b.x; }
187  };
188 }
190  delete[] _dataPts;
191  delete[] _dataWgts;
192  delete[] _weights;
193 
194  std::vector<Data> tmp;
195  tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
196  Double_t x0 = 0., x1 = 0., x2 = 0.;
197  _sumWgt = 0.;
198  // read the data set into tmp and accumulate some statistics
199  RooRealVar& real = (RooRealVar&)(data.get()->operator[](_varName));
200  for (Int_t i = 0; i < data.numEntries(); ++i) {
201  data.get(i);
202  const Double_t x = real.getVal();
203  const Double_t w = data.weight();
204  x0 += w;
205  x1 += w * x;
206  x2 += w * x * x;
208 
209  Data p;
210  p.x = x, p.w = w;
211  tmp.push_back(p);
212  if (_mirrorLeft) {
213  p.x = 2. * _lo - x;
214  tmp.push_back(p);
215  }
216  if (_mirrorRight) {
217  p.x = 2. * _hi - x;
218  tmp.push_back(p);
219  }
220  }
221  // sort the entire data set so that values of x are increasing
222  std::sort(tmp.begin(), tmp.end(), cmp());
223 
224  // copy the sorted data set to its final destination
225  _nEvents = tmp.size();
226  _dataPts = new Double_t[_nEvents];
227  _dataWgts = new Double_t[_nEvents];
228  for (unsigned i = 0; i < tmp.size(); ++i) {
229  _dataPts[i] = tmp[i].x;
230  _dataWgts[i] = tmp[i].w;
231  }
232  {
233  // free tmp
234  std::vector<Data> tmp2;
235  tmp2.swap(tmp);
236  }
237 
238  Double_t meanv=x1/x0;
239  Double_t sigmav=std::sqrt(x2/x0-meanv*meanv);
240  Double_t h=std::pow(Double_t(4)/Double_t(3),0.2)*std::pow(_nEvents,-0.2)*_rho;
241  Double_t hmin=h*sigmav*std::sqrt(2.)/10;
242  Double_t norm=h*std::sqrt(sigmav)/(2.0*std::sqrt(3.0));
243 
245  for(Int_t j=0;j<_nEvents;++j) {
246  _weights[j]=norm/std::sqrt(g(_dataPts[j],h*sigmav));
247  if (_weights[j]<hmin) _weights[j]=hmin;
248  }
249 
250  // The idea below is that beyond nSigma sigma, the value of the exponential
251  // in the Gaussian is well below the machine precision of a double, so it
252  // does not contribute any more. That way, we can limit how many bins of the
253  // binned approximation in _lookupTable we have to touch when filling it.
254  for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
255  for(Int_t j=0;j<_nEvents;++j) {
256  const Double_t xlo = std::min(_hi,
257  std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
258  const Double_t xhi = std::max(_lo,
259  std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
260  if (xlo >= xhi) continue;
261  const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
262  const Double_t weightratio = _dataWgts[j] / _weights[j];
263  const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
264  const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
265  const Double_t x = (Double_t(_nPoints - binlo) * _lo +
266  Double_t(binlo) * _hi) / Double_t(_nPoints);
267  Double_t chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
268  for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
269  _lookupTable[k] += weightratio * std::exp(- chi * chi);
270  }
271  }
272  if (_asymLeft) {
273  for(Int_t j=0;j<_nEvents;++j) {
274  const Double_t xlo = std::min(_hi,
275  std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
276  const Double_t xhi = std::max(_lo,
277  std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
278  if (xlo >= xhi) continue;
279  const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
280  const Double_t weightratio = _dataWgts[j] / _weights[j];
281  const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
282  const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
283  const Double_t x = (Double_t(_nPoints - binlo) * _lo +
284  Double_t(binlo) * _hi) / Double_t(_nPoints);
285  Double_t chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
286  for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
287  _lookupTable[k] -= weightratio * std::exp(- chi * chi);
288  }
289  }
290  }
291  if (_asymRight) {
292  for(Int_t j=0;j<_nEvents;++j) {
293  const Double_t xlo = std::min(_hi,
294  std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
295  const Double_t xhi = std::max(_lo,
296  std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
297  if (xlo >= xhi) continue;
298  const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
299  const Double_t weightratio = _dataWgts[j] / _weights[j];
300  const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
301  const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
302  const Double_t x = (Double_t(_nPoints - binlo) * _lo +
303  Double_t(binlo) * _hi) / Double_t(_nPoints);
304  Double_t chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
305  for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
306  _lookupTable[k] -= weightratio * std::exp(- chi * chi);
307  }
308  }
309  }
310  static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi()));
311  for (Int_t i=0;i<_nPoints+1;++i)
312  _lookupTable[i] /= sqrt2pi * _sumWgt;
313 }
314 
315 ////////////////////////////////////////////////////////////////////////////////
316 
319  if (i<0) {
320 // cerr << "got point below lower bound:"
321 // << Double_t(_x) << " < " << _lo
322 // << " -- performing linear extrapolation..." << endl;
323  i=0;
324  }
325  if (i>_nPoints-1) {
326 // cerr << "got point above upper bound:"
327 // << Double_t(_x) << " > " << _hi
328 // << " -- performing linear extrapolation..." << endl;
329  i=_nPoints-1;
330  }
332 
333  // for now do simple linear interpolation.
334  // one day replace by splines...
335  Double_t ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
336  if (ret<0) ret=0 ;
337  return ret ;
338 }
339 
341  RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
342 {
343  if (matchArgs(allVars, analVars, _x)) return 1;
344  return 0;
345 }
346 
347 Double_t RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
348 {
349  R__ASSERT(1 == code);
350  // this code is based on _lookupTable and uses linear interpolation, just as
351  // evaluate(); integration is done using the trapez rule
352  const Double_t xmin = std::max(_lo, _x.min(rangeName));
353  const Double_t xmax = std::min(_hi, _x.max(rangeName));
354  const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
355  const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
356  _nPoints - 1);
357  Double_t sum = 0.;
358  // sum up complete bins in middle
359  if (imin + 1 < imax)
360  sum += _lookupTable[imin + 1] + _lookupTable[imax];
361  for (Int_t i = imin + 2; i < imax; ++i)
362  sum += 2. * _lookupTable[i];
363  sum *= _binWidth * 0.5;
364  // treat incomplete bins
365  const Double_t dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
366  const Double_t dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
367  if (imin < imax) {
368  // first bin
369  sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
370  _lookupTable[imin] + dxmin *
371  (_lookupTable[imin + 1] - _lookupTable[imin]));
372  // last bin
373  sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
374  _lookupTable[imax] + dxmax *
375  (_lookupTable[imax + 1] - _lookupTable[imax]));
376  } else if (imin == imax) {
377  // first bin == last bin
378  sum += _binWidth * (dxmax - dxmin) * 0.5 * (
379  _lookupTable[imin] + dxmin *
380  (_lookupTable[imin + 1] - _lookupTable[imin]) +
381  _lookupTable[imax] + dxmax *
382  (_lookupTable[imax + 1] - _lookupTable[imax]));
383  }
384  return sum;
385 }
386 
388 {
389  if (vars.contains(*_x.absArg())) return 1;
390  return 0;
391 }
392 
394 {
395  R__ASSERT(1 == code);
397  for (Int_t i = 0; i <= _nPoints; ++i)
398  if (max < _lookupTable[i]) max = _lookupTable[i];
399  return max;
400 }
401 
402 
403 ////////////////////////////////////////////////////////////////////////////////
404 
406  Double_t y=0;
407  // since data is sorted, we can be a little faster because we know which data
408  // points contribute
409  Double_t* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
410  x - _nSigma * sigmav);
411  if (it >= (_dataPts + _nEvents)) return 0.;
412  Double_t* iend = std::upper_bound(it, _dataPts + _nEvents,
413  x + _nSigma * sigmav);
414  for ( ; it < iend; ++it) {
415  const Double_t r = (x - *it) / sigmav;
416  y += std::exp(-0.5 * r * r);
417  }
418 
419  static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi()));
420  return y/(sigmav*sqrt2pi*_nEvents);
421 }
virtual Double_t analyticalIntegral(Int_t code, const char *rangeName=0) const
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral. ...
Definition: RooKeysPdf.cxx:347
Double_t _lookupTable[_nPoints+1]
Definition: RooKeysPdf.h:67
Double_t * _dataWgts
Definition: RooKeysPdf.h:62
float xmin
Definition: THbookFile.cxx:93
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
Definition: vector.h:433
Double_t _lo
Definition: RooKeysPdf.h:76
Double_t * _weights
Definition: RooKeysPdf.h:63
TH1 * h
Definition: legend2.C:5
virtual Double_t maxVal(Int_t code) const
Return maximum value for set of observables identified by code assigned in getMaxVal.
Definition: RooKeysPdf.cxx:393
Bool_t _mirrorLeft
Definition: RooKeysPdf.h:71
Bool_t contains(const RooAbsArg &var) const
Double_t g(Double_t x, Double_t sigma) const
Definition: RooKeysPdf.cxx:405
Double_t _sumWgt
Definition: RooKeysPdf.h:64
#define R__ASSERT(e)
Definition: TError.h:98
virtual Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=0) const
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported...
Definition: RooKeysPdf.cxx:340
int Int_t
Definition: RtypesCore.h:41
TArc * a
Definition: textangle.C:12
const Bool_t kFALSE
Definition: Rtypes.h:92
STL namespace.
const RooAbsReal & arg() const
Definition: RooRealProxy.h:43
double sqrt(double)
static const double x2[5]
#define TRACE_CREATE
Definition: RooTrace.h:23
Double_t x[n]
Definition: legend1.C:17
double pow(double, double)
Double_t evaluate() const
Definition: RooKeysPdf.cxx:317
Bool_t _asymRight
Definition: RooKeysPdf.h:72
std::vector< std::vector< double > > Data
Double_t getVal(const RooArgSet *set=0) const
Definition: RooAbsReal.h:64
void LoadDataSet(RooDataSet &data)
Definition: RooKeysPdf.cxx:189
Double_t _binWidth
Definition: RooKeysPdf.h:76
static const Double_t _nSigma
Definition: RooKeysPdf.h:58
virtual Int_t getMaxVal(const RooArgSet &vars) const
Advertise capability to determine maximum value of function for given set of observables.
Definition: RooKeysPdf.cxx:387
virtual ~RooKeysPdf()
Definition: RooKeysPdf.cxx:165
Char_t _varName[128]
Definition: RooKeysPdf.h:75
ROOT::R::TRInterface & r
Definition: Object.C:4
virtual Double_t weight() const
Return event weight of current event.
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:291
double floor(double)
Double_t _hi
Definition: RooKeysPdf.h:76
RooAbsArg * absArg() const
Definition: RooArgProxy.h:37
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
float xmax
Definition: THbookFile.cxx:93
Int_t _nEvents
Definition: RooKeysPdf.h:60
Bool_t _mirrorRight
Definition: RooKeysPdf.h:71
REAL epsilon
Definition: triangle.c:617
Double_t _rho
Definition: RooKeysPdf.h:77
Double_t Pi()
Definition: TMath.h:44
TRObject operator()(const T1 &t1) const
static const double x1[5]
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
RooAbsReal is the common abstract base class for objects that represent a real value and implements f...
Definition: RooAbsReal.h:53
Double_t y[n]
Definition: legend1.C:17
#define TRACE_DESTROY
Definition: RooTrace.h:24
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
Definition: vector.h:440
#define name(a, b)
Definition: linkTestLib0.cpp:5
Bool_t _asymLeft
Definition: RooKeysPdf.h:72
RooAbsPdf is the abstract interface for all probability density functions The class provides hybrid a...
Definition: RooAbsPdf.h:41
Double_t min(const char *rname=0) const
Definition: RooRealProxy.h:56
RooRealProxy _x
Definition: RooKeysPdf.h:52
Bool_t matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
double exp(double)
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
virtual const RooArgSet * get(Int_t index) const
Return RooArgSet with coordinates of event 'index'.
Double_t max(const char *rname=0) const
Definition: RooRealProxy.h:57
double log(double)
Double_t * _dataPts
Definition: RooKeysPdf.h:61