Logo ROOT  
Reference Guide
RooMath.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitCore *
4  * @(#)root/roofitcore:$Id$
5  * Authors: *
6  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * *
9  * Copyright (c) 2000-2005, Regents of the University of California *
10  * and Stanford University. All rights reserved. *
11  * *
12  * Redistribution and use in source and binary forms, *
13  * with or without modification, are permitted according to the terms *
14  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15  *****************************************************************************/
16 
17 // -- CLASS DESCRIPTION [MISC] --
18 // RooMath is a singleton class implementing various mathematical
19 // functions not found in TMath, mostly involving complex algebra
20 //
21 //
22 
23 #include <complex>
24 #include <cmath>
25 #include <algorithm>
26 
27 #include "RooFit.h"
28 
29 #include "RooMath.h"
30 #include "Riostream.h"
31 #include "RooMsgService.h"
32 
33 using namespace std;
34 
35 namespace faddeeva_impl {
36  static inline void cexp(double& re, double& im)
37  {
38  // with gcc on unix machines and on x86_64, we can gain by hand-coding
39  // exp(z) for the x87 coprocessor; other platforms have the default
40  // routines as fallback implementation, and compilers other than gcc on
41  // x86_64 generate better code with the default routines; also avoid
42  // the inline assembly code when the copiler is not optimising code, or
43  // is optimising for code size
44  // (we insist on __unix__ here, since the assemblers on other OSs
45  // typically do not speak AT&T syntax as gas does...)
46 #if !(defined(__GNUC__) || defined(__clang__)) || \
47  !defined(__unix__) || !defined(__x86_64__) || \
48  !defined(__OPTIMIZE__) || defined(__OPTIMIZE_SIZE__) || \
49  defined(__INTEL_COMPILER) || \
50  defined(__OPEN64__) || defined(__PATHSCALE__)
51  const double e = std::exp(re);
52  re = e * std::cos(im);
53  im = e * std::sin(im);
54 #else
55  __asm__ (
56  "fxam\n\t" // examine st(0): NaN? Inf?
57  "fstsw %%ax\n\t"
58  "movb $0x45,%%dh\n\t"
59  "andb %%ah,%%dh\n\t"
60  "cmpb $0x05,%%dh\n\t"
61  "jz 1f\n\t" // have NaN or infinity, handle below
62  "fldl2e\n\t" // load log2(e)
63  "fmulp\n\t" // re * log2(e)
64  "fld %%st(0)\n\t" // duplicate re * log2(e)
65  "frndint\n\t" // int(re * log2(e))
66  "fsubr %%st,%%st(1)\n\t" // st(1) = x = frac(re * log2(e))
67  "fxch\n\t" // swap st(0), st(1)
68  "f2xm1\n\t" // 2^x - 1
69  "fld1\n\t" // st(0) = 1
70  "faddp\n\t" // st(0) = 2^x
71  "fscale\n\t" // 2 ^ (int(re * log2(e)) + x)
72  "fstp %%st(1)\n\t" // pop st(1)
73  "jmp 2f\n\t"
74  "1:\n\t" // handle NaN, Inf...
75  "testl $0x200, %%eax\n\t"// -infinity?
76  "jz 2f\n\t"
77  "fstp %%st\n\t" // -Inf, so pop st(0)
78  "fldz\n\t" // st(0) = 0
79  "2:\n\t" // here. we have st(0) == exp(re)
80  "fxch\n\t" // st(0) = im, st(1) = exp(re)
81  "fsincos\n\t" // st(0) = cos(im), st(1) = sin(im)
82  "fnstsw %%ax\n\t"
83  "testl $0x400,%%eax\n\t"
84  "jz 4f\n\t" // |im| too large for fsincos?
85  "fldpi\n\t" // st(0) = pi
86  "fadd %%st(0)\n\t" // st(0) *= 2;
87  "fxch %%st(1)\n\t" // st(0) = im, st(1) = 2 * pi
88  "3:\n\t"
89  "fprem1\n\t" // st(0) = fmod(im, 2 * pi)
90  "fnstsw %%ax\n\t"
91  "testl $0x400,%%eax\n\t"
92  "jnz 3b\n\t" // fmod done?
93  "fstp %%st(1)\n\t" // yes, pop st(1) == 2 * pi
94  "fsincos\n\t" // st(0) = cos(im), st(1) = sin(im)
95  "4:\n\t" // all fine, fsincos succeeded
96  "fmul %%st(2)\n\t" // st(0) *= st(2)
97  "fxch %%st(2)\n\t" // st(2)=exp(re)*cos(im),st(0)=exp(im)
98  "fmulp %%st(1)\n\t" // st(1)=exp(re)*sin(im), pop st(0)
99  : "=t" (im), "=u" (re): "0" (re), "1" (im) :
100  "eax", "dh", "cc"
101 #ifndef __clang__
102  // normal compilers (like gcc) want to be told that we
103  // clobber x87 registers, even if we pop them afterwards
104  // (so they can make sure they don't save anything there)
105  , "st(5)", "st(6)", "st(7)"
106 #else // __clang__
107  // clang produces an error message with the clobber list
108  // from above - not sure why; it seems harmless to leave
109  // the popped x87 registers out of the clobber list for
110  // clang, and that is in fact what seems to be recommended
111  // here:
112  // http://lists.cs.uiuc.edu/pipermail/cfe-dev/2012-May/021715.html
113 #endif // __clang__
114  );
115 #endif
116  }
117 
118  template <class T, unsigned N, unsigned NTAYLOR, unsigned NCF>
119  static inline std::complex<T> faddeeva_smabmq_impl(
120  T zre, T zim, const T tm,
121  const T (&a)[N], const T (&npi)[N],
122  const T (&taylorarr)[N * NTAYLOR * 2])
123  {
124  // catch singularities in the Fourier representation At
125  // z = n pi / tm, and provide a Taylor series expansion in those
126  // points, and only use it when we're close enough to the real axis
127  // that there is a chance we need it
128  const T zim2 = zim * zim;
129  const T maxnorm = T(9) / T(1000000);
130  if (zim2 < maxnorm) {
131  // we're close enough to the real axis that we need to worry about
132  // singularities
133  const T dnsing = tm * zre / npi[1];
134  const T dnsingmax2 = (T(N) - T(1) / T(2)) * (T(N) - T(1) / T(2));
135  if (dnsing * dnsing < dnsingmax2) {
136  // we're in the interesting range of the real axis as well...
137  // deal with Re(z) < 0 so we only need N different Taylor
138  // expansions; use w(-x+iy) = conj(w(x+iy))
139  const bool negrez = zre < T(0);
140  // figure out closest singularity
141  const int nsing = int(std::abs(dnsing) + T(1) / T(2));
142  // and calculate just how far we are from it
143  const T zmnpire = std::abs(zre) - npi[nsing];
144  const T zmnpinorm = zmnpire * zmnpire + zim2;
145  // close enough to one of the singularities?
146  if (zmnpinorm < maxnorm) {
147  const T* coeffs = &taylorarr[nsing * NTAYLOR * 2];
148  // calculate value of taylor expansion...
149  // (note: there's no chance to vectorize this one, since
150  // the value of the next iteration depend on the ones from
151  // the previous iteration)
152  T sumre = coeffs[0], sumim = coeffs[1];
153  for (unsigned i = 1; i < NTAYLOR; ++i) {
154  const T re = sumre * zmnpire - sumim * zim;
155  const T im = sumim * zmnpire + sumre * zim;
156  sumre = re + coeffs[2 * i + 0];
157  sumim = im + coeffs[2 * i + 1];
158  }
159  // undo the flip in real part of z if needed
160  if (negrez) return std::complex<T>(sumre, -sumim);
161  else return std::complex<T>(sumre, sumim);
162  }
163  }
164  }
165  // negative Im(z) is treated by calculating for -z, and using the
166  // symmetry properties of erfc(z)
167  const bool negimz = zim < T(0);
168  if (negimz) {
169  zre = -zre;
170  zim = -zim;
171  }
172  const T znorm = zre * zre + zim2;
173  if (znorm > tm * tm) {
174  // use continued fraction approximation for |z| large
175  const T isqrtpi = 5.64189583547756287e-01;
176  const T z2re = (zre + zim) * (zre - zim);
177  const T z2im = T(2) * zre * zim;
178  T cfre = T(1), cfim = T(0), cfnorm = T(1);
179  for (unsigned k = NCF; k; --k) {
180  cfre = +(T(k) / T(2)) * cfre / cfnorm;
181  cfim = -(T(k) / T(2)) * cfim / cfnorm;
182  if (k & 1) cfre -= z2re, cfim -= z2im;
183  else cfre += T(1);
184  cfnorm = cfre * cfre + cfim * cfim;
185  }
186  T sumre = (zim * cfre - zre * cfim) * isqrtpi / cfnorm;
187  T sumim = -(zre * cfre + zim * cfim) * isqrtpi / cfnorm;
188  if (negimz) {
189  // use erfc(-z) = 2 - erfc(z) to get good accuracy for
190  // Im(z) < 0: 2 / exp(z^2) - w(z)
191  T ez2re = -z2re, ez2im = -z2im;
192  faddeeva_impl::cexp(ez2re, ez2im);
193  return std::complex<T>(T(2) * ez2re - sumre,
194  T(2) * ez2im - sumim);
195  } else {
196  return std::complex<T>(sumre, sumim);
197  }
198  }
199  const T twosqrtpi = 3.54490770181103205e+00;
200  const T tmzre = tm * zre, tmzim = tm * zim;
201  // calculate exp(i tm z)
202  T eitmzre = -tmzim, eitmzim = tmzre;
203  faddeeva_impl::cexp(eitmzre, eitmzim);
204  // form 1 +/- exp (i tm z)
205  const T numerarr[4] = {
206  T(1) - eitmzre, -eitmzim, T(1) + eitmzre, +eitmzim
207  };
208  // form tm z * (1 +/- exp(i tm z))
209  const T numertmz[4] = {
210  tmzre * numerarr[0] - tmzim * numerarr[1],
211  tmzre * numerarr[1] + tmzim * numerarr[0],
212  tmzre * numerarr[2] - tmzim * numerarr[3],
213  tmzre * numerarr[3] + tmzim * numerarr[2]
214  };
215  // common subexpressions for use inside the loop
216  const T reimtmzm2 = T(-2) * tmzre * tmzim;
217  const T imtmz2 = tmzim * tmzim;
218  const T reimtmzm22 = reimtmzm2 * reimtmzm2;
219  // on non-x86_64 architectures, when the compiler is producing
220  // unoptimised code and when optimising for code size, we use the
221  // straightforward implementation, but for x86_64, we use the
222  // brainf*cked code below that the gcc vectorizer likes to gain a few
223  // clock cycles; non-gcc compilers also get the normal code, since they
224  // usually do a better job with the default code (and yes, it's a pain
225  // that they're all pretending to be gcc)
226 #if (!defined(__x86_64__)) || !defined(__OPTIMIZE__) || \
227  defined(__OPTIMIZE_SIZE__) || defined(__INTEL_COMPILER) || \
228  defined(__clang__) || defined(__OPEN64__) || \
229  defined(__PATHSCALE__) || !defined(__GNUC__)
230  T sumre = (-a[0] / znorm) * (numerarr[0] * zre + numerarr[1] * zim);
231  T sumim = (-a[0] / znorm) * (numerarr[1] * zre - numerarr[0] * zim);
232  for (unsigned i = 0; i < N; ++i) {
233  const unsigned j = (i << 1) & 2;
234  // denominator
235  const T wk = imtmz2 + (npi[i] + tmzre) * (npi[i] - tmzre);
236  // norm of denominator
237  const T norm = wk * wk + reimtmzm22;
238  const T f = T(2) * tm * a[i] / norm;
239  // sum += a[i] * numer / wk
240  sumre -= f * (numertmz[j] * wk + numertmz[j + 1] * reimtmzm2);
241  sumim -= f * (numertmz[j + 1] * wk - numertmz[j] * reimtmzm2);
242  }
243 #else
244  // BEGIN fully vectorisable code - enjoy reading... ;)
245  T tmp[2 * N];
246  for (unsigned i = 0; i < N; ++i) {
247  const T wk = imtmz2 + (npi[i] + tmzre) * (npi[i] - tmzre);
248  tmp[2 * i + 0] = wk;
249  tmp[2 * i + 1] = T(2) * tm * a[i] / (wk * wk + reimtmzm22);
250  }
251  for (unsigned i = 0; i < N / 2; ++i) {
252  T wk = tmp[4 * i + 0], f = tmp[4 * i + 1];
253  tmp[4 * i + 0] = -f * (numertmz[0] * wk + numertmz[1] * reimtmzm2);
254  tmp[4 * i + 1] = -f * (numertmz[1] * wk - numertmz[0] * reimtmzm2);
255  wk = tmp[4 * i + 2], f = tmp[4 * i + 3];
256  tmp[4 * i + 2] = -f * (numertmz[2] * wk + numertmz[3] * reimtmzm2);
257  tmp[4 * i + 3] = -f * (numertmz[3] * wk - numertmz[2] * reimtmzm2);
258  }
259  if (N & 1) {
260  // we may have missed one element in the last loop; if so, process
261  // it now...
262  const T wk = tmp[2 * N - 2], f = tmp[2 * N - 1];
263  tmp[2 * (N - 1) + 0] = -f * (numertmz[0] * wk + numertmz[1] * reimtmzm2);
264  tmp[2 * (N - 1) + 1] = -f * (numertmz[1] * wk - numertmz[0] * reimtmzm2);
265  }
266  T sumre = (-a[0] / znorm) * (numerarr[0] * zre + numerarr[1] * zim);
267  T sumim = (-a[0] / znorm) * (numerarr[1] * zre - numerarr[0] * zim);
268  for (unsigned i = 0; i < N; ++i) {
269  sumre += tmp[2 * i + 0];
270  sumim += tmp[2 * i + 1];
271  }
272  // END fully vectorisable code
273 #endif
274  // prepare the result
275  if (negimz) {
276  // use erfc(-z) = 2 - erfc(z) to get good accuracy for
277  // Im(z) < 0: 2 / exp(z^2) - w(z)
278  const T z2im = -T(2) * zre * zim;
279  const T z2re = -(zre + zim) * (zre - zim);
280  T ez2re = z2re, ez2im = z2im;
281  faddeeva_impl::cexp(ez2re, ez2im);
282  return std::complex<T>(T(2) * ez2re + sumim / twosqrtpi,
283  T(2) * ez2im - sumre / twosqrtpi);
284  } else {
285  return std::complex<T>(-sumim / twosqrtpi, sumre / twosqrtpi);
286  }
287  }
288 
289  static const double npi24[24] = { // precomputed values n * pi
290  0.00000000000000000e+00, 3.14159265358979324e+00, 6.28318530717958648e+00,
291  9.42477796076937972e+00, 1.25663706143591730e+01, 1.57079632679489662e+01,
292  1.88495559215387594e+01, 2.19911485751285527e+01, 2.51327412287183459e+01,
293  2.82743338823081391e+01, 3.14159265358979324e+01, 3.45575191894877256e+01,
294  3.76991118430775189e+01, 4.08407044966673121e+01, 4.39822971502571053e+01,
295  4.71238898038468986e+01, 5.02654824574366918e+01, 5.34070751110264851e+01,
296  5.65486677646162783e+01, 5.96902604182060715e+01, 6.28318530717958648e+01,
297  6.59734457253856580e+01, 6.91150383789754512e+01, 7.22566310325652445e+01,
298  };
299  static const double a24[24] = { // precomputed Fourier coefficient prefactors
300  2.95408975150919338e-01, 2.75840233292177084e-01, 2.24573955224615866e-01,
301  1.59414938273911723e-01, 9.86657664154541891e-02, 5.32441407876394120e-02,
302  2.50521500053936484e-02, 1.02774656705395362e-02, 3.67616433284484706e-03,
303  1.14649364124223317e-03, 3.11757015046197600e-04, 7.39143342960301488e-05,
304  1.52794934280083635e-05, 2.75395660822107093e-06, 4.32785878190124505e-07,
305  5.93003040874588103e-08, 7.08449030774820423e-09, 7.37952063581678038e-10,
306  6.70217160600200763e-11, 5.30726516347079017e-12, 3.66432411346763916e-13,
307  2.20589494494103134e-14, 1.15782686262855879e-15, 5.29871142946730482e-17,
308  };
309  static const double taylorarr24[24 * 12] = {
310  // real part imaginary part, low order coefficients last
311  // nsing = 0
312  0.00000000000000000e-00, 3.00901111225470020e-01,
313  5.00000000000000000e-01, 0.00000000000000000e-00,
314  0.00000000000000000e-00, -7.52252778063675049e-01,
315  -1.00000000000000000e-00, 0.00000000000000000e-00,
316  0.00000000000000000e-00, 1.12837916709551257e+00,
317  1.00000000000000000e-00, 0.00000000000000000e-00,
318  // nsing = 1
319  -2.22423508493755319e-01, 1.87966717746229718e-01,
320  3.41805419240637628e-01, 3.42752593807919263e-01,
321  4.66574321730757753e-01, -5.59649213591058097e-01,
322  -8.05759710273191021e-01, -5.38989366115424093e-01,
323  -4.88914083733395200e-01, 9.80580906465856792e-01,
324  9.33757118080975970e-01, 2.82273885115127769e-01,
325  // nsing = 2
326  -2.60522586513312894e-01, -4.26259455096092786e-02,
327  1.36549702008863349e-03, 4.39243227763478846e-01,
328  6.50591493715480700e-01, -1.23422352472779046e-01,
329  -3.43379903564271318e-01, -8.13862662890748911e-01,
330  -7.96093943501906645e-01, 6.11271022503935772e-01,
331  7.60213717643090957e-01, 4.93801903948967945e-01,
332  // nsing = 3
333  -1.18249853727020186e-01, -1.90471659765411376e-01,
334  -2.59044664869706839e-01, 2.69333898502392004e-01,
335  4.99077838344125714e-01, 2.64644800189075006e-01,
336  1.26114512111568737e-01, -7.46519337025968199e-01,
337  -8.47666863706379907e-01, 1.89347715957263646e-01,
338  5.39641485816297176e-01, 5.97805988669631615e-01,
339  // nsing = 4
340  4.94825297066481491e-02, -1.71428212158876197e-01,
341  -2.97766677111471585e-01, 1.60773286596649656e-02,
342  1.88114210832460682e-01, 4.11734391195006462e-01,
343  3.98540613293909842e-01, -4.63321903522162715e-01,
344  -6.99522070542463639e-01, -1.32412024008354582e-01,
345  3.33997185986131785e-01, 6.01983450812696742e-01,
346  // nsing = 5
347  1.18367078448232332e-01, -6.09533063579086850e-02,
348  -1.74762998833038991e-01, -1.39098099222000187e-01,
349  -6.71534655984154549e-02, 3.34462251996496680e-01,
350  4.37429678577360024e-01, -1.59613865629038012e-01,
351  -4.71863911886034656e-01, -2.92759316465055762e-01,
352  1.80238737704018306e-01, 5.42834914744283253e-01,
353  // nsing = 6
354  8.87698096005701290e-02, 2.84339354980994902e-02,
355  -3.18943083830766399e-02, -1.53946887977045862e-01,
356  -1.71825061547624858e-01, 1.70734367410600348e-01,
357  3.33690792296469441e-01, 3.97048587678703930e-02,
358  -2.66422678503135697e-01, -3.18469797424381480e-01,
359  8.48049724711137773e-02, 4.60546329221462864e-01,
360  // nsing = 7
361  2.99767046276705077e-02, 5.34659695701718247e-02,
362  4.53131030251822568e-02, -9.37915401977138648e-02,
363  -1.57982359988083777e-01, 3.82170507060760740e-02,
364  1.98891589845251706e-01, 1.17546677047049354e-01,
365  -1.27514335237079297e-01, -2.72741112680307074e-01,
366  3.47906344595283767e-02, 3.82277517244493224e-01,
367  // nsing = 8
368  -7.35922494437203395e-03, 3.72011290318534610e-02,
369  5.66783220847204687e-02, -3.21015398169199501e-02,
370  -1.00308737825172555e-01, -2.57695148077963515e-02,
371  9.67294850588435368e-02, 1.18174625238337507e-01,
372  -5.21266530264988508e-02, -2.08850084114630861e-01,
373  1.24443217440050976e-02, 3.19239968065752286e-01,
374  // nsing = 9
375  -1.66126772808035320e-02, 1.46180329587665321e-02,
376  3.85927576915247303e-02, 1.18910471133003227e-03,
377  -4.94003498320899806e-02, -3.93468443660139110e-02,
378  3.92113167048952835e-02, 9.03306084789976219e-02,
379  -1.82889636251263500e-02, -1.53816215444915245e-01,
380  3.88103861995563741e-03, 2.72090310854550347e-01,
381  // nsing = 10
382  -1.21245068916826880e-02, 1.59080224420074489e-03,
383  1.91116222508366035e-02, 1.05879549199053302e-02,
384  -1.97228428219695318e-02, -3.16962067712639397e-02,
385  1.34110372628315158e-02, 6.18045654429108837e-02,
386  -5.52574921865441838e-03, -1.14259663804569455e-01,
387  1.05534036292203489e-03, 2.37326534898818288e-01,
388  // nsing = 11
389  -5.96835002183177493e-03, -2.42594931567031205e-03,
390  7.44753817476594184e-03, 9.33450807578394386e-03,
391  -6.52649522783026481e-03, -2.08165802069352019e-02,
392  3.89988065678848650e-03, 4.12784313451549132e-02,
393  -1.44110721106127920e-03, -8.76484782997757425e-02,
394  2.50210184908121337e-04, 2.11131066219336647e-01,
395  // nsing = 12
396  -2.24505212235034193e-03, -2.38114524227619446e-03,
397  2.36375918970809340e-03, 5.97324040603806266e-03,
398  -1.81333819936645381e-03, -1.28126250720444051e-02,
399  9.69251586187208358e-04, 2.83055679874589732e-02,
400  -3.24986363596307374e-04, -6.97056268370209313e-02,
401  5.17231862038123061e-05, 1.90681117197597520e-01,
402  // nsing = 13
403  -6.76887607549779069e-04, -1.48589685249767064e-03,
404  6.22548369472046953e-04, 3.43871156746448680e-03,
405  -4.26557147166379929e-04, -7.98854145009655400e-03,
406  2.06644460919535524e-04, 2.03107152586353217e-02,
407  -6.34563929410856987e-05, -5.71425144910115832e-02,
408  9.32252179140502456e-06, 1.74167663785025829e-01,
409  // nsing = 14
410  -1.67596437777156162e-04, -8.05384193869903178e-04,
411  1.37627277777023791e-04, 1.97652692602724093e-03,
412  -8.54392244879459717e-05, -5.23088906415977167e-03,
413  3.78965577556493513e-05, 1.52191559129376333e-02,
414  -1.07393019498185646e-05, -4.79347862153366295e-02,
415  1.46503970628861795e-06, 1.60471011683477685e-01,
416  // nsing = 15
417  -3.45715760630978778e-05, -4.31089554210205493e-04,
418  2.57350138106549737e-05, 1.19449262097417514e-03,
419  -1.46322227517372253e-05, -3.61303766799909378e-03,
420  5.99057675687392260e-06, 1.17993805017130890e-02,
421  -1.57660578509526722e-06, -4.09165023743669707e-02,
422  2.00739683204152177e-07, 1.48879348585662670e-01,
423  // nsing = 16
424  -5.99735188857573424e-06, -2.42949218855805052e-04,
425  4.09249090936269722e-06, 7.67400152727128171e-04,
426  -2.14920611287648034e-06, -2.60710519575546230e-03,
427  8.17591694958640978e-07, 9.38581640137393053e-03,
428  -2.00910914042737743e-07, -3.54045580123653803e-02,
429  2.39819738182594508e-08, 1.38916449405613711e-01,
430  // nsing = 17
431  -8.80708505155966658e-07, -1.46479474515521504e-04,
432  5.55693207391871904e-07, 5.19165587844615415e-04,
433  -2.71391142598826750e-07, -1.94439427580099576e-03,
434  9.64641799864928425e-08, 7.61536975207357980e-03,
435  -2.22357616069432967e-08, -3.09762939485679078e-02,
436  2.49806920458212581e-09, 1.30247401712293206e-01,
437  // nsing = 18
438  -1.10007111030476390e-07, -9.35886150886691786e-05,
439  6.46244096997824390e-08, 3.65267193418479043e-04,
440  -2.95175785569292542e-08, -1.48730955943961081e-03,
441  9.84949251974795537e-09, 6.27824679148707177e-03,
442  -2.13827217704781576e-09, -2.73545766571797965e-02,
443  2.26877724435352177e-10, 1.22627158810895267e-01,
444  // nsing = 19
445  -1.17302439957657553e-08, -6.24890956722053332e-05,
446  6.45231881609786173e-09, 2.64799907072561543e-04,
447  -2.76943921343331654e-09, -1.16094187847598385e-03,
448  8.71074689656480749e-10, 5.24514377390761210e-03,
449  -1.78730768958639407e-10, -2.43489203319091538e-02,
450  1.79658223341365988e-11, 1.15870972518909888e-01,
451  // nsing = 20
452  -1.07084502471985403e-09, -4.31515421260633319e-05,
453  5.54152563270547927e-10, 1.96606443937168357e-04,
454  -2.24423474431542338e-10, -9.21550077887211094e-04,
455  6.67734377376211580e-11, 4.43201203646827019e-03,
456  -1.29896907717633162e-11, -2.18236356404862774e-02,
457  1.24042409733678516e-12, 1.09836276968151848e-01,
458  // nsing = 21
459  -8.38816525569060600e-11, -3.06091807093959821e-05,
460  4.10033961556230842e-11, 1.48895624771753491e-04,
461  -1.57238128435253905e-11, -7.42073499862065649e-04,
462  4.43938379112418832e-12, 3.78197089773957382e-03,
463  -8.21067867869285873e-13, -1.96793607299577220e-02,
464  7.46725770201828754e-14, 1.04410965521273064e-01,
465  // nsing = 22
466  -5.64848922712870507e-12, -2.22021942382507691e-05,
467  2.61729537775838587e-12, 1.14683068921649992e-04,
468  -9.53316139085394895e-13, -6.05021573565916914e-04,
469  2.56116039498542220e-13, 3.25530796858307225e-03,
470  -4.51482829896525004e-14, -1.78416955716514289e-02,
471  3.91940313268087086e-15, 9.95054815464739996e-02,
472  // nsing = 23
473  -3.27482357793897640e-13, -1.64138890390689871e-05,
474  1.44278798346454523e-13, 8.96362542918265398e-05,
475  -5.00524303437266481e-14, -4.98699756861136127e-04,
476  1.28274026095767213e-14, 2.82359118537843949e-03,
477  -2.16009593993917109e-15, -1.62538825704327487e-02,
478  1.79368667683853708e-16, 9.50473084594884184e-02
479  };
480 
481  const double npi11[11] = { // precomputed values n * pi
482  0.00000000000000000e+00, 3.14159265358979324e+00, 6.28318530717958648e+00,
483  9.42477796076937972e+00, 1.25663706143591730e+01, 1.57079632679489662e+01,
484  1.88495559215387594e+01, 2.19911485751285527e+01, 2.51327412287183459e+01,
485  2.82743338823081391e+01, 3.14159265358979324e+01
486  };
487  const double a11[11] = { // precomputed Fourier coefficient prefactors
488  4.43113462726379007e-01, 3.79788034073635143e-01, 2.39122407410867584e-01,
489  1.10599187402169792e-01, 3.75782250080904725e-02, 9.37936104296856288e-03,
490  1.71974046186334976e-03, 2.31635559000523461e-04, 2.29192401420125452e-05,
491  1.66589592139340077e-06, 8.89504561311882155e-08
492  };
493  const double taylorarr11[11 * 6] = {
494  // real part imaginary part, low order coefficients last
495  // nsing = 0
496  -1.00000000000000000e+00, 0.00000000000000000e+00,
497  0.00000000000000000e-01, 1.12837916709551257e+00,
498  1.00000000000000000e+00, 0.00000000000000000e+00,
499  // nsing = 1
500  -5.92741768247463996e-01, -7.19914991991294310e-01,
501  -6.73156763521649944e-01, 8.14025039279059577e-01,
502  8.57089811121701143e-01, 4.00248106586639754e-01,
503  // nsing = 2
504  1.26114512111568737e-01, -7.46519337025968199e-01,
505  -8.47666863706379907e-01, 1.89347715957263646e-01,
506  5.39641485816297176e-01, 5.97805988669631615e-01,
507  // nsing = 3
508  4.43238482668529408e-01, -3.03563167310638372e-01,
509  -5.88095866853990048e-01, -2.32638360700858412e-01,
510  2.49595637924601714e-01, 5.77633779156009340e-01,
511  // nsing = 4
512  3.33690792296469441e-01, 3.97048587678703930e-02,
513  -2.66422678503135697e-01, -3.18469797424381480e-01,
514  8.48049724711137773e-02, 4.60546329221462864e-01,
515  // nsing = 5
516  1.42043544696751869e-01, 1.24094227867032671e-01,
517  -8.31224229982140323e-02, -2.40766729258442100e-01,
518  2.11669512031059302e-02, 3.48650139549945097e-01,
519  // nsing = 6
520  3.92113167048952835e-02, 9.03306084789976219e-02,
521  -1.82889636251263500e-02, -1.53816215444915245e-01,
522  3.88103861995563741e-03, 2.72090310854550347e-01,
523  // nsing = 7
524  7.37741897722738503e-03, 5.04625223970221539e-02,
525  -2.87394336989990770e-03, -9.96122819257496929e-02,
526  5.22745478269428248e-04, 2.23361039070072101e-01,
527  // nsing = 8
528  9.69251586187208358e-04, 2.83055679874589732e-02,
529  -3.24986363596307374e-04, -6.97056268370209313e-02,
530  5.17231862038123061e-05, 1.90681117197597520e-01,
531  // nsing = 9
532  9.01625563468897100e-05, 1.74961124275657019e-02,
533  -2.65745127697337342e-05, -5.22070356354932341e-02,
534  3.75952450449939411e-06, 1.67018782142871146e-01,
535  // nsing = 10
536  5.99057675687392260e-06, 1.17993805017130890e-02,
537  -1.57660578509526722e-06, -4.09165023743669707e-02,
538  2.00739683204152177e-07, 1.48879348585662670e-01
539  };
540 }
541 
542 std::complex<double> RooMath::faddeeva(std::complex<double> z)
543 {
544  return faddeeva_impl::faddeeva_smabmq_impl<double, 24, 6, 9>(
545  z.real(), z.imag(), 12., faddeeva_impl::a24,
547 }
548 
549 std::complex<double> RooMath::faddeeva_fast(std::complex<double> z)
550 {
551  return faddeeva_impl::faddeeva_smabmq_impl<double, 11, 3, 3>(
552  z.real(), z.imag(), 8., faddeeva_impl::a11,
554 }
555 
556 std::complex<double> RooMath::erfc(const std::complex<double> z)
557 {
558  double re = -z.real() * z.real() + z.imag() * z.imag();
559  double im = -2. * z.real() * z.imag();
560  faddeeva_impl::cexp(re, im);
561  return (z.real() >= 0.) ?
562  (std::complex<double>(re, im) *
563  faddeeva(std::complex<double>(-z.imag(), z.real()))) :
564  (2. - std::complex<double>(re, im) *
565  faddeeva(std::complex<double>(z.imag(), -z.real())));
566 }
567 
568 std::complex<double> RooMath::erfc_fast(const std::complex<double> z)
569 {
570  double re = -z.real() * z.real() + z.imag() * z.imag();
571  double im = -2. * z.real() * z.imag();
572  faddeeva_impl::cexp(re, im);
573  return (z.real() >= 0.) ?
574  (std::complex<double>(re, im) *
575  faddeeva_fast(std::complex<double>(-z.imag(), z.real()))) :
576  (2. - std::complex<double>(re, im) *
577  faddeeva_fast(std::complex<double>(z.imag(), -z.real())));
578 }
579 
580 std::complex<double> RooMath::erf(const std::complex<double> z)
581 {
582  double re = -z.real() * z.real() + z.imag() * z.imag();
583  double im = -2. * z.real() * z.imag();
584  faddeeva_impl::cexp(re, im);
585  return (z.real() >= 0.) ?
586  (1. - std::complex<double>(re, im) *
587  faddeeva(std::complex<double>(-z.imag(), z.real()))) :
588  (std::complex<double>(re, im) *
589  faddeeva(std::complex<double>(z.imag(), -z.real())) - 1.);
590 }
591 
592 std::complex<double> RooMath::erf_fast(const std::complex<double> z)
593 {
594  double re = -z.real() * z.real() + z.imag() * z.imag();
595  double im = -2. * z.real() * z.imag();
596  faddeeva_impl::cexp(re, im);
597  return (z.real() >= 0.) ?
598  (1. - std::complex<double>(re, im) *
599  faddeeva_fast(std::complex<double>(-z.imag(), z.real()))) :
600  (std::complex<double>(re, im) *
601  faddeeva_fast(std::complex<double>(z.imag(), -z.real())) - 1.);
602 }
603 
604 
606 {
607  // Interpolate array 'ya' with 'n' elements for 'x' (between 0 and 'n'-1)
608 
609  // Int to Double conversion is faster via array lookup than type conversion!
610  static Double_t itod[20] = { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
611  10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0} ;
612  int i,m,ns=1 ;
613  Double_t den,dif,dift/*,ho,hp,w*/,y,dy ;
614  Double_t c[20], d[20] ;
615 
616  dif = fabs(x) ;
617  for(i=1 ; i<=n ; i++) {
618  if ((dift=fabs(x-itod[i-1]))<dif) {
619  ns=i ;
620  dif=dift ;
621  }
622  c[i] = ya[i-1] ;
623  d[i] = ya[i-1] ;
624  }
625 
626  y=ya[--ns] ;
627  for(m=1 ; m<n; m++) {
628  for(i=1 ; i<=n-m ; i++) {
629  den=(c[i+1] - d[i])/itod[m] ;
630  d[i]=(x-itod[i+m-1])*den ;
631  c[i]=(x-itod[i-1])*den ;
632  }
633  dy = (2*ns)<(n-m) ? c[ns+1] : d[ns--] ;
634  y += dy ;
635  }
636  return y ;
637 }
638 
639 
640 
642 {
643  // Interpolate array 'ya' with 'n' elements for 'xa'
644 
645  // Int to Double conversion is faster via array lookup than type conversion!
646 // static Double_t itod[20] = { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
647 // 10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0} ;
648  int i,m,ns=1 ;
649  Double_t den,dif,dift,ho,hp,w,y,dy ;
650  Double_t c[20], d[20] ;
651 
652  dif = fabs(x-xa[0]) ;
653  for(i=1 ; i<=n ; i++) {
654  if ((dift=fabs(x-xa[i-1]))<dif) {
655  ns=i ;
656  dif=dift ;
657  }
658  c[i] = ya[i-1] ;
659  d[i] = ya[i-1] ;
660  }
661 
662  y=ya[--ns] ;
663  for(m=1 ; m<n; m++) {
664  for(i=1 ; i<=n-m ; i++) {
665  ho=xa[i-1]-x ;
666  hp=xa[i-1+m]-x ;
667  w=c[i+1]-d[i] ;
668  den=ho-hp ;
669  if (den==0.) {
670  oocoutE((TObject*)0,Eval) << "RooMath::interpolate ERROR: zero distance between points not allowed" << endl ;
671  return 0 ;
672  }
673  den = w/den ;
674  d[i]=hp*den ;
675  c[i]=ho*den;
676  }
677  dy = (2*ns)<(n-m) ? c[ns+1] : d[ns--] ;
678  y += dy ;
679  }
680  return y ;
681 }
RooMath::erf
static std::complex< double > erf(const std::complex< double > z)
complex erf function
Definition: RooMath.cxx:580
c
#define c(i)
Definition: RSha256.hxx:119
RooMath::faddeeva_fast
static std::complex< double > faddeeva_fast(std::complex< double > z)
evaluate Faddeeva function for complex argument (fast version)
Definition: RooMath.cxx:549
m
auto * m
Definition: textangle.C:8
n
const Int_t n
Definition: legend1.C:16
faddeeva_impl::a11
const double a11[11]
Definition: RooMath.cxx:487
e
#define e(i)
Definition: RSha256.hxx:121
RooMsgService.h
f
#define f(i)
Definition: RSha256.hxx:122
RooFit.h
exp
double exp(double)
NCF
#define NCF(TN, I, C)
Definition: cfortran.h:897
N
#define N
sin
double sin(double)
x
Double_t x[n]
Definition: legend1.C:17
cos
double cos(double)
RooMath::erfc_fast
static std::complex< double > erfc_fast(const std::complex< double > z)
complex erfc function (fast version)
Definition: RooMath.cxx:568
faddeeva_impl::faddeeva_smabmq_impl
static std::complex< T > faddeeva_smabmq_impl(T zre, T zim, const T tm, const T(&a)[N], const T(&npi)[N], const T(&taylorarr)[N *NTAYLOR *2])
Definition: RooMath.cxx:119
oocoutE
#define oocoutE(o, a)
Definition: RooMsgService.h:48
RooMath::erfc
static std::complex< double > erfc(const std::complex< double > z)
complex erfc function
Definition: RooMath.cxx:556
faddeeva_impl::a24
static const double a24[24]
Definition: RooMath.cxx:299
faddeeva_impl
Definition: RooMath.cxx:35
ROOT::Math::fabs
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Definition: UnaryOperators.h:131
faddeeva_impl::taylorarr11
const double taylorarr11[11 *6]
Definition: RooMath.cxx:493
RooMath::interpolate
static Double_t interpolate(Double_t yArr[], Int_t nOrder, Double_t x)
Definition: RooMath.cxx:605
a
auto * a
Definition: textangle.C:12
TGeant4Unit::ns
static constexpr double ns
Definition: TGeant4SystemOfUnits.h:167
y
Double_t y[n]
Definition: legend1.C:17
RooMath::faddeeva
static std::complex< double > faddeeva(std::complex< double > z)
evaluate Faddeeva function for complex argument
Definition: RooMath.cxx:542
Double_t
double Double_t
Definition: RtypesCore.h:59
faddeeva_impl::cexp
static void cexp(double &re, double &im)
Definition: RooMath.cxx:36
TObject
Definition: TObject.h:37
RooMath::erf_fast
static std::complex< double > erf_fast(const std::complex< double > z)
complex erf function (fast version)
Definition: RooMath.cxx:592
ROOT::Math::Chebyshev::T
double T(double x)
Definition: ChebyshevPol.h:52
d
#define d(i)
Definition: RSha256.hxx:120
faddeeva_impl::npi11
const double npi11[11]
Definition: RooMath.cxx:481
faddeeva_impl::taylorarr24
static const double taylorarr24[24 *12]
Definition: RooMath.cxx:309
RooFit::Eval
@ Eval
Definition: RooGlobalFunc.h:68
Riostream.h
RooMath.h
faddeeva_impl::npi24
static const double npi24[24]
Definition: RooMath.cxx:289
int