Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ComputeFunctions.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, Summer 2019
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file ComputeFunctions.cxx
15\ingroup Roobatchcompute
16
17This file contains vectorizable computation functions for PDFs and other Roofit objects.
18The same source file can also be compiled with nvcc. All functions have a single `Batches`
19object as an argument passed by value, which contains all the information necessary for the
20computation. In case of cuda computations, the loops have a step (stride) the size of the grid
21which allows for reusing the same code as the cpu implementations, easier debugging and in terms
22of performance, maximum memory coalescing. For more details, see
23https://developer.nvidia.com/blog/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/
24**/
25
26#include "RooBatchCompute.h"
27#include "RooNaNPacker.h"
28#include "RooVDTHeaders.h"
29#include "Batches.h"
30
31#include <TMath.h>
32
34
35#ifdef __CUDACC__
36#define BEGIN blockDim.x *blockIdx.x + threadIdx.x
37#define STEP blockDim.x *gridDim.x
38#else
39#define BEGIN 0
40#define STEP 1
41#endif // #ifdef __CUDACC__
42
43namespace RooBatchCompute {
44namespace RF_ARCH {
45
47{
48 const int nPdfs = batches.getNExtraArgs();
49 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
50 batches._output[i] = batches.extraArg(0) * batches[0][i];
51 for (int pdf = 1; pdf < nPdfs; pdf++) {
52 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
53 batches._output[i] += batches.extraArg(pdf) * batches[pdf][i];
54 }
55}
56
58{
59 Batch m = batches[0];
60 Batch m0 = batches[1];
61 Batch c = batches[2];
62 Batch p = batches[3];
63 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
64 const double t = m[i] / m0[i];
65 const double u = 1 - t * t;
66 batches._output[i] = c[i] * u + p[i] * fast_log(u);
67 }
68 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
69 if (m[i] >= m0[i]) {
70 batches._output[i] = 0.0;
71 } else {
72 batches._output[i] = m[i] * fast_exp(batches._output[i]);
73 }
74 }
75}
76
78{
79 Batch coef0 = batches[0];
80 Batch coef1 = batches[1];
81 Batch tagFlav = batches[2];
82 Batch delMistag = batches[3];
83 Batch mixState = batches[4];
84 Batch mistag = batches[5];
85
86 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
87 batches._output[i] =
88 coef0[i] * (1.0 - tagFlav[i] * delMistag[0]) + coef1[i] * (mixState[i] * (1.0 - 2.0 * mistag[0]));
89 }
90}
91
93{
94 const int nCoef = batches.getNExtraArgs() - 2;
95 const int degree = nCoef - 1;
96 const double xmin = batches.extraArg(nCoef);
97 const double xmax = batches.extraArg(nCoef + 1);
98 Batch xData = batches[0];
99
100 // apply binomial coefficient in-place so we don't have to allocate new memory
101 double binomial = 1.0;
102 for (int k = 0; k < nCoef; k++) {
103 batches.setExtraArg(k, batches.extraArg(k) * binomial);
104 binomial = (binomial * (degree - k)) / (k + 1);
105 }
106
107 if (STEP == 1) {
108 double X[bufferSize];
109 double _1_X[bufferSize];
110 double powX[bufferSize];
111 double pow_1_X[bufferSize];
112 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
113 powX[i] = pow_1_X[i] = 1.0;
114 X[i] = (xData[i] - xmin) / (xmax - xmin);
115 _1_X[i] = 1 - X[i];
116 batches._output[i] = 0.0;
117 }
118
119 // raising 1-x to the power of degree
120 for (int k = 2; k <= degree; k += 2) {
121 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
122 pow_1_X[i] *= _1_X[i] * _1_X[i];
123 }
124
125 if (degree % 2 == 1) {
126 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
127 pow_1_X[i] *= _1_X[i];
128 }
129
130 // inverting 1-x ---> 1/(1-x)
131 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
132 _1_X[i] = 1 / _1_X[i];
133
134 for (int k = 0; k < nCoef; k++) {
135 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
136 batches._output[i] += batches.extraArg(k) * powX[i] * pow_1_X[i];
137
138 // calculating next power for x and 1-x
139 powX[i] *= X[i];
140 pow_1_X[i] *= _1_X[i];
141 }
142 }
143 } else {
144 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
145 batches._output[i] = 0.0;
146 const double X = (xData[i] - xmin) / (xmax - xmin);
147 double powX = 1.0;
148 double pow_1_X = 1.0;
149 for (int k = 1; k <= degree; k++)
150 pow_1_X *= 1 - X;
151 const double _1_X = 1 / (1 - X);
152 for (int k = 0; k < nCoef; k++) {
153 batches._output[i] += batches.extraArg(k) * powX * pow_1_X;
154 powX *= X;
155 pow_1_X *= _1_X;
156 }
157 }
158 }
159
160 // reset extraArgs values so we don't mutate the Batches object
161 binomial = 1.0;
162 for (int k = 0; k < nCoef; k++) {
163 batches.setExtraArg(k, batches.extraArg(k) / binomial);
164 binomial = (binomial * (degree - k)) / (k + 1);
165 }
166}
167
169{
170 Batch X = batches[0];
171 Batch M = batches[1];
172 Batch SL = batches[2];
173 Batch SR = batches[3];
174 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
175 double arg = X[i] - M[i];
176 if (arg < 0) {
177 arg /= SL[i];
178 } else {
179 arg /= SR[i];
180 }
181 batches._output[i] = fast_exp(-0.5 * arg * arg);
182 }
183}
184
186{
187 Batch X = batches[0];
188 Batch M = batches[1];
189 Batch W = batches[2];
190 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
191 const double arg = X[i] - M[i];
192 batches._output[i] = 1 / (arg * arg + 0.25 * W[i] * W[i]);
193 }
194}
195
197{
198 Batch X = batches[0];
199 Batch XP = batches[1];
200 Batch SP = batches[2];
201 Batch XI = batches[3];
202 Batch R1 = batches[4];
203 Batch R2 = batches[5];
204 const double r3 = log(2.0);
205 const double r6 = exp(-6.0);
206 const double r7 = 2 * sqrt(2 * log(2.0));
207
208 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
209 const double r1 = XI[i] * fast_isqrt(XI[i] * XI[i] + 1);
210 const double r4 = 1 / fast_isqrt(XI[i] * XI[i] + 1);
211 const double hp = 1 / (SP[i] * r7);
212 const double x1 = XP[i] + 0.5 * SP[i] * r7 * (r1 - 1);
213 const double x2 = XP[i] + 0.5 * SP[i] * r7 * (r1 + 1);
214
215 double r5 = 1.0;
216 if (XI[i] > r6 || XI[i] < -r6)
217 r5 = XI[i] / fast_log(r4 + XI[i]);
218
219 double factor = 1;
220 double y = X[i] - x1;
221 double Yp = XP[i] - x1;
222 double yi = r4 - XI[i];
223 double rho = R1[i];
224 if (X[i] >= x2) {
225 factor = -1;
226 y = X[i] - x2;
227 Yp = XP[i] - x2;
228 yi = r4 + XI[i];
229 rho = R2[i];
230 }
231
232 batches._output[i] = rho * y * y / Yp / Yp - r3 + factor * 4 * r3 * y * hp * r5 * r4 / yi / yi;
233 if (X[i] >= x1 && X[i] < x2) {
234 batches._output[i] =
235 fast_log(1 + 4 * XI[i] * r4 * (X[i] - XP[i]) * hp) / fast_log(1 + 2 * XI[i] * (XI[i] - r4));
236 batches._output[i] *= -batches._output[i] * r3;
237 }
238 if (X[i] >= x1 && X[i] < x2 && XI[i] < r6 && XI[i] > -r6)
239 batches._output[i] = -4 * r3 * (X[i] - XP[i]) * (X[i] - XP[i]) * hp * hp;
240 }
241 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
242 batches._output[i] = fast_exp(batches._output[i]);
243}
244
246{
247 Batch M = batches[0];
248 Batch M0 = batches[1];
249 Batch S = batches[2];
250 Batch A = batches[3];
251 Batch N = batches[4];
252 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
253 const double t = (M[i] - M0[i]) / S[i];
254 if ((A[i] > 0 && t >= -A[i]) || (A[i] < 0 && -t >= A[i])) {
255 batches._output[i] = -0.5 * t * t;
256 } else {
257 batches._output[i] = N[i] / (N[i] - A[i] * A[i] - A[i] * t);
258 batches._output[i] = fast_log(batches._output[i]);
259 batches._output[i] *= N[i];
260 batches._output[i] -= 0.5 * A[i] * A[i];
261 }
262 }
263 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
264 batches._output[i] = fast_exp(batches._output[i]);
265}
266
268{
269 Batch xData = batches[0];
270 const int nCoef = batches.getNExtraArgs() - 2;
271 const double xmin = batches.extraArg(nCoef);
272 const double xmax = batches.extraArg(nCoef + 1);
273
274 if (STEP == 1) {
275 double prev[bufferSize][2];
276 double X[bufferSize];
277
278 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
279 // set a0-->prev[i][0] and a1-->prev[i][1]
280 // and x tranfsformed to range[-1..1]-->X[i]
281 prev[i][0] = batches._output[i] = 1.0;
282 prev[i][1] = X[i] = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin);
283 }
284 for (int k = 0; k < nCoef; k++) {
285 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
286 batches._output[i] += prev[i][1] * batches.extraArg(k);
287
288 // compute next order
289 const double next = 2 * X[i] * prev[i][1] - prev[i][0];
290 prev[i][0] = prev[i][1];
291 prev[i][1] = next;
292 }
293 }
294 } else {
295 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
296 double prev0 = 1.0;
297 double prev1 = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin);
298 double X = prev1;
299 batches._output[i] = 1.0;
300 for (int k = 0; k < nCoef; k++) {
301 batches._output[i] += prev1 * batches.extraArg(k);
302
303 // compute next order
304 const double next = 2 * X * prev1 - prev0;
305 prev0 = prev1;
306 prev1 = next;
307 }
308 }
309 }
310}
311
313{
314 Batch X = batches[0];
315 const double ndof = batches.extraArg(0);
316 const double gamma = 1 / std::tgamma(ndof / 2.0);
317 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
318 batches._output[i] = gamma;
319
320 constexpr double ln2 = 0.693147180559945309417232121458;
321 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
322 double arg = (ndof - 2) * fast_log(X[i]) - X[i] - ndof * ln2;
323 batches._output[i] *= fast_exp(0.5 * arg);
324 }
325}
326
328{
329 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
330 batches._output[i] = 0.0 + (batches[0][i] == 1.0);
331 }
332}
333
335{
336 Batch DM = batches[0];
337 Batch DM0 = batches[1];
338 Batch C = batches[2];
339 Batch A = batches[3];
340 Batch B = batches[4];
341 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
342 const double ratio = DM[i] / DM0[i];
343 const double arg1 = (DM0[i] - DM[i]) / C[i];
344 const double arg2 = A[i] * fast_log(ratio);
345 batches._output[i] = (1 - fast_exp(arg1)) * fast_exp(arg2) + B[i] * (ratio - 1);
346 }
347
348 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
349 if (batches._output[i] < 0)
350 batches._output[i] = 0;
351 }
352}
353
355{
356 int lowestOrder = batches.extraArg(0);
357 int nTerms = batches.extraArg(1);
358 auto x = batches[0];
359
360 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
361 batches._output[i] = 0.0;
362 double xTmp = std::pow(x[i], lowestOrder);
363 for (int k = 0; k < nTerms; ++k) {
364 batches._output[i] += batches[k + 1][i] * xTmp;
365 xTmp *= x[i];
366 }
367 batches._output[i] = std::exp(batches._output[i]);
368 }
369}
370
372{
373 Batch x = batches[0];
374 Batch c = batches[1];
375 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
376 batches._output[i] = fast_exp(x[i] * c[i]);
377 }
378}
379
381{
382 Batch x = batches[0];
383 Batch c = batches[1];
384 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
385 batches._output[i] = fast_exp(-x[i] * c[i]);
386 }
387}
388
390{
391 Batch X = batches[0];
392 Batch G = batches[1];
393 Batch B = batches[2];
394 Batch M = batches[3];
395 double gamma = -std::lgamma(G[0]);
396 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
397 if (X[i] == M[i]) {
398 batches._output[i] = (G[i] == 1.0) / B[i];
399 } else if (G.isItVector()) {
400 batches._output[i] = -std::lgamma(G[i]);
401 } else {
402 batches._output[i] = gamma;
403 }
404 }
405
406 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
407 if (X[i] != M[i]) {
408 const double invBeta = 1 / B[i];
409 double arg = (X[i] - M[i]) * invBeta;
410 batches._output[i] -= arg;
411 arg = fast_log(arg);
412 batches._output[i] += arg * (G[i] - 1);
413 batches._output[i] = fast_exp(batches._output[i]);
414 batches._output[i] *= invBeta;
415 }
416 }
417}
418
420{
421 const double root2 = std::sqrt(2.);
422 const double root2pi = std::sqrt(2. * std::atan2(0., -1.));
423
424 const bool isMinus = batches.extraArg(0) < 0.0;
425 const bool isPlus = batches.extraArg(0) > 0.0;
426
427 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
428
429 const double x = batches[0][i];
430 const double mean = batches[1][i] * batches[2][i];
431 const double sigma = batches[3][i] * batches[4][i];
432 const double tau = batches[5][i];
433
434 if (tau == 0.0) {
435 // Straight Gaussian, used for unconvoluted PDF or expBasis with 0 lifetime
436 double xprime = (x - mean) / sigma;
437 double result = std::exp(-0.5 * xprime * xprime) / (sigma * root2pi);
438 if (!isMinus && !isPlus)
439 result *= 2;
440 batches._output[i] = result;
441 } else {
442 // Convolution with exp(-t/tau)
443 const double xprime = (x - mean) / tau;
444 const double c = sigma / (root2 * tau);
445 const double u = xprime / (2 * c);
446
447 double result = 0.0;
448 if (!isMinus)
449 result += RooHeterogeneousMath::evalCerf(0, -u, c).real();
450 if (!isPlus)
451 result += RooHeterogeneousMath::evalCerf(0, u, c).real();
452 batches._output[i] = result;
453 }
454 }
455}
456
458{
459 auto x = batches[0];
460 auto mean = batches[1];
461 auto sigma = batches[2];
462 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
463 const double arg = x[i] - mean[i];
464 const double halfBySigmaSq = -0.5 / (sigma[i] * sigma[i]);
465 batches._output[i] = fast_exp(arg * arg * halfBySigmaSq);
466 }
467}
468
470{
471 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
472 batches._output[i] = batches[0][i];
473 }
474}
475
477{
478 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
479 batches._output[i] = -fast_log(batches[0][i]);
480 // Multiply by weights if they exist
481 if (batches.extraArg(0)) {
482 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
483 batches._output[i] *= batches[1][i];
484 }
485}
486
488{
489 Batch mass = batches[0];
490 Batch mu = batches[1];
491 Batch lambda = batches[2];
492 Batch gamma = batches[3];
493 Batch delta = batches[4];
494 const double sqrtTwoPi = std::sqrt(TMath::TwoPi());
495 const double massThreshold = batches.extraArg(0);
496
497 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
498 const double arg = (mass[i] - mu[i]) / lambda[i];
499#ifdef R__HAS_VDT
500 const double asinh_arg = fast_log(arg + 1 / fast_isqrt(arg * arg + 1));
501#else
502 const double asinh_arg = asinh(arg);
503#endif
504 const double expo = gamma[i] + delta[i] * asinh_arg;
505 const double result =
506 delta[i] * fast_exp(-0.5 * expo * expo) * fast_isqrt(1. + arg * arg) / (sqrtTwoPi * lambda[i]);
507
508 const double passThrough = mass[i] >= massThreshold;
509 batches._output[i] = result * passThrough;
510 }
511}
512
513/* Actual computation of Landau(x,mean,sigma) in a vectorization-friendly way
514 * Code copied from function landau_pdf (math/mathcore/src/PdfFuncMathCore.cxx)
515 * and rewritten to enable vectorization.
516 */
518{
519 auto case0 = [](double x) {
520 const double a1[3] = {0.04166666667, -0.01996527778, 0.02709538966};
521 const double u = fast_exp(x + 1.0);
522 return 0.3989422803 * fast_exp(-1 / u - 0.5 * (x + 1)) * (1 + (a1[0] + (a1[1] + a1[2] * u) * u) * u);
523 };
524 auto case1 = [](double x) {
525 constexpr double p1[5] = {0.4259894875, -0.1249762550, 0.03984243700, -0.006298287635, 0.001511162253};
526 constexpr double q1[5] = {1.0, -0.3388260629, 0.09594393323, -0.01608042283, 0.003778942063};
527 const double u = fast_exp(-x - 1);
528 return fast_exp(-u - 0.5 * (x + 1)) * (p1[0] + (p1[1] + (p1[2] + (p1[3] + p1[4] * x) * x) * x) * x) /
529 (q1[0] + (q1[1] + (q1[2] + (q1[3] + q1[4] * x) * x) * x) * x);
530 };
531 auto case2 = [](double x) {
532 constexpr double p2[5] = {0.1788541609, 0.1173957403, 0.01488850518, -0.001394989411, 0.0001283617211};
533 constexpr double q2[5] = {1.0, 0.7428795082, 0.3153932961, 0.06694219548, 0.008790609714};
534 return (p2[0] + (p2[1] + (p2[2] + (p2[3] + p2[4] * x) * x) * x) * x) /
535 (q2[0] + (q2[1] + (q2[2] + (q2[3] + q2[4] * x) * x) * x) * x);
536 };
537 auto case3 = [](double x) {
538 constexpr double p3[5] = {0.1788544503, 0.09359161662, 0.006325387654, 0.00006611667319, -0.000002031049101};
539 constexpr double q3[5] = {1.0, 0.6097809921, 0.2560616665, 0.04746722384, 0.006957301675};
540 return (p3[0] + (p3[1] + (p3[2] + (p3[3] + p3[4] * x) * x) * x) * x) /
541 (q3[0] + (q3[1] + (q3[2] + (q3[3] + q3[4] * x) * x) * x) * x);
542 };
543 auto case4 = [](double x) {
544 constexpr double p4[5] = {0.9874054407, 118.6723273, 849.2794360, -743.7792444, 427.0262186};
545 constexpr double q4[5] = {1.0, 106.8615961, 337.6496214, 2016.712389, 1597.063511};
546 const double u = 1 / x;
547 return u * u * (p4[0] + (p4[1] + (p4[2] + (p4[3] + p4[4] * u) * u) * u) * u) /
548 (q4[0] + (q4[1] + (q4[2] + (q4[3] + q4[4] * u) * u) * u) * u);
549 };
550 auto case5 = [](double x) {
551 constexpr double p5[5] = {1.003675074, 167.5702434, 4789.711289, 21217.86767, -22324.94910};
552 constexpr double q5[5] = {1.0, 156.9424537, 3745.310488, 9834.698876, 66924.28357};
553 const double u = 1 / x;
554 return u * u * (p5[0] + (p5[1] + (p5[2] + (p5[3] + p5[4] * u) * u) * u) * u) /
555 (q5[0] + (q5[1] + (q5[2] + (q5[3] + q5[4] * u) * u) * u) * u);
556 };
557 auto case6 = [](double x) {
558 constexpr double p6[5] = {1.000827619, 664.9143136, 62972.92665, 475554.6998, -5743609.109};
559 constexpr double q6[5] = {1.0, 651.4101098, 56974.73333, 165917.4725, -2815759.939};
560 const double u = 1 / x;
561 return u * u * (p6[0] + (p6[1] + (p6[2] + (p6[3] + p6[4] * u) * u) * u) * u) /
562 (q6[0] + (q6[1] + (q6[2] + (q6[3] + q6[4] * u) * u) * u) * u);
563 };
564 auto case7 = [](double x) {
565 const double a2[2] = {-1.845568670, -4.284640743};
566 const double u = 1 / (x - x * fast_log(x) / (x + 1));
567 return u * u * (1 + (a2[0] + a2[1] * u) * u);
568 };
569
570 Batch X = batches[0];
571 Batch M = batches[1];
572 Batch S = batches[2];
573
574 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
575 batches._output[i] = (X[i] - M[i]) / S[i];
576
577 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
578 if (S[i] <= 0.0) {
579 batches._output[i] = 0;
580 } else if (batches._output[i] < -5.5) {
581 batches._output[i] = case0(batches._output[i]);
582 } else if (batches._output[i] < -1.0) {
583 batches._output[i] = case1(batches._output[i]);
584 } else if (batches._output[i] < 1.0) {
585 batches._output[i] = case2(batches._output[i]);
586 } else if (batches._output[i] < 5.0) {
587 batches._output[i] = case3(batches._output[i]);
588 } else if (batches._output[i] < 12.0) {
589 batches._output[i] = case4(batches._output[i]);
590 } else if (batches._output[i] < 50.0) {
591 batches._output[i] = case5(batches._output[i]);
592 } else if (batches._output[i] < 300.) {
593 batches._output[i] = case6(batches._output[i]);
594 } else {
595 batches._output[i] = case7(batches._output[i]);
596 }
597 }
598}
599
601{
602 Batch X = batches[0];
603 Batch M0 = batches[1];
604 Batch K = batches[2];
605 constexpr double rootOf2pi = 2.506628274631000502415765284811;
606 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
607 double lnxOverM0 = fast_log(X[i] / M0[i]);
608 double lnk = fast_log(K[i]);
609 if (lnk < 0)
610 lnk = -lnk;
611 double arg = lnxOverM0 / lnk;
612 arg *= -0.5 * arg;
613 batches._output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
614 }
615}
616
618{
619 Batch X = batches[0];
620 Batch M0 = batches[1];
621 Batch K = batches[2];
622 constexpr double rootOf2pi = 2.506628274631000502415765284811;
623 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
624 double lnxOverM0 = fast_log(X[i]) - M0[i];
625 double lnk = K[i];
626 if (lnk < 0)
627 lnk = -lnk;
628 double arg = lnxOverM0 / lnk;
629 arg *= -0.5 * arg;
630 batches._output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
631 }
632}
633
635{
636 auto rawVal = batches[0];
637 auto normVal = batches[1];
638
639 int nEvalErrorsType0 = 0;
640 int nEvalErrorsType1 = 0;
641 int nEvalErrorsType2 = 0;
642
643 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
644 double out = 0.0;
645 // batches._output[i] = rawVal[i] / normVar[i];
646 if (normVal[i] < 0. || (normVal[i] == 0. && rawVal[i] != 0)) {
647 // Unreasonable normalisations. A zero integral can be tolerated if the function vanishes, though.
648 out = RooNaNPacker::packFloatIntoNaN(-normVal[i] + (rawVal[i] < 0. ? -rawVal[i] : 0.));
649 nEvalErrorsType0++;
650 } else if (rawVal[i] < 0.) {
651 // The pdf value is less than zero.
652 out = RooNaNPacker::packFloatIntoNaN(-rawVal[i]);
653 nEvalErrorsType1++;
654 } else if (std::isnan(rawVal[i])) {
655 // The pdf value is Not-a-Number.
656 out = rawVal[i];
657 nEvalErrorsType2++;
658 } else {
659 out = (rawVal[i] == 0. && normVal[i] == 0.) ? 0. : rawVal[i] / normVal[i];
660 }
661 batches._output[i] = out;
662 }
663
664 if (nEvalErrorsType0 > 0)
665 batches.setExtraArg(0, batches.extraArg(0) + nEvalErrorsType0);
666 if (nEvalErrorsType1 > 1)
667 batches.setExtraArg(1, batches.extraArg(1) + nEvalErrorsType1);
668 if (nEvalErrorsType2 > 2)
669 batches.setExtraArg(2, batches.extraArg(2) + nEvalErrorsType2);
670}
671
672/* TMath::ASinH(x) needs to be replaced with ln( x + sqrt(x^2+1))
673 * argasinh -> the argument of TMath::ASinH()
674 * argln -> the argument of the logarithm that replaces AsinH
675 * asinh -> the value that the function evaluates to
676 *
677 * ln is the logarithm that was solely present in the initial
678 * formula, that is before the asinh replacement
679 */
681{
682 Batch X = batches[0];
683 Batch P = batches[1];
684 Batch W = batches[2];
685 Batch T = batches[3];
686 constexpr double xi = 2.3548200450309494; // 2 Sqrt( Ln(4) )
687 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
688 double argasinh = 0.5 * xi * T[i];
689 double argln = argasinh + 1 / fast_isqrt(argasinh * argasinh + 1);
690 double asinh = fast_log(argln);
691
692 double argln2 = 1 - (X[i] - P[i]) * T[i] / W[i];
693 double ln = fast_log(argln2);
694 batches._output[i] = ln / asinh;
695 batches._output[i] *= -0.125 * xi * xi * batches._output[i];
696 batches._output[i] -= 2.0 / xi / xi * asinh * asinh;
697 }
698
699 // faster if you exponentiate in a separate loop (dark magic!)
700 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
701 batches._output[i] = fast_exp(batches._output[i]);
702}
703
705{
706 Batch x = batches[0];
707 Batch mean = batches[1];
708 bool protectNegative = batches.extraArg(0);
709 bool noRounding = batches.extraArg(1);
710 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
711 const double x_i = noRounding ? x[i] : floor(x[i]);
712 batches._output[i] = std::lgamma(x_i + 1.);
713 }
714
715 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
716 const double x_i = noRounding ? x[i] : floor(x[i]);
717 const double logMean = fast_log(mean[i]);
718 const double logPoisson = x_i * logMean - mean[i] - batches._output[i];
719 batches._output[i] = fast_exp(logPoisson);
720
721 // Cosmetics
722 if (x_i < 0) {
723 batches._output[i] = 0;
724 } else if (x_i == 0) {
725 batches._output[i] = 1 / fast_exp(mean[i]);
726 }
727
728 if (protectNegative && mean[i] < 0)
729 batches._output[i] = 1.E-3;
730 }
731}
732
734{
735 const int nCoef = batches.extraArg(0);
736 const std::size_t nEvents = batches.getNEvents();
737 Batch x = batches[nCoef];
738
739 for (size_t i = BEGIN; i < nEvents; i += STEP) {
740 batches._output[i] = batches[nCoef - 1][i];
741 }
742
743 // Indexes are in range 0..nCoef-1 but coefList[nCoef-1] has already been
744 // processed.
745 for (int k = nCoef - 2; k >= 0; k--) {
746 for (size_t i = BEGIN; i < nEvents; i += STEP) {
747 batches._output[i] = batches[k][i] + x[i] * batches._output[i];
748 }
749 }
750}
751
753{
754 const int nCoef = batches.extraArg(0);
755 Batch x = batches[0];
756
757 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
758 batches._output[i] = 0.0;
759 for (int k = 0; k < nCoef; ++k) {
760 batches._output[i] += batches[2 * k + 1][i] * std::pow(x[i], batches[2 * k + 2][i]);
761 }
762 }
763}
764
766{
767 const int nPdfs = batches.extraArg(0);
768 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
769 batches._output[i] = 1.;
770 }
771 for (int pdf = 0; pdf < nPdfs; pdf++) {
772 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
773 batches._output[i] *= batches[pdf][i];
774 }
775 }
776}
777
779{
780 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
781 batches._output[i] = batches[0][i] / batches[1][i];
782 }
783}
784
786{
787
788 const bool isMinus = batches.extraArg(0) < 0.0;
789 const bool isPlus = batches.extraArg(0) > 0.0;
790 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
791 double x = batches[0][i];
792 // Enforce sign compatibility
793 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
794 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]);
795 }
796}
797
799{
800 const bool isMinus = batches.extraArg(0) < 0.0;
801 const bool isPlus = batches.extraArg(0) > 0.0;
802 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
803 double x = batches[0][i];
804 // Enforce sign compatibility
805 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
806 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * fast_sin(x * batches[2][i]);
807 }
808}
809
811{
812 const bool isMinus = batches.extraArg(0) < 0.0;
813 const bool isPlus = batches.extraArg(0) > 0.0;
814 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
815 double x = batches[0][i];
816 // Enforce sign compatibility
817 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
818 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * fast_cos(x * batches[2][i]);
819 }
820}
821
823{
824 const bool isMinus = batches.extraArg(0) < 0.0;
825 const bool isPlus = batches.extraArg(0) > 0.0;
826 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
827 double x = batches[0][i];
828 // Enforce sign compatibility
829 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
830 if (isOutOfSign) {
831 batches._output[i] = 0.0;
832 } else {
833 const double tscaled = std::abs(x) / batches[1][i];
834 batches._output[i] = fast_exp(-tscaled) * tscaled;
835 }
836 }
837}
838
840{
841 const bool isMinus = batches.extraArg(0) < 0.0;
842 const bool isPlus = batches.extraArg(0) > 0.0;
843 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
844 double x = batches[0][i];
845 // Enforce sign compatibility
846 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
847 if (isOutOfSign) {
848 batches._output[i] = 0.0;
849 } else {
850 const double tscaled = std::abs(x) / batches[1][i];
851 batches._output[i] = fast_exp(-tscaled) * tscaled * tscaled;
852 }
853 }
854}
855
857{
858 const bool isMinus = batches.extraArg(0) < 0.0;
859 const bool isPlus = batches.extraArg(0) > 0.0;
860 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
861 double x = batches[0][i];
862 // Enforce sign compatibility
863 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
864 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * sinh(x * batches[2][i] * 0.5);
865 }
866}
867
869{
870 const bool isMinus = batches.extraArg(0) < 0.0;
871 const bool isPlus = batches.extraArg(0) > 0.0;
872 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
873 double x = batches[0][i];
874 // Enforce sign compatibility
875 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
876 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * cosh(x * batches[2][i] * .5);
877 }
878}
879
881{
882 Batch X = batches[0];
883 Batch M = batches[1];
884 Batch W = batches[2];
885 Batch S = batches[3];
886 const double invSqrt2 = 0.707106781186547524400844362105;
887 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
888 const double arg = (X[i] - M[i]) * (X[i] - M[i]);
889 if (S[i] == 0.0 && W[i] == 0.0) {
890 batches._output[i] = 1.0;
891 } else if (S[i] == 0.0) {
892 batches._output[i] = 1 / (arg + 0.25 * W[i] * W[i]);
893 } else if (W[i] == 0.0) {
894 batches._output[i] = fast_exp(-0.5 * arg / (S[i] * S[i]));
895 } else {
896 batches._output[i] = invSqrt2 / S[i];
897 }
898 }
899
900 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
901 if (S[i] != 0.0 && W[i] != 0.0) {
902 if (batches._output[i] < 0)
903 batches._output[i] = -batches._output[i];
904 const double factor = W[i] > 0.0 ? 0.5 : -0.5;
905 RooHeterogeneousMath::STD::complex<double> z(batches._output[i] * (X[i] - M[i]),
906 factor * batches._output[i] * W[i]);
907 batches._output[i] *= RooHeterogeneousMath::faddeeva(z).real();
908 }
909 }
910}
911
912/// Returns a std::vector of pointers to the compute functions in this file.
913std::vector<void (*)(BatchesHandle)> getFunctions()
914{
915 return {computeAddPdf,
954}
955} // End namespace RF_ARCH
956} // End namespace RooBatchCompute
#define STEP
#define BEGIN
#define c(i)
Definition RSha256.hxx:101
#define __rooglobal__
#define N
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
float xmin
float xmax
__roodevice__ std::size_t getNEvents() const
Definition Batches.h:69
__roodevice__ std::size_t getNExtraArgs() const
Definition Batches.h:70
__roodevice__ void setExtraArg(std::size_t i, double val)
Definition Batches.h:72
__roodevice__ double extraArg(std::size_t i) const
Definition Batches.h:71
const Double_t sigma
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
#define G(x, y, z)
__rooglobal__ void computeTruthModelCosBasis(BatchesHandle batches)
__rooglobal__ void computeExponential(BatchesHandle batches)
__rooglobal__ void computeDstD0BG(BatchesHandle batches)
__rooglobal__ void computeLandau(BatchesHandle batches)
__rooglobal__ void computeArgusBG(BatchesHandle batches)
__rooglobal__ void computeBukin(BatchesHandle batches)
__rooglobal__ void computeLognormalStandard(BatchesHandle batches)
__rooglobal__ void computeNovosibirsk(BatchesHandle batches)
__rooglobal__ void computeNormalizedPdf(BatchesHandle batches)
__rooglobal__ void computePolynomial(BatchesHandle batches)
__rooglobal__ void computeTruthModelSinhBasis(BatchesHandle batches)
__rooglobal__ void computePoisson(BatchesHandle batches)
__rooglobal__ void computeTruthModelCoshBasis(BatchesHandle batches)
__rooglobal__ void computeBifurGauss(BatchesHandle batches)
__rooglobal__ void computeTruthModelSinBasis(BatchesHandle batches)
__rooglobal__ void computeExpPoly(BatchesHandle batches)
__rooglobal__ void computeRatio(BatchesHandle batches)
__rooglobal__ void computeAddPdf(BatchesHandle batches)
__rooglobal__ void computeTruthModelLinBasis(BatchesHandle batches)
__rooglobal__ void computeChiSquare(BatchesHandle batches)
__rooglobal__ void computeTruthModelQuadBasis(BatchesHandle batches)
__rooglobal__ void computeDeltaFunction(BatchesHandle batches)
__rooglobal__ void computeChebychev(BatchesHandle batches)
__rooglobal__ void computeExponentialNeg(BatchesHandle batches)
__rooglobal__ void computeIdentity(BatchesHandle batches)
__rooglobal__ void computeLognormal(BatchesHandle batches)
__rooglobal__ void computeGaussModelExpBasis(BatchesHandle batches)
__rooglobal__ void computeVoigtian(BatchesHandle batches)
__rooglobal__ void computePower(BatchesHandle batches)
__rooglobal__ void computeTruthModelExpBasis(BatchesHandle batches)
__rooglobal__ void computeGaussian(BatchesHandle batches)
__rooglobal__ void computeBernstein(BatchesHandle batches)
__rooglobal__ void computeNegativeLogarithms(BatchesHandle batches)
__rooglobal__ void computeGamma(BatchesHandle batches)
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
Batches & BatchesHandle
Definition Batches.h:84
__rooglobal__ void computeJohnson(BatchesHandle batches)
__rooglobal__ void computeBreitWigner(BatchesHandle batches)
__rooglobal__ void computeProdPdf(BatchesHandle batches)
__rooglobal__ void computeCBShape(BatchesHandle batches)
__rooglobal__ void computeBMixDecay(BatchesHandle batches)
Namespace for dispatching RooFit computations to various backends.
__roodevice__ double fast_exp(double x)
__roodevice__ double fast_sin(double x)
constexpr std::size_t bufferSize
Definition Batches.h:32
__roodevice__ double fast_log(double x)
__roodevice__ double fast_cos(double x)
__roodevice__ double fast_isqrt(double x)
__roodevice__ __roohost__ STD::complex< double > faddeeva(STD::complex< double > z)
__roohost__ __roodevice__ STD::complex< double > evalCerf(double swt, double u, double c)
constexpr Double_t TwoPi()
Definition TMath.h:44
#define R1(v, w, x, y, z, i)
Definition sha1.inl:134
#define R2(v, w, x, y, z, i)
Definition sha1.inl:137
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
TMarker m
Definition textangle.C:8