Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ComputeFunctions.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, Summer 2019
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file ComputeFunctions.cxx
15\ingroup Roobatchcompute
16
17This file contains vectorizable computation functions for PDFs and other Roofit objects.
18The same source file can also be compiled with nvcc. All functions have a single `Batches`
19object as an argument passed by value, which contains all the information necessary for the
20computation. In case of cuda computations, the loops have a step (stride) the size of the grid
21which allows for reusing the same code as the cpu implementations, easier debugging and in terms
22of performance, maximum memory coalescing. For more details, see
23https://developer.nvidia.com/blog/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/
24**/
25
26#include "RooBatchCompute.h"
27#include "RooNaNPacker.h"
28#include "RooVDTHeaders.h"
29#include "Batches.h"
30
31#include <TMath.h>
32
34
35#include <vector>
36
37#ifdef __CUDACC__
38#define BEGIN blockDim.x *blockIdx.x + threadIdx.x
39#define STEP blockDim.x *gridDim.x
40#else
41#define BEGIN 0
42#define STEP 1
43#endif // #ifdef __CUDACC__
44
45namespace RooBatchCompute {
46namespace RF_ARCH {
47
49{
50 const int nPdfs = batches.nExtra;
51 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
52 batches.output[i] = batches.extra[0] * batches.args[0][i];
53 }
54 for (int pdf = 1; pdf < nPdfs; pdf++) {
55 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
56 batches.output[i] += batches.extra[pdf] * batches.args[pdf][i];
57 }
58 }
59}
60
62{
63 Batch m = batches.args[0];
64 Batch m0 = batches.args[1];
65 Batch c = batches.args[2];
66 Batch p = batches.args[3];
67 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
68 const double t = m[i] / m0[i];
69 const double u = 1 - t * t;
70 batches.output[i] = c[i] * u + p[i] * fast_log(u);
71 }
72 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
73 if (m[i] >= m0[i]) {
74 batches.output[i] = 0.0;
75 } else {
76 batches.output[i] = m[i] * fast_exp(batches.output[i]);
77 }
78 }
79}
80
82{
83 Batch coef0 = batches.args[0];
84 Batch coef1 = batches.args[1];
85 Batch tagFlav = batches.args[2];
86 Batch delMistag = batches.args[3];
87 Batch mixState = batches.args[4];
88 Batch mistag = batches.args[5];
89
90 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
91 batches.output[i] =
92 coef0[i] * (1.0 - tagFlav[i] * delMistag[0]) + coef1[i] * (mixState[i] * (1.0 - 2.0 * mistag[0]));
93 }
94}
95
97{
98 const int nCoef = batches.nExtra - 2;
99 const int degree = nCoef - 1;
100 const double xmin = batches.extra[nCoef];
101 const double xmax = batches.extra[nCoef + 1];
102 Batch xData = batches.args[0];
103
104 // apply binomial coefficient in-place so we don't have to allocate new memory
105 double binomial = 1.0;
106 for (int k = 0; k < nCoef; k++) {
107 batches.extra[k] = batches.extra[k] * binomial;
108 binomial = (binomial * (degree - k)) / (k + 1);
109 }
110
111 if (STEP == 1) {
112 double X[bufferSize];
113 double _1_X[bufferSize];
114 double powX[bufferSize];
115 double pow_1_X[bufferSize];
116 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
117 powX[i] = pow_1_X[i] = 1.0;
118 X[i] = (xData[i] - xmin) / (xmax - xmin);
119 _1_X[i] = 1 - X[i];
120 batches.output[i] = 0.0;
121 }
122
123 // raising 1-x to the power of degree
124 for (int k = 2; k <= degree; k += 2) {
125 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
126 pow_1_X[i] *= _1_X[i] * _1_X[i];
127 }
128
129 if (degree % 2 == 1) {
130 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
131 pow_1_X[i] *= _1_X[i];
132 }
133
134 // inverting 1-x ---> 1/(1-x)
135 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
136 _1_X[i] = 1 / _1_X[i];
137
138 for (int k = 0; k < nCoef; k++) {
139 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
140 batches.output[i] += batches.extra[k] * powX[i] * pow_1_X[i];
141
142 // calculating next power for x and 1-x
143 powX[i] *= X[i];
144 pow_1_X[i] *= _1_X[i];
145 }
146 }
147 } else {
148 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
149 batches.output[i] = 0.0;
150 const double X = (xData[i] - xmin) / (xmax - xmin);
151 double powX = 1.0;
152 double pow_1_X = 1.0;
153 for (int k = 1; k <= degree; k++)
154 pow_1_X *= 1 - X;
155 const double _1_X = 1 / (1 - X);
156 for (int k = 0; k < nCoef; k++) {
157 batches.output[i] += batches.extra[k] * powX * pow_1_X;
158 powX *= X;
159 pow_1_X *= _1_X;
160 }
161 }
162 }
163
164 // reset extraArgs values so we don't mutate the Batches object
165 binomial = 1.0;
166 for (int k = 0; k < nCoef; k++) {
167 batches.extra[k] = batches.extra[k] / binomial;
168 binomial = (binomial * (degree - k)) / (k + 1);
169 }
170}
171
173{
174 Batch X = batches.args[0];
175 Batch M = batches.args[1];
176 Batch SL = batches.args[2];
177 Batch SR = batches.args[3];
178 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
179 double arg = X[i] - M[i];
180 if (arg < 0) {
181 arg /= SL[i];
182 } else {
183 arg /= SR[i];
184 }
185 batches.output[i] = fast_exp(-0.5 * arg * arg);
186 }
187}
188
190{
191 Batch X = batches.args[0];
192 Batch M = batches.args[1];
193 Batch W = batches.args[2];
194 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
195 const double arg = X[i] - M[i];
196 batches.output[i] = 1 / (arg * arg + 0.25 * W[i] * W[i]);
197 }
198}
199
201{
202 Batch X = batches.args[0];
203 Batch XP = batches.args[1];
204 Batch SP = batches.args[2];
205 Batch XI = batches.args[3];
206 Batch R1 = batches.args[4];
207 Batch R2 = batches.args[5];
208 const double r3 = log(2.0);
209 const double r6 = exp(-6.0);
210 const double r7 = 2 * sqrt(2 * log(2.0));
211
212 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
213 const double r1 = XI[i] * fast_isqrt(XI[i] * XI[i] + 1);
214 const double r4 = 1 / fast_isqrt(XI[i] * XI[i] + 1);
215 const double hp = 1 / (SP[i] * r7);
216 const double x1 = XP[i] + 0.5 * SP[i] * r7 * (r1 - 1);
217 const double x2 = XP[i] + 0.5 * SP[i] * r7 * (r1 + 1);
218
219 double r5 = 1.0;
220 if (XI[i] > r6 || XI[i] < -r6)
221 r5 = XI[i] / fast_log(r4 + XI[i]);
222
223 double factor = 1;
224 double y = X[i] - x1;
225 double Yp = XP[i] - x1;
226 double yi = r4 - XI[i];
227 double rho = R1[i];
228 if (X[i] >= x2) {
229 factor = -1;
230 y = X[i] - x2;
231 Yp = XP[i] - x2;
232 yi = r4 + XI[i];
233 rho = R2[i];
234 }
235
236 batches.output[i] = rho * y * y / Yp / Yp - r3 + factor * 4 * r3 * y * hp * r5 * r4 / yi / yi;
237 if (X[i] >= x1 && X[i] < x2) {
238 batches.output[i] =
239 fast_log(1 + 4 * XI[i] * r4 * (X[i] - XP[i]) * hp) / fast_log(1 + 2 * XI[i] * (XI[i] - r4));
240 batches.output[i] *= -batches.output[i] * r3;
241 }
242 if (X[i] >= x1 && X[i] < x2 && XI[i] < r6 && XI[i] > -r6)
243 batches.output[i] = -4 * r3 * (X[i] - XP[i]) * (X[i] - XP[i]) * hp * hp;
244 }
245 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
246 batches.output[i] = fast_exp(batches.output[i]);
247}
248
250{
251 Batch M = batches.args[0];
252 Batch M0 = batches.args[1];
253 Batch S = batches.args[2];
254 Batch A = batches.args[3];
255 Batch N = batches.args[4];
256 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
257 const double t = (M[i] - M0[i]) / S[i];
258 if ((A[i] > 0 && t >= -A[i]) || (A[i] < 0 && -t >= A[i])) {
259 batches.output[i] = -0.5 * t * t;
260 } else {
261 batches.output[i] = N[i] / (N[i] - A[i] * A[i] - A[i] * t);
262 batches.output[i] = fast_log(batches.output[i]);
263 batches.output[i] *= N[i];
264 batches.output[i] -= 0.5 * A[i] * A[i];
265 }
266 }
267 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
268 batches.output[i] = fast_exp(batches.output[i]);
269}
270
272{
273 Batch xData = batches.args[0];
274 const int nCoef = batches.nExtra - 2;
275 const double xmin = batches.extra[nCoef];
276 const double xmax = batches.extra[nCoef + 1];
277
278 if (STEP == 1) {
279 double prev[bufferSize][2];
280 double X[bufferSize];
281
282 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
283 // set a0-->prev[i][0] and a1-->prev[i][1]
284 // and x tranfsformed to range[-1..1]-->X[i]
285 prev[i][0] = batches.output[i] = 1.0;
286 prev[i][1] = X[i] = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin);
287 }
288 for (int k = 0; k < nCoef; k++) {
289 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
290 batches.output[i] += prev[i][1] * batches.extra[k];
291
292 // compute next order
293 const double next = 2 * X[i] * prev[i][1] - prev[i][0];
294 prev[i][0] = prev[i][1];
295 prev[i][1] = next;
296 }
297 }
298 } else {
299 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
300 double prev0 = 1.0;
301 double prev1 = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin);
302 double X = prev1;
303 batches.output[i] = 1.0;
304 for (int k = 0; k < nCoef; k++) {
305 batches.output[i] += prev1 * batches.extra[k];
306
307 // compute next order
308 const double next = 2 * X * prev1 - prev0;
309 prev0 = prev1;
310 prev1 = next;
311 }
312 }
313 }
314}
315
317{
318 Batch X = batches.args[0];
319 const double ndof = batches.extra[0];
320 const double gamma = 1 / std::tgamma(ndof / 2.0);
321 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
322 batches.output[i] = gamma;
323
324 constexpr double ln2 = 0.693147180559945309417232121458;
325 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
326 double arg = (ndof - 2) * fast_log(X[i]) - X[i] - ndof * ln2;
327 batches.output[i] *= fast_exp(0.5 * arg);
328 }
329}
330
332{
333 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
334 batches.output[i] = 0.0 + (batches.args[0][i] == 1.0);
335 }
336}
337
339{
340 Batch DM = batches.args[0];
341 Batch DM0 = batches.args[1];
342 Batch C = batches.args[2];
343 Batch A = batches.args[3];
344 Batch B = batches.args[4];
345 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
346 const double ratio = DM[i] / DM0[i];
347 const double arg1 = (DM0[i] - DM[i]) / C[i];
348 const double arg2 = A[i] * fast_log(ratio);
349 batches.output[i] = (1 - fast_exp(arg1)) * fast_exp(arg2) + B[i] * (ratio - 1);
350 }
351
352 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
353 if (batches.output[i] < 0)
354 batches.output[i] = 0;
355 }
356}
357
359{
360 int lowestOrder = batches.extra[0];
361 int nTerms = batches.extra[1];
362 auto x = batches.args[0];
363
364 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
365 batches.output[i] = 0.0;
366 double xTmp = std::pow(x[i], lowestOrder);
367 for (int k = 0; k < nTerms; ++k) {
368 batches.output[i] += batches.args[k + 1][i] * xTmp;
369 xTmp *= x[i];
370 }
371 batches.output[i] = std::exp(batches.output[i]);
372 }
373}
374
376{
377 Batch x = batches.args[0];
378 Batch c = batches.args[1];
379 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
380 batches.output[i] = fast_exp(x[i] * c[i]);
381 }
382}
383
385{
386 Batch x = batches.args[0];
387 Batch c = batches.args[1];
388 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
389 batches.output[i] = fast_exp(-x[i] * c[i]);
390 }
391}
392
394{
395 Batch X = batches.args[0];
396 Batch G = batches.args[1];
397 Batch B = batches.args[2];
398 Batch M = batches.args[3];
399 double gamma = -std::lgamma(G[0]);
400 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
401 if (X[i] == M[i]) {
402 batches.output[i] = (G[i] == 1.0) / B[i];
403 } else if (G._isVector) {
404 batches.output[i] = -std::lgamma(G[i]);
405 } else {
406 batches.output[i] = gamma;
407 }
408 }
409
410 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
411 if (X[i] != M[i]) {
412 const double invBeta = 1 / B[i];
413 double arg = (X[i] - M[i]) * invBeta;
414 batches.output[i] -= arg;
415 arg = fast_log(arg);
416 batches.output[i] += arg * (G[i] - 1);
417 batches.output[i] = fast_exp(batches.output[i]);
418 batches.output[i] *= invBeta;
419 }
420 }
421}
422
424{
425 const double root2 = std::sqrt(2.);
426 const double root2pi = std::sqrt(2. * std::atan2(0., -1.));
427
428 const bool isMinus = batches.extra[0] < 0.0;
429 const bool isPlus = batches.extra[0] > 0.0;
430
431 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
432
433 const double x = batches.args[0][i];
434 const double mean = batches.args[1][i] * batches.args[2][i];
435 const double sigma = batches.args[3][i] * batches.args[4][i];
436 const double tau = batches.args[5][i];
437
438 if (tau == 0.0) {
439 // Straight Gaussian, used for unconvoluted PDF or expBasis with 0 lifetime
440 double xprime = (x - mean) / sigma;
441 double result = std::exp(-0.5 * xprime * xprime) / (sigma * root2pi);
442 if (!isMinus && !isPlus)
443 result *= 2;
444 batches.output[i] = result;
445 } else {
446 // Convolution with exp(-t/tau)
447 const double xprime = (x - mean) / tau;
448 const double c = sigma / (root2 * tau);
449 const double u = xprime / (2 * c);
450
451 double result = 0.0;
452 if (!isMinus)
453 result += RooHeterogeneousMath::evalCerf(0, -u, c).real();
454 if (!isPlus)
455 result += RooHeterogeneousMath::evalCerf(0, u, c).real();
456 batches.output[i] = result;
457 }
458 }
459}
460
462{
463 auto x = batches.args[0];
464 auto mean = batches.args[1];
465 auto sigma = batches.args[2];
466 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
467 const double arg = x[i] - mean[i];
468 const double halfBySigmaSq = -0.5 / (sigma[i] * sigma[i]);
469 batches.output[i] = fast_exp(arg * arg * halfBySigmaSq);
470 }
471}
472
474{
475 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
476 batches.output[i] = batches.args[0][i];
477 }
478}
479
481{
482 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
483 batches.output[i] = -fast_log(batches.args[0][i]);
484 // Multiply by weights if they exist
485 if (batches.extra[0]) {
486 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
487 batches.output[i] *= batches.args[1][i];
488 }
489}
490
492{
493 Batch mass = batches.args[0];
494 Batch mu = batches.args[1];
495 Batch lambda = batches.args[2];
496 Batch gamma = batches.args[3];
497 Batch delta = batches.args[4];
498 const double sqrtTwoPi = std::sqrt(TMath::TwoPi());
499 const double massThreshold = batches.extra[0];
500
501 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
502 const double arg = (mass[i] - mu[i]) / lambda[i];
503#ifdef R__HAS_VDT
504 const double asinh_arg = fast_log(arg + 1 / fast_isqrt(arg * arg + 1));
505#else
506 const double asinh_arg = asinh(arg);
507#endif
508 const double expo = gamma[i] + delta[i] * asinh_arg;
509 const double result =
510 delta[i] * fast_exp(-0.5 * expo * expo) * fast_isqrt(1. + arg * arg) / (sqrtTwoPi * lambda[i]);
511
512 const double passThrough = mass[i] >= massThreshold;
513 batches.output[i] = result * passThrough;
514 }
515}
516
517/* Actual computation of Landau(x,mean,sigma) in a vectorization-friendly way
518 * Code copied from function landau_pdf (math/mathcore/src/PdfFuncMathCore.cxx)
519 * and rewritten to enable vectorization.
520 */
522{
523 auto case0 = [](double x) {
524 const double a1[3] = {0.04166666667, -0.01996527778, 0.02709538966};
525 const double u = fast_exp(x + 1.0);
526 return 0.3989422803 * fast_exp(-1 / u - 0.5 * (x + 1)) * (1 + (a1[0] + (a1[1] + a1[2] * u) * u) * u);
527 };
528 auto case1 = [](double x) {
529 constexpr double p1[5] = {0.4259894875, -0.1249762550, 0.03984243700, -0.006298287635, 0.001511162253};
530 constexpr double q1[5] = {1.0, -0.3388260629, 0.09594393323, -0.01608042283, 0.003778942063};
531 const double u = fast_exp(-x - 1);
532 return fast_exp(-u - 0.5 * (x + 1)) * (p1[0] + (p1[1] + (p1[2] + (p1[3] + p1[4] * x) * x) * x) * x) /
533 (q1[0] + (q1[1] + (q1[2] + (q1[3] + q1[4] * x) * x) * x) * x);
534 };
535 auto case2 = [](double x) {
536 constexpr double p2[5] = {0.1788541609, 0.1173957403, 0.01488850518, -0.001394989411, 0.0001283617211};
537 constexpr double q2[5] = {1.0, 0.7428795082, 0.3153932961, 0.06694219548, 0.008790609714};
538 return (p2[0] + (p2[1] + (p2[2] + (p2[3] + p2[4] * x) * x) * x) * x) /
539 (q2[0] + (q2[1] + (q2[2] + (q2[3] + q2[4] * x) * x) * x) * x);
540 };
541 auto case3 = [](double x) {
542 constexpr double p3[5] = {0.1788544503, 0.09359161662, 0.006325387654, 0.00006611667319, -0.000002031049101};
543 constexpr double q3[5] = {1.0, 0.6097809921, 0.2560616665, 0.04746722384, 0.006957301675};
544 return (p3[0] + (p3[1] + (p3[2] + (p3[3] + p3[4] * x) * x) * x) * x) /
545 (q3[0] + (q3[1] + (q3[2] + (q3[3] + q3[4] * x) * x) * x) * x);
546 };
547 auto case4 = [](double x) {
548 constexpr double p4[5] = {0.9874054407, 118.6723273, 849.2794360, -743.7792444, 427.0262186};
549 constexpr double q4[5] = {1.0, 106.8615961, 337.6496214, 2016.712389, 1597.063511};
550 const double u = 1 / x;
551 return u * u * (p4[0] + (p4[1] + (p4[2] + (p4[3] + p4[4] * u) * u) * u) * u) /
552 (q4[0] + (q4[1] + (q4[2] + (q4[3] + q4[4] * u) * u) * u) * u);
553 };
554 auto case5 = [](double x) {
555 constexpr double p5[5] = {1.003675074, 167.5702434, 4789.711289, 21217.86767, -22324.94910};
556 constexpr double q5[5] = {1.0, 156.9424537, 3745.310488, 9834.698876, 66924.28357};
557 const double u = 1 / x;
558 return u * u * (p5[0] + (p5[1] + (p5[2] + (p5[3] + p5[4] * u) * u) * u) * u) /
559 (q5[0] + (q5[1] + (q5[2] + (q5[3] + q5[4] * u) * u) * u) * u);
560 };
561 auto case6 = [](double x) {
562 constexpr double p6[5] = {1.000827619, 664.9143136, 62972.92665, 475554.6998, -5743609.109};
563 constexpr double q6[5] = {1.0, 651.4101098, 56974.73333, 165917.4725, -2815759.939};
564 const double u = 1 / x;
565 return u * u * (p6[0] + (p6[1] + (p6[2] + (p6[3] + p6[4] * u) * u) * u) * u) /
566 (q6[0] + (q6[1] + (q6[2] + (q6[3] + q6[4] * u) * u) * u) * u);
567 };
568 auto case7 = [](double x) {
569 const double a2[2] = {-1.845568670, -4.284640743};
570 const double u = 1 / (x - x * fast_log(x) / (x + 1));
571 return u * u * (1 + (a2[0] + a2[1] * u) * u);
572 };
573
574 Batch X = batches.args[0];
575 Batch M = batches.args[1];
576 Batch S = batches.args[2];
577
578 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
579 batches.output[i] = (X[i] - M[i]) / S[i];
580
581 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
582 if (S[i] <= 0.0) {
583 batches.output[i] = 0;
584 } else if (batches.output[i] < -5.5) {
585 batches.output[i] = case0(batches.output[i]);
586 } else if (batches.output[i] < -1.0) {
587 batches.output[i] = case1(batches.output[i]);
588 } else if (batches.output[i] < 1.0) {
589 batches.output[i] = case2(batches.output[i]);
590 } else if (batches.output[i] < 5.0) {
591 batches.output[i] = case3(batches.output[i]);
592 } else if (batches.output[i] < 12.0) {
593 batches.output[i] = case4(batches.output[i]);
594 } else if (batches.output[i] < 50.0) {
595 batches.output[i] = case5(batches.output[i]);
596 } else if (batches.output[i] < 300.) {
597 batches.output[i] = case6(batches.output[i]);
598 } else {
599 batches.output[i] = case7(batches.output[i]);
600 }
601 }
602}
603
605{
606 Batch X = batches.args[0];
607 Batch M0 = batches.args[1];
608 Batch K = batches.args[2];
609 constexpr double rootOf2pi = 2.506628274631000502415765284811;
610 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
611 double lnxOverM0 = fast_log(X[i] / M0[i]);
612 double lnk = fast_log(K[i]);
613 if (lnk < 0)
614 lnk = -lnk;
615 double arg = lnxOverM0 / lnk;
616 arg *= -0.5 * arg;
617 batches.output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
618 }
619}
620
622{
623 Batch X = batches.args[0];
624 Batch M0 = batches.args[1];
625 Batch K = batches.args[2];
626 constexpr double rootOf2pi = 2.506628274631000502415765284811;
627 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
628 double lnxOverM0 = fast_log(X[i]) - M0[i];
629 double lnk = K[i];
630 if (lnk < 0)
631 lnk = -lnk;
632 double arg = lnxOverM0 / lnk;
633 arg *= -0.5 * arg;
634 batches.output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
635 }
636}
637
639{
640 auto rawVal = batches.args[0];
641 auto normVal = batches.args[1];
642
643 int nEvalErrorsType0 = 0;
644 int nEvalErrorsType1 = 0;
645 int nEvalErrorsType2 = 0;
646
647 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
648 double out = 0.0;
649 // batches.output[i] = rawVal[i] / normVar[i];
650 if (normVal[i] < 0. || (normVal[i] == 0. && rawVal[i] != 0)) {
651 // Unreasonable normalisations. A zero integral can be tolerated if the function vanishes, though.
652 out = RooNaNPacker::packFloatIntoNaN(-normVal[i] + (rawVal[i] < 0. ? -rawVal[i] : 0.));
653 nEvalErrorsType0++;
654 } else if (rawVal[i] < 0.) {
655 // The pdf value is less than zero.
656 out = RooNaNPacker::packFloatIntoNaN(-rawVal[i]);
657 nEvalErrorsType1++;
658 } else if (std::isnan(rawVal[i])) {
659 // The pdf value is Not-a-Number.
660 out = rawVal[i];
661 nEvalErrorsType2++;
662 } else {
663 out = (rawVal[i] == 0. && normVal[i] == 0.) ? 0. : rawVal[i] / normVal[i];
664 }
665 batches.output[i] = out;
666 }
667
668 if (nEvalErrorsType0 > 0)
669 batches.extra[0] = batches.extra[0] + nEvalErrorsType0;
670 if (nEvalErrorsType1 > 1)
671 batches.extra[1] = batches.extra[1] + nEvalErrorsType1;
672 if (nEvalErrorsType2 > 2)
673 batches.extra[2] = batches.extra[2] + nEvalErrorsType2;
674}
675
676/* TMath::ASinH(x) needs to be replaced with ln( x + sqrt(x^2+1))
677 * argasinh -> the argument of TMath::ASinH()
678 * argln -> the argument of the logarithm that replaces AsinH
679 * asinh -> the value that the function evaluates to
680 *
681 * ln is the logarithm that was solely present in the initial
682 * formula, that is before the asinh replacement
683 */
685{
686 Batch X = batches.args[0];
687 Batch P = batches.args[1];
688 Batch W = batches.args[2];
689 Batch T = batches.args[3];
690 constexpr double xi = 2.3548200450309494; // 2 Sqrt( Ln(4) )
691 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
692 double argasinh = 0.5 * xi * T[i];
693 double argln = argasinh + 1 / fast_isqrt(argasinh * argasinh + 1);
694 double asinh = fast_log(argln);
695
696 double argln2 = 1 - (X[i] - P[i]) * T[i] / W[i];
697 double ln = fast_log(argln2);
698 batches.output[i] = ln / asinh;
699 batches.output[i] *= -0.125 * xi * xi * batches.output[i];
700 batches.output[i] -= 2.0 / xi / xi * asinh * asinh;
701 }
702
703 // faster if you exponentiate in a separate loop (dark magic!)
704 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
705 batches.output[i] = fast_exp(batches.output[i]);
706}
707
709{
710 Batch x = batches.args[0];
711 Batch mean = batches.args[1];
712 bool protectNegative = batches.extra[0];
713 bool noRounding = batches.extra[1];
714 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
715 const double x_i = noRounding ? x[i] : floor(x[i]);
716 batches.output[i] = std::lgamma(x_i + 1.);
717 }
718
719 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
720 const double x_i = noRounding ? x[i] : floor(x[i]);
721 const double logMean = fast_log(mean[i]);
722 const double logPoisson = x_i * logMean - mean[i] - batches.output[i];
723 batches.output[i] = fast_exp(logPoisson);
724
725 // Cosmetics
726 if (x_i < 0) {
727 batches.output[i] = 0;
728 } else if (x_i == 0) {
729 batches.output[i] = 1 / fast_exp(mean[i]);
730 }
731
732 if (protectNegative && mean[i] < 0)
733 batches.output[i] = 1.E-3;
734 }
735}
736
738{
739 const int nCoef = batches.extra[0];
740 const std::size_t nEvents = batches.nEvents;
741 Batch x = batches.args[nCoef];
742
743 for (size_t i = BEGIN; i < nEvents; i += STEP) {
744 batches.output[i] = batches.args[nCoef - 1][i];
745 }
746
747 // Indexes are in range 0..nCoef-1 but coefList[nCoef-1] has already been
748 // processed.
749 for (int k = nCoef - 2; k >= 0; k--) {
750 for (size_t i = BEGIN; i < nEvents; i += STEP) {
751 batches.output[i] = batches.args[k][i] + x[i] * batches.output[i];
752 }
753 }
754}
755
757{
758 const int nCoef = batches.extra[0];
759 Batch x = batches.args[0];
760
761 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
762 batches.output[i] = 0.0;
763 for (int k = 0; k < nCoef; ++k) {
764 batches.output[i] += batches.args[2 * k + 1][i] * std::pow(x[i], batches.args[2 * k + 2][i]);
765 }
766 }
767}
768
770{
771 const int nPdfs = batches.extra[0];
772 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
773 batches.output[i] = 1.;
774 }
775 for (int pdf = 0; pdf < nPdfs; pdf++) {
776 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
777 batches.output[i] *= batches.args[pdf][i];
778 }
779 }
780}
781
783{
784 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
785 batches.output[i] = batches.args[0][i] / batches.args[1][i];
786 }
787}
788
790{
791
792 const bool isMinus = batches.extra[0] < 0.0;
793 const bool isPlus = batches.extra[0] > 0.0;
794 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
795 double x = batches.args[0][i];
796 // Enforce sign compatibility
797 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
798 batches.output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]);
799 }
800}
801
803{
804 const bool isMinus = batches.extra[0] < 0.0;
805 const bool isPlus = batches.extra[0] > 0.0;
806 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
807 double x = batches.args[0][i];
808 // Enforce sign compatibility
809 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
810 batches.output[i] =
811 isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]) * fast_sin(x * batches.args[2][i]);
812 }
813}
814
816{
817 const bool isMinus = batches.extra[0] < 0.0;
818 const bool isPlus = batches.extra[0] > 0.0;
819 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
820 double x = batches.args[0][i];
821 // Enforce sign compatibility
822 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
823 batches.output[i] =
824 isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]) * fast_cos(x * batches.args[2][i]);
825 }
826}
827
829{
830 const bool isMinus = batches.extra[0] < 0.0;
831 const bool isPlus = batches.extra[0] > 0.0;
832 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
833 double x = batches.args[0][i];
834 // Enforce sign compatibility
835 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
836 if (isOutOfSign) {
837 batches.output[i] = 0.0;
838 } else {
839 const double tscaled = std::abs(x) / batches.args[1][i];
840 batches.output[i] = fast_exp(-tscaled) * tscaled;
841 }
842 }
843}
844
846{
847 const bool isMinus = batches.extra[0] < 0.0;
848 const bool isPlus = batches.extra[0] > 0.0;
849 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
850 double x = batches.args[0][i];
851 // Enforce sign compatibility
852 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
853 if (isOutOfSign) {
854 batches.output[i] = 0.0;
855 } else {
856 const double tscaled = std::abs(x) / batches.args[1][i];
857 batches.output[i] = fast_exp(-tscaled) * tscaled * tscaled;
858 }
859 }
860}
861
863{
864 const bool isMinus = batches.extra[0] < 0.0;
865 const bool isPlus = batches.extra[0] > 0.0;
866 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
867 double x = batches.args[0][i];
868 // Enforce sign compatibility
869 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
870 batches.output[i] =
871 isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]) * sinh(x * batches.args[2][i] * 0.5);
872 }
873}
874
876{
877 const bool isMinus = batches.extra[0] < 0.0;
878 const bool isPlus = batches.extra[0] > 0.0;
879 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
880 double x = batches.args[0][i];
881 // Enforce sign compatibility
882 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
883 batches.output[i] =
884 isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]) * cosh(x * batches.args[2][i] * .5);
885 }
886}
887
889{
890 Batch X = batches.args[0];
891 Batch M = batches.args[1];
892 Batch W = batches.args[2];
893 Batch S = batches.args[3];
894 const double invSqrt2 = 0.707106781186547524400844362105;
895 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
896 const double arg = (X[i] - M[i]) * (X[i] - M[i]);
897 if (S[i] == 0.0 && W[i] == 0.0) {
898 batches.output[i] = 1.0;
899 } else if (S[i] == 0.0) {
900 batches.output[i] = 1 / (arg + 0.25 * W[i] * W[i]);
901 } else if (W[i] == 0.0) {
902 batches.output[i] = fast_exp(-0.5 * arg / (S[i] * S[i]));
903 } else {
904 batches.output[i] = invSqrt2 / S[i];
905 }
906 }
907
908 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
909 if (S[i] != 0.0 && W[i] != 0.0) {
910 if (batches.output[i] < 0)
911 batches.output[i] = -batches.output[i];
912 const double factor = W[i] > 0.0 ? 0.5 : -0.5;
913 RooHeterogeneousMath::STD::complex<double> z(batches.output[i] * (X[i] - M[i]),
914 factor * batches.output[i] * W[i]);
915 batches.output[i] *= RooHeterogeneousMath::faddeeva(z).real();
916 }
917 }
918}
919
920/// Returns a std::vector of pointers to the compute functions in this file.
921std::vector<void (*)(Batches &)> getFunctions()
922{
923 return {computeAddPdf,
962}
963} // End namespace RF_ARCH
964} // End namespace RooBatchCompute
#define STEP
#define BEGIN
#define c(i)
Definition RSha256.hxx:101
#define __rooglobal__
#define X(type, name)
#define N
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
float xmin
float xmax
std::size_t nEvents
Definition Batches.h:46
double *__restrict output
Definition Batches.h:49
const Double_t sigma
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
#define G(x, y, z)
__rooglobal__ void computeExpPoly(Batches &batches)
__rooglobal__ void computeExponential(Batches &batches)
__rooglobal__ void computeTruthModelQuadBasis(Batches &batches)
__rooglobal__ void computeAddPdf(Batches &batches)
__rooglobal__ void computePoisson(Batches &batches)
__rooglobal__ void computeBreitWigner(Batches &batches)
__rooglobal__ void computeTruthModelCosBasis(Batches &batches)
__rooglobal__ void computeBMixDecay(Batches &batches)
__rooglobal__ void computeBukin(Batches &batches)
__rooglobal__ void computeChebychev(Batches &batches)
__rooglobal__ void computeBifurGauss(Batches &batches)
__rooglobal__ void computeTruthModelSinBasis(Batches &batches)
__rooglobal__ void computeGaussModelExpBasis(Batches &batches)
__rooglobal__ void computeProdPdf(Batches &batches)
__rooglobal__ void computeLandau(Batches &batches)
__rooglobal__ void computePolynomial(Batches &batches)
__rooglobal__ void computeJohnson(Batches &batches)
__rooglobal__ void computeTruthModelSinhBasis(Batches &batches)
__rooglobal__ void computeExponentialNeg(Batches &batches)
__rooglobal__ void computeTruthModelLinBasis(Batches &batches)
__rooglobal__ void computeBernstein(Batches &batches)
__rooglobal__ void computeTruthModelExpBasis(Batches &batches)
__rooglobal__ void computeChiSquare(Batches &batches)
__rooglobal__ void computeNovosibirsk(Batches &batches)
__rooglobal__ void computeLognormal(Batches &batches)
std::vector< void(*)(Batches &)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
__rooglobal__ void computeDstD0BG(Batches &batches)
__rooglobal__ void computeRatio(Batches &batches)
__rooglobal__ void computeDeltaFunction(Batches &batches)
__rooglobal__ void computeArgusBG(Batches &batches)
__rooglobal__ void computeGaussian(Batches &batches)
__rooglobal__ void computeNormalizedPdf(Batches &batches)
__rooglobal__ void computeTruthModelCoshBasis(Batches &batches)
__rooglobal__ void computeVoigtian(Batches &batches)
__rooglobal__ void computeGamma(Batches &batches)
__rooglobal__ void computeIdentity(Batches &batches)
__rooglobal__ void computeCBShape(Batches &batches)
__rooglobal__ void computeLognormalStandard(Batches &batches)
__rooglobal__ void computePower(Batches &batches)
__rooglobal__ void computeNegativeLogarithms(Batches &batches)
Namespace for dispatching RooFit computations to various backends.
__roodevice__ double fast_exp(double x)
__roodevice__ double fast_sin(double x)
constexpr std::size_t bufferSize
__roodevice__ double fast_log(double x)
__roodevice__ double fast_cos(double x)
__roodevice__ double fast_isqrt(double x)
__roodevice__ __roohost__ STD::complex< double > faddeeva(STD::complex< double > z)
__roohost__ __roodevice__ STD::complex< double > evalCerf(double swt, double u, double c)
constexpr Double_t TwoPi()
Definition TMath.h:44
#define R1(v, w, x, y, z, i)
Definition sha1.inl:134
#define R2(v, w, x, y, z, i)
Definition sha1.inl:137
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
TMarker m
Definition textangle.C:8