28#ifdef ROOBATCHCOMPUTE_USE_IMT
40#error "RF_ARCH should always be defined"
48void fillBatches(
Batches &batches,
double *
output,
size_t nEvents, std::size_t nBatches,
ArgSpan extraArgs)
50 batches.extra = extraArgs.data();
51 batches.nEvents = nEvents;
52 batches.nBatches = nBatches;
53 batches.nExtra = extraArgs.size();
57void fillArrays(std::span<Batch> arrays,
VarSpan vars, std::size_t nEvents)
59 for (std::size_t i = 0; i < vars.size(); i++) {
60 arrays[i]._array = vars[i].data();
61 arrays[i]._isVector = vars[i].empty() || vars[i].size() >= nEvents;
65inline void advance(
Batches &batches, std::size_t nEvents)
67 for (std::size_t i = 0; i < batches.nBatches; i++) {
68 Batch &arg = batches.args[i];
69 arg._array += arg._isVector * nEvents;
71 batches.output += nEvents;
80class RooBatchComputeClass :
public RooBatchComputeInterface {
93#error "It's unexpected that _QUOTEVAL_ is defined at this point!"
95#define _QUOTEVAL_(x) _QUOTE_(x)
98 std::transform(out.begin(), out.end(), out.begin(), [](
unsigned char c) { return std::tolower(c); });
105 std::span<const double> offsetProbas)
override;
108#ifdef ROOBATCHCOMPUTE_USE_IMT
115#ifdef ROOBATCHCOMPUTE_USE_IMT
118 std::size_t nEvents =
output.size();
125 std::size_t nEventsPerThread = nEvents / nThreads + (nEvents % nThreads > 0);
128 nThreads = nEvents / nEventsPerThread + (nEvents % nEventsPerThread > 0);
130 auto task = [&](std::size_t idx) ->
int {
134 std::vector<Batch> arrays(vars.size());
135 fillBatches(batches,
output.data(), nEventsPerThread, vars.size(), extraArgs);
136 fillArrays(arrays, vars, nEvents);
137 batches.
args = arrays.data();
138 advance(batches, batches.
nEvents * idx);
141 if (idx == nThreads - 1) {
145 std::size_t events = batches.
nEvents;
157 std::vector<std::size_t> indices(nThreads);
158 for (
unsigned int i = 1; i < nThreads; i++) {
161 ex.Map(task, indices);
194#ifdef ROOBATCHCOMPUTE_USE_IMT
196 computeIMT(computer,
output, vars, extraArgs);
200 std::size_t nEvents =
output.size();
205 std::vector<Batch> arrays(vars.size());
206 fillBatches(batches,
output.data(), nEvents, vars.size(), extraArgs);
207 fillArrays(arrays, vars, nEvents);
208 batches.args = arrays.data();
210 std::size_t events = batches.nEvents;
217 batches.nEvents = events;
223inline std::pair<double, double> getLog(
double prob, ReduceNLLOutput &out)
225 if (std::abs(prob) > 1e6) {
230 out.nNonPositiveValues++;
231 return {std::log(prob), -prob};
234 if (std::isnan(prob)) {
239 return {std::log(prob), 0.0};
250 std::span<const double> weights, std::span<const double> offsetProbas)
254 double badness = 0.0;
258 for (std::size_t i = 0; i <
probas.size(); ++i) {
260 const double eventWeight = weights.size() > 1 ? weights[i] : weights[0];
262 if (0. == eventWeight)
265 std::pair<double, double> logOut = getLog(probas[i], out);
266 double term = logOut.first;
267 badness += logOut.second;
269 if (!offsetProbas.empty()) {
270 term -= std::log(offsetProbas[i]);
273 term *= -eventWeight;
278 out.nllSum = nllSum.
Sum();
284 out.nllSumCarry = 0.0;
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
These classes encapsulate the necessary data for the computations.
This class implements the interface to execute the same task multiple times, sequentially or in parallel.
unsigned GetPoolSize() const
Return the number of pooled workers.
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error.
static KahanSum< T, N > Accumulate(Iterator begin, Iterator end, T initialValue=T{})
Iterate over a range and return an instance of a KahanSum.
void Add(T x)
Single-element accumulation. Will not vectorise.
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute library.
This class overrides some RooBatchComputeInterface functions, for the purpose of providing a CUDA-specific implementation.
std::string architectureName() const override
void compute(Config const &, Computer computer, std::span< double > output, VarSpan vars, ArgSpan extraArgs) override
ReduceNLLOutput reduceNLL(RooBatchCompute::Config const &cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas) override
double reduceSum(Config const &, InputArr input, size_t n) override
const std::vector< void(*)(Batches &)> _computeFunctions
void compute(RooBatchCompute::Config const &cfg, Computer computer, std::span< double > output, VarSpan vars, ArgSpan extraArgs) override
Compute multiple values using cuda kernels.
double reduceSum(RooBatchCompute::Config const &cfg, InputArr input, size_t n) override
Return the sum of an input array.
Architecture architecture() const override
ReduceNLLOutput reduceNLL(Config const &, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas) override
void(off) SmallVectorTemplateBase< T
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
std::vector< void(*)(Batches &)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
static RooBatchComputeClass computeObj
Static object to trigger the constructor which overwrites the dispatch pointer.
Namespace for dispatching RooFit computations to various backends.
std::span< double > ArgSpan
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
constexpr std::size_t bufferSize
const double *__restrict InputArr
std::span< const std::span< const double > > VarSpan
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
static float unpackNaN(double val)
If val is NaN and this NaN has been tagged as containing a payload, unpack the float from the mantissa.