Logo ROOT  
Reference Guide
RooBatchCompute.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, September 2020
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file RooBatchCompute.cxx
15\class RbcClass
16\ingroup Roobatchcompute
17
18This file contains the code for CPU computations using the RooBatchCompute library.
19**/
20
21#include "RooBatchCompute.h"
22#include "RooVDTHeaders.h"
23#include "Batches.h"
24
25#include "ROOT/RConfig.hxx"
26#include "ROOT/TExecutor.hxx"
27
28#include <algorithm>
29#include <sstream>
30#include <stdexcept>
31
32#ifndef RF_ARCH
33#error "RF_ARCH should always be defined"
34#endif
35
36namespace RooBatchCompute {
37namespace RF_ARCH {
38
39std::vector<void (*)(BatchesHandle)> getFunctions(); // declaration only: the definition is not part of this listing
40
41/// This class overrides some RooBatchComputeInterface functions, for the
42/// purpose of providing a CPU specific implementation of the library.
44private:
45 const std::vector<void (*)(BatchesHandle)> _computeFunctions;
46
47public:
49 {
50 // Set the dispatch pointer to this instance of the library upon loading
51 dispatchCPU = this;
52 }
53
54 Architecture architecture() const override { return Architecture::RF_ARCH; };
55 std::string architectureName() const override
56 {
57 // transform to lower case to match the original architecture name passed to the compiler
58#ifdef _QUOTEVAL_ // to quote the value of the preprocessor macro instead of the name
59#error "It's unexpected that _QUOTEVAL_ is defined at this point!"
60#endif
61#define _QUOTEVAL_(x) _QUOTE_(x)
62 std::string out = _QUOTEVAL_(RF_ARCH);
63#undef _QUOTEVAL_
64 std::transform(out.begin(), out.end(), out.begin(), [](unsigned char c) { return std::tolower(c); });
65 ;
66 return out;
67 };
68
69 /** Compute multiple values using optimized functions.
70 This method creates a Batches object and passes it to the correct compute function.
71 In case Implicit Multithreading is enabled, the events to be processed are equally
72 divided among the tasks to be generated and computed in parallel.
73 \param computer An enum specifying the compute function to be used.
74 \param output The array where the computation results are stored.
75 \param nEvents The number of events to be processed.
76 \param vars A std::vector containing pointers to the variables involved in the computation.
77 \param extraArgs An optional std::vector containing extra double values that may participate in the computation. **/
78 void compute(cudaStream_t *, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars,
79 const ArgVector &extraArgs) override
80 {
81 static std::vector<double> buffer; // function-local static: the same storage is reused by every call
82 buffer.resize(vars.size() * bufferSize); // one row of bufferSize doubles per entry of vars
83 // NOTE(review): this listing skips from line 83 to 86, so the declaration of `ex` and the `if` paired with the `else` below are not visible here
86 std::size_t nThreads = ex.GetPoolSize();
87 // Ceiling division: number of events handled by each task
88 std::size_t nEventsPerThread = nEvents / nThreads + (nEvents % nThreads > 0);
89
90 // Recompute the number of tasks actually needed for that slice size (again a ceiling division)
91 nThreads = nEvents / nEventsPerThread + (nEvents % nEventsPerThread > 0); // NOTE(review): nEventsPerThread is 0 when nEvents == 0, which divides by zero here -- confirm callers never pass an empty range
92
93 auto task = [&](std::size_t idx) -> int {
94 // NOTE(review): every task passes the same buffer.data(), so all tasks rewrite the same scalar rows -- confirm this is safe if ex.Map runs them concurrently
95 // Each task builds its own Batches object spanning nEventsPerThread events
96 // and advances it to the first event of the slice owned by task idx
97 Batches batches(output, nEventsPerThread, vars, extraArgs, buffer.data());
98 batches.advance(batches.getNEvents() * idx);
99
100 // The last task takes whatever is left over, which may be fewer than nEventsPerThread events
101 if (idx == nThreads - 1) {
102 batches.setNEvents(nEvents - idx * batches.getNEvents());
103 }
104 // Walk the slice in chunks of bufferSize events; the call after the loop handles the remaining events (at most bufferSize, 0 only if the slice is empty)
105 std::size_t events = batches.getNEvents();
106 batches.setNEvents(bufferSize);
107 while (events > bufferSize) {
108 _computeFunctions[computer](batches);
109 batches.advance(bufferSize);
110 events -= bufferSize;
111 }
112 batches.setNEvents(events);
113 _computeFunctions[computer](batches);
114 return 0;
115 };
116 // indices[0] is already 0 from the vector's value-initialisation, hence the loop below starts at 1
117 std::vector<std::size_t> indices(nThreads);
118 for (unsigned int i = 1; i < nThreads; i++) {
119 indices[i] = i;
120 }
121 ex.Map(task, indices);
122 } else {
123 // A single Batches object covers all nEvents events;
124 // nothing is split in this branch, only the chunking by bufferSize below is applied
125 Batches batches(output, nEvents, vars, extraArgs, buffer.data());
126
127 std::size_t events = batches.getNEvents();
128 batches.setNEvents(bufferSize);
129 while (events > bufferSize) {
130 _computeFunctions[computer](batches);
131 batches.advance(bufferSize);
132 events -= bufferSize;
133 }
134 batches.setNEvents(events);
135 _computeFunctions[computer](batches);
136 }
137 }
138 /// Return the sum of an input array
139 double sumReduce(cudaStream_t *, InputArr input, size_t n) override
140 {
141 long double sum = 0.0;
142 for (size_t i = 0; i < n; i++)
143 sum += input[i];
144 return sum;
145 }
146}; // End class RooBatchComputeClass
147
148/// Static object to trigger the constructor which overwrites the dispatch pointer.
150
151/** Construct a Batches object
152\param output The array where the computation results are stored.
153\param nEvents The number of events to be processed.
154\param vars A std::vector containing pointers to the variables involved in the computation.
155\param extraArgs An optional std::vector containing extra double values that may participate in the computation.
156\param buffer A 2D array that is used as a buffer for scalar variables.
157For every scalar parameter a buffer (one row of the buffer) is filled with copies of the scalar
158value, so that it behaves as a batch and facilitates auto-vectorization. The Batches object can be
159passed by value to a compute function to perform efficient computations. **/
160Batches::Batches(RestrictArr output, size_t nEvents, const VarVector &vars, const ArgVector &extraArgs, double *buffer)
161 : _nEvents(nEvents), _nBatches(vars.size()), _nExtraArgs(extraArgs.size()), _output(output)
162{
163 _arrays.resize(vars.size());
164 for (size_t i = 0; i < vars.size(); i++) {
165 const RooSpan<const double> &span = vars[i];
166 if (span.empty()) {
167 std::stringstream ss;
168 ss << "The span number " << i << " passed to Batches::Batches() is empty!";
169 throw std::runtime_error(ss.str());
170 } else if (span.size() > 1)
171 _arrays[i].set(span.data()[0], span.data(), true);
172 else {
173 std::fill_n(&buffer[i * bufferSize], bufferSize, span.data()[0]);
174 _arrays[i].set(span.data()[0], &buffer[i * bufferSize], false);
175 }
176 }
177 _extraArgs = extraArgs;
178}
179
180} // End namespace RF_ARCH
181} // End namespace RooBatchCompute
#define c(i)
Definition: RSha256.hxx:101
#define _QUOTEVAL_(x)
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
This class implements the interface to execute the same task multiple times, sequentially or in paral...
Definition: TExecutor.hxx:38
__roodevice__ std::size_t getNEvents() const
Definition: Batches.h:99
void advance(std::size_t nEvents)
Definition: Batches.h:105
Batches(RestrictArr output, std::size_t nEvents, const VarVector &vars, const ArgVector &extraArgs={}, double *buffer=nullptr)
void setNEvents(std::size_t n)
Definition: Batches.h:104
This class overrides some RooBatchComputeInterface functions, for the purpose of providing a CPU spec...
void compute(cudaStream_t *, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars, const ArgVector &extraArgs) override
Compute multiple values using optimized functions.
double sumReduce(cudaStream_t *, InputArr input, size_t n) override
Return the sum of an input array.
const std::vector< void(*)(BatchesHandle)> _computeFunctions
The interface which should be implemented to provide optimised computation functions for implementati...
A simple container to hold a batch of data values.
Definition: RooSpan.h:34
constexpr std::span< T >::pointer data() const
Definition: RooSpan.h:106
constexpr std::span< T >::size_type size() const noexcept
Definition: RooSpan.h:121
constexpr bool empty() const noexcept
Definition: RooSpan.h:125
const Int_t n
Definition: legend1.C:16
Double_t ex[n]
Definition: legend1.C:17
void(off) SmallVectorTemplateBase< T
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:558
static RooBatchComputeClass computeObj
Static object to trigger the constructor which overwrites the dispatch pointer.
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
Batches & BatchesHandle
Definition: Batches.h:117
Namespace for dispatching RooFit computations to various backends.
std::vector< RooSpan< const double > > VarVector
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
constexpr std::size_t bufferSize
Definition: Batches.h:38
const double *__restrict InputArr
std::vector< double > ArgVector
double *__restrict RestrictArr
static void output()