12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
15#include "RConfigure.h"
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
32#include <initializer_list>
/// Execute a function without arguments several times in parallel, dividing the execution in nChunks.
/// \param func Callable invoked with no arguments (the definition calls `func()`).
/// \param nTimes Number of times `func` is executed.
/// \param nChunks Number of chunks the executions are grouped into; defaults to 0.
/// NOTE(review): how nChunks == 0 is handled is not visible in this excerpt — presumably "no chunking"; confirm against the definition.
54 void Foreach(
F func,
unsigned nTimes,
unsigned nChunks = 0);
55 template<
class F,
class INTEGER>
/// Foreach overload taking its arguments as an std::initializer_list.
/// \param func Callable to execute.
/// \param args Arguments supplied as an initializer list.
/// \param nChunks Number of chunks the execution is divided into; defaults to 0.
/// NOTE(review): presumably `func` is applied to every element of `args`, as in the
/// std::vector overloads; the definition is not fully visible here — confirm.
57 template<
class F,
class T>
58 void Foreach(
F func, std::initializer_list<T> args,
unsigned nChunks = 0);
/// Foreach overload operating on a mutable std::vector of arguments.
/// \param func Callable to execute; the visible definitions call it as `func(args[i])`.
/// \param args Vector holding the arguments, taken by non-const reference.
/// \param nChunks Number of chunks the execution is divided into; defaults to 0.
/// NOTE(review): the treatment of nChunks == 0 is not visible in this excerpt — confirm against the definition.
59 template<
class F,
class T>
60 void Foreach(
F func, std::vector<T> &args,
unsigned nChunks = 0);
/// Foreach overload operating on an immutable std::vector of arguments.
/// \param func Callable to execute; the visible definitions call it as `func(args[i])`.
/// \param args Vector holding the arguments, taken by const reference.
/// \param nChunks Number of chunks the execution is divided into; defaults to 0.
/// NOTE(review): the treatment of nChunks == 0 is not visible in this excerpt — confirm against the definition.
61 template<
class F,
class T>
62 void Foreach(
F func,
const std::vector<T> &args,
unsigned nChunks = 0);
/// Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per chunk.
/// \param func Callable invoked with no arguments.
/// \param nTimes Number of times `func` is executed.
/// \param redfunc Reduction function used to merge the partial results of each chunk.
/// \param nChunks Number of chunks the executions are divided into.
/// \return A vector of results of type InvokeResult_t<F>, one entry per chunk.
/// NOTE(review): the visible definition computes the chunk size as ceil(nTimes / nChunks),
/// so the number of chunks actually produced may be lower than nChunks.
69 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
70 auto Map(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
71 template <
class F,
class INTEGER,
class R,
class Cond = val
idMapReturnCond<F, INTEGER>>
73 -> std::vector<InvokeResult_t<F, INTEGER>>;
/// Chunked Map overload taking its arguments as an std::initializer_list.
/// \param func Callable to execute on the arguments.
/// \param args Arguments supplied as an initializer list.
/// \param redfunc Reduction function for the partial results.
/// \param nChunks Number of chunks the execution is divided into.
/// \return A vector of InvokeResult_t<F, T>.
/// NOTE(review): presumably forwards to the std::vector overload after copying `args`
/// into a vector; the definition is only partially visible here — confirm.
74 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
75 auto Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Chunked Map overload operating on a mutable std::vector of arguments.
/// \param func Callable to execute; the visible definitions call it as `func(args[i + j])`.
/// \param args Vector holding the arguments, taken by non-const reference.
/// \param redfunc Reduction function applied to the partial results of each chunk.
/// \param nChunks Number of chunks the execution is divided into.
/// \return A vector of InvokeResult_t<F, T>; the visible definitions store one reduced value per chunk.
76 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
77 auto Map(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Chunked Map overload operating on an immutable std::vector of arguments.
/// \param func Callable to execute; the visible definitions call it as `func(args[i + j])`.
/// \param args Vector holding the arguments, taken by const reference.
/// \param redfunc Reduction function applied to the partial results of each chunk.
/// \param nChunks Number of chunks the execution is divided into.
/// \return A vector of InvokeResult_t<F, T>; the visible definitions store one reduced value per chunk.
78 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
79 auto Map(
F func,
const std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
90 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
92 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
94 template <
class F,
class INTEGER,
class R,
class Cond = val
idMapReturnCond<F, INTEGER>>
96 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
98 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
100 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
102 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
104 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
/// "Reduce" an std::vector into a single object by passing a reduction function as the second argument.
/// \param objs Vector of objects to reduce.
/// \param redfunc Callable invoked on the whole vector, i.e. `redfunc(objs)`.
/// \return The value produced by `redfunc(objs)`.
/// NOTE(review): the visible definition static_asserts that `redfunc(objs)` yields a `T`
/// and delegates to SeqReduce.
108 template<
class T,
class R>
auto Reduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// "Reduce" an std::vector into a single object using a binary operation.
/// \param objs Vector of objects to reduce.
/// \param redfunc Binary callable taking two elements, i.e. `redfunc(a, b)`.
/// \return A value of the type yielded by `redfunc(objs.front(), objs.front())`.
/// NOTE(review): the visible definition static_asserts that this type is `T` and delegates to
/// ParallelReduce, which is only declared here for double and float — confirm supported types.
109 template<
class T,
class BINARYOP>
auto Reduce(
const std::vector<T> &objs, BINARYOP redfunc) ->
decltype(redfunc(objs.front(), objs.front()));
/// Execute a function without arguments several times in parallel.
/// \param func Callable invoked with no arguments.
/// \param nTimes Number of times `func` is executed.
/// \return A vector of InvokeResult_t<F>; the visible definition sizes it to nTimes.
116 template <
class F,
class Cond = val
idMapReturnCond<F>>
117 auto MapImpl(
F func,
unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
118 template <
class F,
class INTEGER,
class Cond = val
idMapReturnCond<F, INTEGER>>
/// MapImpl overload operating on a mutable std::vector of arguments.
/// \param func Callable to execute; the visible definitions store `func(args[i])` in slot i of the result.
/// \param args Vector holding the arguments, taken by non-const reference.
/// \return A vector of InvokeResult_t<F, T>; the visible definitions size it to `args.size()`.
120 template <
class F,
class T,
class Cond = val
idMapReturnCond<F, T>>
121 auto MapImpl(
F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// MapImpl overload operating on an immutable std::vector of arguments.
/// \param func Callable to execute; the visible definitions store `func(args[i])` in slot i of the result.
/// \param args Vector holding the arguments, taken by const reference.
/// \return A vector of InvokeResult_t<F, T>; the visible definitions size it to `args.size()`.
122 template <
class F,
class T,
class Cond = val
idMapReturnCond<F, T>>
123 auto MapImpl(
F func,
const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// Execute a function in parallel over the indices of a loop.
/// \param start First index of the loop.
/// \param end End index of the loop.
/// \param step Increment between consecutive indices.
/// \param f Function called with the current loop index.
/// NOTE(review): callers in this file pass (0U, n, 1, ...) and index containers of size n,
/// which suggests a half-open range [start, end) — confirm against the implementation.
126 void ParallelFor(
unsigned start,
unsigned end,
unsigned step,
const std::function<
void(
unsigned int i)> &
f);
/// "Reduce" in parallel an std::vector<double> into a single double value.
/// \param objs Vector of values to reduce.
/// \param redfunc Binary operation combining two doubles into one.
/// \return The reduced value.
/// NOTE(review): the order in which elements are combined is not visible here; presumably
/// redfunc should be associative for a well-defined result — confirm.
127 double ParallelReduce(
const std::vector<double> &objs,
const std::function<
double(
double a,
double b)> &redfunc);
/// Single-precision counterpart of ParallelReduce: reduces an std::vector<float> into a single float value.
/// \param objs Vector of values to reduce.
/// \param redfunc Binary operation combining two floats into one.
/// \return The reduced value.
/// NOTE(review): presumably executed in parallel like the double overload; the
/// implementation is not visible in this excerpt — confirm.
128 float ParallelReduce(
const std::vector<float> &objs,
const std::function<
float(
float a,
float b)> &redfunc);
/// "Reduce", sequentially, an std::vector into a single object.
/// \param objs Vector of objects to reduce.
/// \param redfunc Callable invoked on the whole vector.
/// \return The result of `redfunc(objs)` (the visible definition simply returns that call).
129 template<
class T,
class R>
130 auto SeqReduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Pointer to the TBB task arena wrapper; default-initialised to nullptr.
133 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper>
fTaskArenaW =
nullptr;
147 ParallelFor(0U, nTimes, 1, [&](
unsigned int){func();});
151 unsigned step = (nTimes + nChunks - 1) / nChunks;
152 auto lambda = [&](
unsigned int i)
154 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
167 template<
class F,
class INTEGER>
173 unsigned start = *args.
begin();
174 unsigned end = *args.
end();
175 unsigned seqStep = args.
step();
176 unsigned step = (end - start + nChunks - 1) / nChunks;
178 auto lambda = [&](
unsigned int i)
180 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
193 template<
class F,
class T>
195 std::vector<T> vargs(std::move(args));
205 template<
class F,
class T>
207 unsigned int nToProcess = args.size();
209 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
213 unsigned step = (nToProcess + nChunks - 1) / nChunks;
214 auto lambda = [&](
unsigned int i)
216 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
229 template<
class F,
class T>
231 unsigned int nToProcess = args.size();
233 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
237 unsigned step = (nToProcess + nChunks - 1) / nChunks;
238 auto lambda = [&](
unsigned int i)
240 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
252 template <
class F,
class Cond>
255 using retType =
decltype(func());
256 std::vector<retType> reslist(nTimes);
257 auto lambda = [&](
unsigned int i)
261 ParallelFor(0U, nTimes, 1, lambda);
271 template <
class F,
class INTEGER,
class Cond>
274 using retType =
decltype(func(*args.begin()));
275 std::vector<retType> reslist(args.size());
276 auto lambda = [&](
unsigned int i) { reslist[i] = func(args[i]); };
277 ParallelFor(0U, args.size(), 1, lambda);
287 template <
class F,
class R,
class Cond>
292 return Map(func, nTimes);
295 unsigned step = (nTimes + nChunks - 1) / nChunks;
297 unsigned actualChunks = (nTimes + step - 1) / step;
298 using retType =
decltype(func());
299 std::vector<retType> reslist(actualChunks);
300 auto lambda = [&](
unsigned int i)
302 std::vector<retType> partialResults(std::min(nTimes-i, step));
303 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
304 partialResults[j] = func();
306 reslist[i / step] = Reduce(partialResults, redfunc);
308 ParallelFor(0U, nTimes, step, lambda);
318 template <
class F,
class T,
class Cond>
322 using retType =
decltype(func(args.front()));
324 unsigned int nToProcess = args.size();
325 std::vector<retType> reslist(nToProcess);
327 auto lambda = [&](
unsigned int i)
329 reslist[i] = func(args[i]);
332 ParallelFor(0U, nToProcess, 1, lambda);
342 template <
class F,
class T,
class Cond>
346 using retType =
decltype(func(args.front()));
348 unsigned int nToProcess = args.size();
349 std::vector<retType> reslist(nToProcess);
351 auto lambda = [&](
unsigned int i)
353 reslist[i] = func(args[i]);
356 ParallelFor(0U, nToProcess, 1, lambda);
366 template <
class F,
class INTEGER,
class R,
class Cond>
368 -> std::vector<InvokeResult_t<F, INTEGER>>
372 return Map(func, args);
375 unsigned nToProcess = args.size();
376 unsigned step = (nToProcess + nChunks - 1) / nChunks;
378 unsigned actualChunks = (nToProcess + step - 1) / step;
380 using retType =
decltype(func(*args.begin()));
381 std::vector<retType> reslist(actualChunks);
382 auto lambda = [&](
unsigned int i) {
383 std::vector<retType> partialResults(std::min(step, nToProcess - i));
384 for (
unsigned j = 0; j < partialResults.size(); j++) {
385 partialResults[j] = func(args[i + j]);
387 reslist[i / step] = Reduce(partialResults, redfunc);
390 ParallelFor(0U, nToProcess, step, lambda);
400 template <
class F,
class T,
class R,
class Cond>
402 -> std::vector<InvokeResult_t<F, T>>
406 return Map(func, args);
409 unsigned int nToProcess = args.size();
410 unsigned step = (nToProcess + nChunks - 1) / nChunks;
412 unsigned actualChunks = (nToProcess + step - 1) / step;
414 using retType =
decltype(func(args.front()));
415 std::vector<retType> reslist(actualChunks);
416 auto lambda = [&](
unsigned int i) {
417 std::vector<retType> partialResults(std::min(step, nToProcess - i));
418 for (
unsigned j = 0; j < partialResults.size(); j++) {
419 partialResults[j] = func(args[i + j]);
421 reslist[i / step] = Reduce(partialResults, redfunc);
424 ParallelFor(0U, nToProcess, step, lambda);
434 template <
class F,
class T,
class R,
class Cond>
436 -> std::vector<InvokeResult_t<F, T>>
440 return Map(func, args);
443 unsigned int nToProcess = args.size();
444 unsigned step = (nToProcess + nChunks - 1) / nChunks;
446 unsigned actualChunks = (nToProcess + step - 1) / step;
448 using retType =
decltype(func(args.front()));
449 std::vector<retType> reslist(actualChunks);
450 auto lambda = [&](
unsigned int i) {
451 std::vector<retType> partialResults(std::min(step, nToProcess - i));
452 for (
unsigned j = 0; j < partialResults.size(); j++) {
453 partialResults[j] = func(args[i + j]);
455 reslist[i / step] = Reduce(partialResults, redfunc);
458 ParallelFor(0U, nToProcess, step, lambda);
468 template <
class F,
class T,
class R,
class Cond>
470 -> std::vector<InvokeResult_t<F, T>>
472 std::vector<T> vargs(std::move(args));
473 const auto &reslist = Map(func, vargs, redfunc, nChunks);
480 template <
class F,
class R,
class Cond>
483 return Reduce(Map(func, nTimes), redfunc);
491 template <
class F,
class R,
class Cond>
494 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
502 template <
class F,
class INTEGER,
class R,
class Cond>
506 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
514 template <
class F,
class T,
class R,
class Cond>
518 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
524 template <
class F,
class T,
class R,
class Cond>
527 return Reduce(Map(func, args), redfunc);
533 template <
class F,
class T,
class R,
class Cond>
536 return Reduce(Map(func, args), redfunc);
544 template <
class F,
class T,
class R,
class Cond>
547 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
555 template <
class F,
class T,
class R,
class Cond>
559 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
564 template<
class T,
class R>
568 static_assert(std::is_same<
decltype(redfunc(objs)), T>
::value,
"redfunc does not have the correct signature");
569 return SeqReduce(objs, redfunc);
579 template<
class T,
class BINARYOP>
583 static_assert(std::is_same<
decltype(redfunc(objs.front(), objs.front())), T>
::value,
"redfunc does not have the correct signature");
584 return ParallelReduce(objs, redfunc);
593 template<
class T,
class R>
596 return redfunc(objs);
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
This class defines an interface to execute the same task multiple times, possibly in parallel and with different arguments every time.
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
A pseudo container class which is a generator of indices.
This class provides a simple interface to execute the same task multiple times in parallel threads,...
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per chunk.
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...