12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
15#include "RConfigure.h"
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
32#include <initializer_list>
/// Execute a function without arguments several times in parallel, dividing the execution in nChunks.
/// \param func    Callable that is invoked with no arguments.
/// \param nTimes  Number of times func is invoked.
/// \param nChunks Number of chunks the invocations are grouped into; defaults to 0.
///                NOTE(review): presumably 0 selects the un-chunked path (one ParallelFor index per call);
///                the branch itself is not visible in this listing - confirm.
54 void Foreach(
F func,
unsigned nTimes,
unsigned nChunks = 0);
55 template<
class F,
class INTEGER>
/// Foreach overload taking its arguments as an std::initializer_list.
/// \param func    Callable to apply; presumably invoked once per element of args - confirm against the definition.
/// \param args    Elements to process.
/// \param nChunks Number of chunks to group the work into; defaults to 0.
/// NOTE(review): the definition appears to move args into a std::vector before processing;
/// only a fragment of that definition is visible here - confirm.
57 template<
class F,
class T>
58 void Foreach(
F func, std::initializer_list<T> args,
unsigned nChunks = 0);
/// Foreach overload taking its arguments as a non-const std::vector reference.
/// \param func    Callable to apply; the visible definition fragments call func(args[i]) for each index i.
/// \param args    Elements to process, passed by non-const reference.
/// \param nChunks Number of chunks to group the work into; defaults to 0.
///                NOTE(review): presumably 0 means no chunking - the guarding branch is not visible; confirm.
59 template<
class F,
class T>
60 void Foreach(
F func, std::vector<T> &args,
unsigned nChunks = 0);
/// Foreach overload taking its arguments as a const std::vector reference.
/// \param func    Callable to apply; the visible definition fragments call func(args[i]) for each index i.
/// \param args    Elements to process, passed by const reference.
/// \param nChunks Number of chunks to group the work into; defaults to 0.
///                NOTE(review): presumably 0 means no chunking - the guarding branch is not visible; confirm.
61 template<
class F,
class T>
62 void Foreach(
F func,
const std::vector<T> &args,
unsigned nChunks = 0);
77 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
79 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
81 template <
class F,
class INTEGER,
class R,
class Cond = val
idMapReturnCond<F, INTEGER>>
83 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
85 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
87 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
89 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
91 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
/// "Reduce" an std::vector into a single object by passing a function as the second argument.
/// \param objs    Vector of objects to reduce.
/// \param redfunc Callable that receives the whole vector; the return type of Reduce is whatever
///                redfunc(objs) returns.
/// NOTE(review): the visible definition fragment static_asserts that redfunc(objs) yields T and then
/// forwards to SeqReduce - confirm that fragment belongs to this overload.
95 template<
class T,
class R>
auto Reduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Reduce overload for a binary operator.
/// \param objs    Vector of objects to reduce.
/// \param redfunc Callable taking two elements; the return type of Reduce is the type of
///                redfunc(objs.front(), objs.front()).
/// NOTE(review): the visible definition fragment static_asserts that this type is T and then
/// forwards to ParallelReduce - confirm that fragment belongs to this overload.
96 template<
class T,
class BINARYOP>
auto Reduce(
const std::vector<T> &objs, BINARYOP redfunc) ->
decltype(redfunc(objs.front(), objs.front()));
/// Execute a function without arguments several times in parallel.
/// \param func   Callable invoked with no arguments.
/// \param nTimes Number of invocations.
/// \return A std::vector<InvokeResult_t<F>>; the visible definition fragment sizes it to nTimes.
/// NOTE(review): `val` / `idMapReturnCond` below looks like a single identifier split across two
/// lines - confirm against the original header.
103 template <
class F,
class Cond = val
idMapReturnCond<F>>
104 auto MapImpl(
F func,
unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
105 template <
class F,
class INTEGER,
class Cond = val
idMapReturnCond<F, INTEGER>>
/// MapImpl overload taking its arguments as a non-const std::vector reference.
/// \param func Callable applied to the elements of args.
/// \param args Elements to process, passed by non-const reference.
/// \return A std::vector<InvokeResult_t<F, T>>.
/// NOTE(review): the visible definition fragments store func(args[i]) in slot i of a result vector
/// sized to args.size() - confirm those fragments belong to this overload.
/// NOTE(review): `val` / `idMapReturnCond` below looks like a single identifier split across two
/// lines - confirm against the original header.
107 template <
class F,
class T,
class Cond = val
idMapReturnCond<F, T>>
108 auto MapImpl(
F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// MapImpl overload taking its arguments as a const std::vector reference.
/// \param func Callable applied to the elements of args.
/// \param args Elements to process, passed by const reference.
/// \return A std::vector<InvokeResult_t<F, T>>.
/// NOTE(review): the visible definition fragments store func(args[i]) in slot i of a result vector
/// sized to args.size() - confirm those fragments belong to this overload.
/// NOTE(review): `val` / `idMapReturnCond` below looks like a single identifier split across two
/// lines - confirm against the original header.
109 template <
class F,
class T,
class Cond = val
idMapReturnCond<F, T>>
110 auto MapImpl(
F func,
const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// Execute a function nTimes in parallel, dividing the execution in nChunks and providing one
/// result per chunk.
/// \param func    Callable invoked with no arguments.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable; the visible definition fragments pass each chunk's partial
///                results to Reduce together with redfunc.
/// \param nChunks Number of chunks to split the invocations into (no default in this declaration).
/// \return A std::vector<InvokeResult_t<F>> holding the reduced result of each chunk.
/// NOTE(review): `val` / `idMapReturnCond` below looks like a single identifier split across two
/// lines - confirm against the original header.
114 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
115 auto Map(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
116 template <
class F,
class INTEGER,
class R,
class Cond = val
idMapReturnCond<F, INTEGER>>
118 -> std::vector<InvokeResult_t<F, INTEGER>>;
/// Chunked Map overload taking its arguments as an std::initializer_list.
/// \param func    Callable applied to the elements of args.
/// \param args    Elements to process.
/// \param redfunc Reduction callable used on the per-chunk results.
/// \param nChunks Number of chunks to split the work into.
/// \return A std::vector<InvokeResult_t<F, T>>.
/// NOTE(review): the visible definition fragment moves args into a std::vector and forwards to the
/// vector overload of Map - confirm that fragment belongs to this overload.
/// NOTE(review): `val` / `idMapReturnCond` below looks like a single identifier split across two
/// lines - confirm against the original header.
119 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
120 auto Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Chunked Map overload taking its arguments as a non-const std::vector reference.
/// \param func    Callable applied to the elements of args.
/// \param args    Elements to process, passed by non-const reference.
/// \param redfunc Reduction callable; the visible definition fragments reduce each chunk's partial
///                results via Reduce(partialResults, redfunc).
/// \param nChunks Number of chunks to split the work into.
/// \return A std::vector<InvokeResult_t<F, T>> with one entry per chunk actually used.
/// NOTE(review): `val` / `idMapReturnCond` below looks like a single identifier split across two
/// lines - confirm against the original header.
121 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
122 auto Map(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Chunked Map overload taking its arguments as a const std::vector reference.
/// \param func    Callable applied to the elements of args.
/// \param args    Elements to process, passed by const reference.
/// \param redfunc Reduction callable; the visible definition fragments reduce each chunk's partial
///                results via Reduce(partialResults, redfunc).
/// \param nChunks Number of chunks to split the work into.
/// \return A std::vector<InvokeResult_t<F, T>> with one entry per chunk actually used.
/// NOTE(review): `val` / `idMapReturnCond` below looks like a single identifier split across two
/// lines - confirm against the original header.
123 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
124 auto Map(
F func,
const std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Execute a function in parallel over the indices of a loop.
/// \param start First index of the loop.
/// \param end   End index of the loop.
///              NOTE(review): presumably exclusive, since callers pass element counts as `end` - confirm.
/// \param step  Increment between consecutive indices.
/// \param f     Callable invoked with the current loop index.
127 void ParallelFor(
unsigned start,
unsigned end,
unsigned step,
const std::function<
void(
unsigned int i)> &
f);
/// "Reduce" in parallel an std::vector<double> into a single double value.
/// \param objs    Values to reduce.
/// \param redfunc Binary operation combining two doubles into one.
/// \return The reduced value.
128 double ParallelReduce(
const std::vector<double> &objs,
const std::function<
double(
double a,
double b)> &redfunc);
/// float counterpart of the std::vector<double> ParallelReduce overload: reduces an
/// std::vector<float> into a single float value.
/// \param objs    Values to reduce.
/// \param redfunc Binary operation combining two floats into one.
/// \return The reduced value.
129 float ParallelReduce(
const std::vector<float> &objs,
const std::function<
float(
float a,
float b)> &redfunc);
/// "Reduce", sequentially, an std::vector into a single object.
/// \param objs    Vector of objects to reduce.
/// \param redfunc Callable that receives the whole vector.
/// \return The result of redfunc(objs).
130 template<
class T,
class R>
131 auto SeqReduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Pointer to the TBB task arena wrapper; default-initialised to nullptr.
134 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper>
fTaskArenaW =
nullptr;
148 ParallelFor(0U, nTimes, 1, [&](
unsigned int){func();});
152 unsigned step = (nTimes + nChunks - 1) / nChunks;
153 auto lambda = [&](
unsigned int i)
155 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
168 template<
class F,
class INTEGER>
174 unsigned start = *args.
begin();
175 unsigned end = *args.
end();
176 unsigned seqStep = args.
step();
177 unsigned step = (end - start + nChunks - 1) / nChunks;
179 auto lambda = [&](
unsigned int i)
181 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
194 template<
class F,
class T>
196 std::vector<T> vargs(std::move(args));
206 template<
class F,
class T>
208 unsigned int nToProcess = args.size();
210 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
214 unsigned step = (nToProcess + nChunks - 1) / nChunks;
215 auto lambda = [&](
unsigned int i)
217 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
230 template<
class F,
class T>
232 unsigned int nToProcess = args.size();
234 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
238 unsigned step = (nToProcess + nChunks - 1) / nChunks;
239 auto lambda = [&](
unsigned int i)
241 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
253 template <
class F,
class Cond>
256 using retType =
decltype(func());
257 std::vector<retType> reslist(nTimes);
258 auto lambda = [&](
unsigned int i)
262 ParallelFor(0U, nTimes, 1, lambda);
272 template <
class F,
class INTEGER,
class Cond>
275 using retType =
decltype(func(*args.begin()));
276 std::vector<retType> reslist(args.size());
277 auto lambda = [&](
unsigned int i) { reslist[i] = func(args[i]); };
278 ParallelFor(0U, args.size(), 1, lambda);
288 template <
class F,
class R,
class Cond>
293 return Map(func, nTimes);
296 unsigned step = (nTimes + nChunks - 1) / nChunks;
298 unsigned actualChunks = (nTimes + step - 1) / step;
299 using retType =
decltype(func());
300 std::vector<retType> reslist(actualChunks);
301 auto lambda = [&](
unsigned int i)
303 std::vector<retType> partialResults(std::min(nTimes-i, step));
304 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
305 partialResults[j] = func();
307 reslist[i / step] = Reduce(partialResults, redfunc);
309 ParallelFor(0U, nTimes, step, lambda);
319 template <
class F,
class T,
class Cond>
323 using retType =
decltype(func(args.front()));
325 unsigned int nToProcess = args.size();
326 std::vector<retType> reslist(nToProcess);
328 auto lambda = [&](
unsigned int i)
330 reslist[i] = func(args[i]);
333 ParallelFor(0U, nToProcess, 1, lambda);
343 template <
class F,
class T,
class Cond>
347 using retType =
decltype(func(args.front()));
349 unsigned int nToProcess = args.size();
350 std::vector<retType> reslist(nToProcess);
352 auto lambda = [&](
unsigned int i)
354 reslist[i] = func(args[i]);
357 ParallelFor(0U, nToProcess, 1, lambda);
367 template <
class F,
class INTEGER,
class R,
class Cond>
369 -> std::vector<InvokeResult_t<F, INTEGER>>
373 return Map(func, args);
376 unsigned nToProcess = args.size();
377 unsigned step = (nToProcess + nChunks - 1) / nChunks;
379 unsigned actualChunks = (nToProcess + step - 1) / step;
381 using retType =
decltype(func(*args.begin()));
382 std::vector<retType> reslist(actualChunks);
383 auto lambda = [&](
unsigned int i) {
384 std::vector<retType> partialResults(std::min(step, nToProcess - i));
385 for (
unsigned j = 0; j < partialResults.size(); j++) {
386 partialResults[j] = func(args[i + j]);
388 reslist[i / step] = Reduce(partialResults, redfunc);
391 ParallelFor(0U, nToProcess, step, lambda);
401 template <
class F,
class T,
class R,
class Cond>
403 -> std::vector<InvokeResult_t<F, T>>
407 return Map(func, args);
410 unsigned int nToProcess = args.size();
411 unsigned step = (nToProcess + nChunks - 1) / nChunks;
413 unsigned actualChunks = (nToProcess + step - 1) / step;
415 using retType =
decltype(func(args.front()));
416 std::vector<retType> reslist(actualChunks);
417 auto lambda = [&](
unsigned int i) {
418 std::vector<retType> partialResults(std::min(step, nToProcess - i));
419 for (
unsigned j = 0; j < partialResults.size(); j++) {
420 partialResults[j] = func(args[i + j]);
422 reslist[i / step] = Reduce(partialResults, redfunc);
425 ParallelFor(0U, nToProcess, step, lambda);
435 template <
class F,
class T,
class R,
class Cond>
437 -> std::vector<InvokeResult_t<F, T>>
441 return Map(func, args);
444 unsigned int nToProcess = args.size();
445 unsigned step = (nToProcess + nChunks - 1) / nChunks;
447 unsigned actualChunks = (nToProcess + step - 1) / step;
449 using retType =
decltype(func(args.front()));
450 std::vector<retType> reslist(actualChunks);
451 auto lambda = [&](
unsigned int i) {
452 std::vector<retType> partialResults(std::min(step, nToProcess - i));
453 for (
unsigned j = 0; j < partialResults.size(); j++) {
454 partialResults[j] = func(args[i + j]);
456 reslist[i / step] = Reduce(partialResults, redfunc);
459 ParallelFor(0U, nToProcess, step, lambda);
469 template <
class F,
class T,
class R,
class Cond>
471 -> std::vector<InvokeResult_t<F, T>>
473 std::vector<T> vargs(std::move(args));
474 const auto &reslist = Map(func, vargs, redfunc, nChunks);
481 template <
class F,
class R,
class Cond>
484 return Reduce(Map(func, nTimes), redfunc);
492 template <
class F,
class R,
class Cond>
495 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
503 template <
class F,
class INTEGER,
class R,
class Cond>
507 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
515 template <
class F,
class T,
class R,
class Cond>
519 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
525 template <
class F,
class T,
class R,
class Cond>
528 return Reduce(Map(func, args), redfunc);
534 template <
class F,
class T,
class R,
class Cond>
537 return Reduce(Map(func, args), redfunc);
545 template <
class F,
class T,
class R,
class Cond>
548 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
556 template <
class F,
class T,
class R,
class Cond>
560 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
565 template<
class T,
class R>
569 static_assert(std::is_same<
decltype(redfunc(objs)), T>
::value,
"redfunc does not have the correct signature");
570 return SeqReduce(objs, redfunc);
580 template<
class T,
class BINARYOP>
584 static_assert(std::is_same<
decltype(redfunc(objs.front(), objs.front())), T>
::value,
"redfunc does not have the correct signature");
585 return ParallelReduce(objs, redfunc);
594 template<
class T,
class R>
597 return redfunc(objs);
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
This class defines an interface to execute the same task multiple times, possibly in parallel and wit...
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
A pseudo container class which is a generator of indices.
This class provides a simple interface to execute the same task multiple times in parallel threads,...
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...