12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
15#include "RConfigure.h"
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
31#include <initializer_list>
52 void Foreach(
F func,
unsigned nTimes,
unsigned nChunks = 0);
53 template<
class F,
class INTEGER>
/// Foreach overload that receives its arguments as a std::initializer_list<T>.
/// \param func    Callable, taken by value.
/// \param args    List of values of type T.
/// \param nChunks Optional chunk count; defaults to 0.
/// NOTE(review): presumably func is applied to every element of args and
/// nChunks == 0 selects un-chunked execution; the definition is not fully
/// visible from here, confirm.
55 template<
class F,
class T>
56 void Foreach(
F func, std::initializer_list<T> args,
unsigned nChunks = 0);
/// Foreach overload that receives its arguments as a mutable std::vector<T>.
/// \param func    Callable, taken by value.
/// \param args    Vector of values of type T, taken by non-const reference.
/// \param nChunks Optional chunk count; defaults to 0.
/// NOTE(review): presumably func is invoked as func(args[i]) for each index and
/// nChunks == 0 means one parallel iteration per element; confirm against
/// the definition.
57 template<
class F,
class T>
58 void Foreach(
F func, std::vector<T> &args,
unsigned nChunks = 0);
/// Foreach overload that receives its arguments as a read-only std::vector<T>.
/// \param func    Callable, taken by value.
/// \param args    Vector of values of type T, taken by const reference.
/// \param nChunks Optional chunk count; defaults to 0.
/// NOTE(review): presumably mirrors the non-const vector overload; confirm
/// against the definition.
59 template<
class F,
class T>
60 void Foreach(
F func,
const std::vector<T> &args,
unsigned nChunks = 0);
/// Execute a function nTimes in parallel (Map) and accumulate the results into
/// a single value (Reduce).
/// \param func    Callable without arguments, taken by value.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable, taken by value.
/// \return A value of type std::result_of<F()>::type.
/// NOTE(review): Cond defaults to noReferenceCond<F>, which is declared outside
/// this view; by its name it presumably rejects callables returning
/// references; confirm.
75 template<
class F,
class R,
class Cond = noReferenceCond<F>>
76 auto MapReduce(
F func,
unsigned nTimes,
R redfunc) ->
typename std::result_of<
F()>
::type;
/// MapReduce overload for a zero-argument callable with an explicit chunk count.
/// \param func    Callable without arguments, taken by value.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable, taken by value.
/// \param nChunks Number of chunks (no default in this overload).
/// \return A value of type std::result_of<F()>::type.
/// NOTE(review): presumably reduces the per-chunk results of the chunked Map
/// with redfunc a second time; confirm against the definition.
77 template<
class F,
class R,
class Cond = noReferenceCond<F>>
78 auto MapReduce(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F()>
::type;
79 template<
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
/// MapReduce overload taking the arguments as a std::initializer_list<T>, with
/// an explicit chunk count.
/// \param func    Callable accepting a T, taken by value.
/// \param args    List of values of type T.
/// \param redfunc Reduction callable, taken by value.
/// \param nChunks Number of chunks.
/// \return A value of type std::result_of<F(T)>::type.
/// NOTE(review): the definition is not fully visible here; presumably it maps
/// func over args chunk-wise and reduces the chunk results; confirm.
81 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
82 auto MapReduce(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F(T)>
::type;
/// MapReduce overload over a mutable std::vector<T>, without chunking.
/// \param func    Callable accepting a T, taken by value.
/// \param args    Vector of values of type T, taken by non-const reference.
/// \param redfunc Reduction callable, taken by value.
/// \return A value of type std::result_of<F(T)>::type.
/// NOTE(review): presumably equivalent to reducing the mapped results with
/// redfunc; confirm against the definition.
83 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
84 auto MapReduce(
F func, std::vector<T> &args,
R redfunc) ->
typename std::result_of<
F(T)>
::type;
/// MapReduce overload over a read-only std::vector<T>, without chunking.
/// \param func    Callable accepting a T, taken by value.
/// \param args    Vector of values of type T, taken by const reference.
/// \param redfunc Reduction callable, taken by value.
/// \return A value of type std::result_of<F(T)>::type.
/// NOTE(review): presumably mirrors the non-const vector overload; confirm.
85 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
86 auto MapReduce(
F func,
const std::vector<T> &args,
R redfunc) ->
typename std::result_of<
F(T)>
::type;
/// MapReduce overload over a mutable std::vector<T> with an explicit chunk count.
/// \param func    Callable accepting a T, taken by value.
/// \param args    Vector of values of type T, taken by non-const reference.
/// \param redfunc Reduction callable, taken by value.
/// \param nChunks Number of chunks.
/// \return A value of type std::result_of<F(T)>::type.
/// NOTE(review): presumably reduces the per-chunk results of the chunked Map
/// with redfunc; confirm against the definition.
87 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
88 auto MapReduce(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F(T)>
::type;
/// MapReduce overload over a read-only std::vector<T> with an explicit chunk count.
/// \param func    Callable accepting a T, taken by value.
/// \param args    Vector of values of type T, taken by const reference.
/// \param redfunc Reduction callable, taken by value.
/// \param nChunks Number of chunks.
/// \return A value of type std::result_of<F(T)>::type.
/// NOTE(review): presumably mirrors the non-const chunked overload; confirm.
89 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
90 auto MapReduce(
F func,
const std::vector<T> &args,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F(T)>
::type;
/// "Reduce" a std::vector into a single object using a callable that accepts
/// the whole vector.
/// \param objs    Vector of objects to reduce, taken by const reference.
/// \param redfunc Callable invoked as redfunc(objs).
/// \return Whatever redfunc(objs) returns (decltype(redfunc(objs))).
93 template<
class T,
class R>
auto Reduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Reduce overload for a binary operator: the callable combines two elements
/// at a time instead of receiving the whole vector.
/// \param objs    Vector of objects to reduce, taken by const reference.
/// \param redfunc Callable invocable with two elements of objs.
/// \return The type of redfunc(objs.front(), objs.front()).
/// NOTE(review): only the return type is evaluated with objs.front() here (an
/// unevaluated decltype); behaviour for an empty vector is not visible; confirm.
94 template<
class T,
class BINARYOP>
auto Reduce(
const std::vector<T> &objs, BINARYOP redfunc) ->
decltype(redfunc(objs.front(), objs.front()));
/// Execute a function without arguments several times in parallel.
/// \param func   Callable without arguments, taken by value.
/// \param nTimes Number of invocations.
/// \return A std::vector of std::result_of<F()>::type.
/// NOTE(review): presumably the vector holds one result per invocation; confirm
/// against the definition.
101 template<
class F,
class Cond = noReferenceCond<F>>
102 auto MapImpl(
F func,
unsigned nTimes) -> std::vector<
typename std::result_of<
F()>
::type>;
103 template<
class F,
class INTEGER,
class Cond = noReferenceCond<F, INTEGER>>
/// MapImpl overload over a mutable std::vector<T>.
/// \param func Callable accepting a T, taken by value.
/// \param args Vector of values of type T, taken by non-const reference.
/// \return A std::vector of std::result_of<F(T)>::type.
/// NOTE(review): presumably result i is func(args[i]); confirm against the
/// definition.
105 template<
class F,
class T,
class Cond = noReferenceCond<F, T>>
106 auto MapImpl(
F func, std::vector<T> &args) -> std::vector<
typename std::result_of<
F(T)>
::type>;
/// MapImpl overload over a read-only std::vector<T>.
/// \param func Callable accepting a T, taken by value.
/// \param args Vector of values of type T, taken by const reference.
/// \return A std::vector of std::result_of<F(T)>::type.
/// NOTE(review): presumably mirrors the non-const vector overload; confirm.
107 template<
class F,
class T,
class Cond = noReferenceCond<F, T>>
108 auto MapImpl(
F func,
const std::vector<T> &args) -> std::vector<
typename std::result_of<
F(T)>
::type>;
/// Execute a function nTimes in parallel, dividing the execution in nChunks and
/// providing a result per chunk.
/// \param func    Callable without arguments, taken by value.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable used to merge the results of one chunk.
/// \param nChunks Requested number of chunks.
/// \return A std::vector of std::result_of<F()>::type.
/// NOTE(review): the number of returned elements is presumably the effective
/// chunk count, which may differ from nChunks; confirm against the definition.
112 template<
class F,
class R,
class Cond = noReferenceCond<F>>
113 auto Map(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F()>
::type>;
114 template<
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
/// Chunked Map overload taking the arguments as a std::initializer_list<T>.
/// \param func    Callable accepting a T, taken by value.
/// \param args    List of values of type T.
/// \param redfunc Reduction callable, taken by value.
/// \param nChunks Requested number of chunks.
/// \return A std::vector of std::result_of<F(T)>::type.
/// NOTE(review): presumably copies args into a vector and forwards to the
/// vector overload; confirm against the definition.
116 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
117 auto Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F(T)>
::type>;
/// Chunked Map overload over a mutable std::vector<T>.
/// \param func    Callable accepting a T, taken by value.
/// \param args    Vector of values of type T, taken by non-const reference.
/// \param redfunc Reduction callable, taken by value.
/// \param nChunks Requested number of chunks.
/// \return A std::vector of std::result_of<F(T)>::type.
/// NOTE(review): presumably yields one reduced value per chunk; confirm
/// against the definition.
118 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
119 auto Map(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F(T)>
::type>;
/// Chunked Map overload over a read-only std::vector<T>.
/// \param func    Callable accepting a T, taken by value.
/// \param args    Vector of values of type T, taken by const reference.
/// \param redfunc Reduction callable, taken by value.
/// \param nChunks Requested number of chunks.
/// \return A std::vector of std::result_of<F(T)>::type.
/// NOTE(review): presumably mirrors the non-const chunked overload; confirm.
120 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
121 auto Map(
F func,
const std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F(T)>
::type>;
/// Execute a function in parallel over the indices of a loop.
/// \param start First index of the loop.
/// \param end   End index of the loop.
/// \param step  Increment between consecutive indices.
/// \param f     Function receiving the current index as an unsigned int.
/// NOTE(review): whether `end` is exclusive is not visible from this
/// declaration; confirm against the implementation.
124 void ParallelFor(
unsigned start,
unsigned end,
unsigned step,
const std::function<
void(
unsigned int i)> &
f);
/// "Reduce" in parallel a std::vector<double> into a single double value.
/// \param objs    Values to reduce, taken by const reference.
/// \param redfunc Binary function combining two doubles into one.
/// \return The reduced double.
125 double ParallelReduce(
const std::vector<double> &objs,
const std::function<
double(
double a,
double b)> &redfunc);
/// float counterpart of the double ParallelReduce overload: reduces a
/// std::vector<float> into a single float value.
/// \param objs    Values to reduce, taken by const reference.
/// \param redfunc Binary function combining two floats into one.
/// \return The reduced float.
126 float ParallelReduce(
const std::vector<float> &objs,
const std::function<
float(
float a,
float b)> &redfunc);
/// "Reduce", sequentially, a std::vector into a single object.
/// \param objs    Vector of objects to reduce, taken by const reference.
/// \param redfunc Callable invoked as redfunc(objs).
/// \return Whatever redfunc(objs) returns (decltype(redfunc(objs))).
127 template<
class T,
class R>
128 auto SeqReduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Shared pointer to the TBB task arena wrapper
/// (ROOT::Internal::RTaskArenaWrapper); starts out as nullptr.
131 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper>
fTaskArenaW =
nullptr;
145 ParallelFor(0U, nTimes, 1, [&](
unsigned int){func();});
149 unsigned step = (nTimes + nChunks - 1) / nChunks;
150 auto lambda = [&](
unsigned int i)
152 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
165 template<
class F,
class INTEGER>
171 unsigned start = *args.
begin();
172 unsigned end = *args.
end();
173 unsigned seqStep = args.
step();
174 unsigned step = (end - start + nChunks - 1) / nChunks;
176 auto lambda = [&](
unsigned int i)
178 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
191 template<
class F,
class T>
193 std::vector<T> vargs(std::move(args));
203 template<
class F,
class T>
205 unsigned int nToProcess = args.size();
207 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
211 unsigned step = (nToProcess + nChunks - 1) / nChunks;
212 auto lambda = [&](
unsigned int i)
214 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
227 template<
class F,
class T>
229 unsigned int nToProcess = args.size();
231 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
235 unsigned step = (nToProcess + nChunks - 1) / nChunks;
236 auto lambda = [&](
unsigned int i)
238 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
250 template<
class F,
class Cond>
252 using retType =
decltype(func());
253 std::vector<retType> reslist(nTimes);
254 auto lambda = [&](
unsigned int i)
258 ParallelFor(0U, nTimes, 1, lambda);
268 template<
class F,
class INTEGER,
class Cond>
270 unsigned start = *args.begin();
271 unsigned end = *args.end();
272 unsigned seqStep = args.step();
274 using retType =
decltype(func(start));
275 std::vector<retType> reslist(args.size());
276 auto lambda = [&](
unsigned int i)
278 reslist[i] = func(i);
280 ParallelFor(start, end, seqStep, lambda);
290 template<
class F,
class R,
class Cond>
294 return Map(func, nTimes);
297 unsigned step = (nTimes + nChunks - 1) / nChunks;
299 unsigned actualChunks = (nTimes + step - 1) / step;
300 using retType =
decltype(func());
301 std::vector<retType> reslist(actualChunks);
302 auto lambda = [&](
unsigned int i)
304 std::vector<retType> partialResults(std::min(nTimes-i, step));
305 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
306 partialResults[j] = func();
308 reslist[i / step] = Reduce(partialResults, redfunc);
310 ParallelFor(0U, nTimes, step, lambda);
320 template<
class F,
class T,
class Cond>
323 using retType =
decltype(func(args.front()));
325 unsigned int nToProcess = args.size();
326 std::vector<retType> reslist(nToProcess);
328 auto lambda = [&](
unsigned int i)
330 reslist[i] = func(args[i]);
333 ParallelFor(0U, nToProcess, 1, lambda);
343 template<
class F,
class T,
class Cond>
346 using retType =
decltype(func(args.front()));
348 unsigned int nToProcess = args.size();
349 std::vector<retType> reslist(nToProcess);
351 auto lambda = [&](
unsigned int i)
353 reslist[i] = func(args[i]);
356 ParallelFor(0U, nToProcess, 1, lambda);
366 template<
class F,
class INTEGER,
class R,
class Cond>
370 return Map(func, args);
373 unsigned start = *args.begin();
374 unsigned end = *args.end();
375 unsigned seqStep = args.step();
376 unsigned step = (end - start + nChunks - 1) / nChunks;
378 unsigned actualChunks = (end - start + step - 1) / step;
380 using retType =
decltype(func(start));
381 std::vector<retType> reslist(actualChunks);
382 auto lambda = [&](
unsigned int i)
384 std::vector<retType> partialResults(std::min(end-i, step));
385 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
386 partialResults[j] = func(i + j);
388 reslist[i / step] = Reduce(partialResults, redfunc);
390 ParallelFor(start, end, step, lambda);
400 template<
class F,
class T,
class R,
class Cond>
404 return Map(func, args);
407 unsigned int nToProcess = args.size();
408 unsigned step = (nToProcess + nChunks - 1) / nChunks;
410 unsigned actualChunks = (nToProcess + step - 1) / step;
412 using retType =
decltype(func(args.front()));
413 std::vector<retType> reslist(actualChunks);
414 auto lambda = [&](
unsigned int i)
416 std::vector<T> partialResults(step);
417 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
418 partialResults[j] = func(args[i + j]);
420 reslist[i / step] = Reduce(partialResults, redfunc);
423 ParallelFor(0U, nToProcess, step, lambda);
433 template<
class F,
class T,
class R,
class Cond>
437 return Map(func, args);
440 unsigned int nToProcess = args.size();
441 unsigned step = (nToProcess + nChunks - 1) / nChunks;
443 unsigned actualChunks = (nToProcess + step - 1) / step;
445 using retType =
decltype(func(args.front()));
446 std::vector<retType> reslist(actualChunks);
447 auto lambda = [&](
unsigned int i)
449 std::vector<T> partialResults(step);
450 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
451 partialResults[j] = func(args[i + j]);
453 reslist[i / step] = Reduce(partialResults, redfunc);
456 ParallelFor(0U, nToProcess, step, lambda);
466 template<
class F,
class T,
class R,
class Cond>
468 std::vector<T> vargs(std::move(args));
469 const auto &reslist = Map(func, vargs, redfunc, nChunks);
476 template<
class F,
class R,
class Cond>
478 return Reduce(Map(func, nTimes), redfunc);
486 template<
class F,
class R,
class Cond>
488 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
496 template<
class F,
class INTEGER,
class R,
class Cond>
498 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
506 template<
class F,
class T,
class R,
class Cond>
508 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
514 template<
class F,
class T,
class R,
class Cond>
516 return Reduce(Map(func, args), redfunc);
522 template<
class F,
class T,
class R,
class Cond>
524 return Reduce(Map(func, args), redfunc);
532 template<
class F,
class T,
class R,
class Cond>
534 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
542 template<
class F,
class T,
class R,
class Cond>
544 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
549 template<
class T,
class R>
553 static_assert(std::is_same<
decltype(redfunc(objs)), T>::value,
"redfunc does not have the correct signature");
554 return SeqReduce(objs, redfunc);
564 template<
class T,
class BINARYOP>
568 static_assert(std::is_same<
decltype(redfunc(objs.front(), objs.front())), T>::value,
"redfunc does not have the correct signature");
569 return ParallelReduce(objs, redfunc);
578 template<
class T,
class R>
581 return redfunc(objs);
This class defines an interface to execute the same task multiple times, possibly in parallel and wit...
A pseudo container class which is a generator of indices.
This class provides a simple interface to execute the same task multiple times in parallel threads,...
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< typename std::result_of< F()>::type >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of< F()>::type
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute a function without arguments several times in parallel.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...