12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
15#include "RConfigure.h"
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
/// Execute func (with no arguments) nTimes in parallel.
/// \param func    Callable invoked without arguments.
/// \param nTimes  Number of invocations.
/// \param nChunks Optional chunk count, 0 by default.
/// NOTE(review): the template header of this declaration is not part of this view.
44 void Foreach(
F func,
unsigned nTimes,
unsigned nChunks = 0);
45 template<
class F,
class INTEGER>
/// Foreach overload that receives its arguments as a std::initializer_list<T>.
/// \param func    Callable to run.
/// \param args    Argument list; the definition builds a std::vector<T> from it.
/// \param nChunks Optional chunk count, 0 by default.
48 template<
class F,
class T>
49 void Foreach(
F func, std::initializer_list<T> args,
unsigned nChunks = 0);
/// Foreach overload over a mutable std::vector<T>: func is called on the elements of args.
/// \param func    Callable invoked as func(args[i]).
/// \param args    Elements to process, taken by non-const reference.
/// \param nChunks Optional chunk count, 0 by default.
51 template<
class F,
class T>
52 void Foreach(
F func, std::vector<T> &args,
unsigned nChunks = 0);
/// Foreach overload over a read-only std::vector<T>: func is called on the elements of args.
/// \param func    Callable invoked as func(args[i]).
/// \param args    Elements to process, taken by const reference.
/// \param nChunks Optional chunk count, 0 by default.
53 template<
class F,
class T>
54 void Foreach(
F func,
const std::vector<T> &args,
unsigned nChunks = 0);
/// Execute func (with no arguments) nTimes in parallel.
/// \param func   Callable invoked without arguments.
/// \param nTimes Number of invocations.
/// \return A std::vector whose element type is the result type of F().
/// NOTE(review): Cond defaults to noReferenceCond<F>, which is defined outside this view;
/// presumably a constraint on the return type of F - confirm.
57 template<
class F,
class Cond = noReferenceCond<F>>
58 auto Map(
F func,
unsigned nTimes) -> std::vector<
typename std::result_of<
F()>
::type>;
59 template<
class F,
class INTEGER,
class Cond = noReferenceCond<F, INTEGER>>
/// Map overload over a std::vector<T>: func is applied to the elements of args.
/// \param func Callable invoked as func(args[i]).
/// \param args Input elements.
/// \return A std::vector whose element type is the result type of F(T).
61 template<
class F,
class T,
class Cond = noReferenceCond<F, T>>
62 auto Map(
F func, std::vector<T> &args) -> std::vector<
typename std::result_of<
F(
T)>
::type>;
/// This method behaves just like Map, but an additional redfunc function must be provided.
/// \param func    Callable invoked without arguments.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable applied to the mapped results.
/// \return A single value of the result type of F().
69 template<
class F,
class R,
class Cond = noReferenceCond<F>>
70 auto MapReduce(
F func,
unsigned nTimes,
R redfunc) ->
typename std::result_of<
F()>
::type;
/// MapReduce overload with an explicit chunk count.
/// \param func    Callable invoked without arguments.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable.
/// \param nChunks Chunk count forwarded to the chunked Map.
/// \return A single value of the result type of F().
71 template<
class F,
class R,
class Cond = noReferenceCond<F>>
72 auto MapReduce(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F()>
::type;
73 template<
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
/// MapReduce overload taking the arguments as a std::initializer_list<T> plus a chunk count.
/// \param func    Callable applied to the elements of args.
/// \param args    Argument list.
/// \param redfunc Reduction callable.
/// \param nChunks Chunk count.
/// \return A single value of the result type of F(T).
76 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
77 auto MapReduce(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F(
T)>
::type;
/// MapReduce overload over a std::vector<T>, without chunking.
/// \param func    Callable applied to the elements of args.
/// \param args    Input elements.
/// \param redfunc Reduction callable applied to the mapped results.
/// \return A single value of the result type of F(T).
79 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
80 auto MapReduce(
F func, std::vector<T> &args,
R redfunc) ->
typename std::result_of<
F(
T)>
::type;
/// MapReduce overload over a std::vector<T> with an explicit chunk count.
/// \param func    Callable applied to the elements of args.
/// \param args    Input elements.
/// \param redfunc Reduction callable.
/// \param nChunks Chunk count.
/// \return A single value of the result type of F(T).
81 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
82 auto MapReduce(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F(
T)>
::type;
/// Reduce a std::vector into a single object using a binary operator.
/// \param objs    Values to reduce.
/// \param redfunc Binary operator callable as redfunc(objs.front(), objs.front()).
/// \return The type produced by that binary call.
85 template<
class T,
class BINARYOP>
auto Reduce(
const std::vector<T> &objs, BINARYOP redfunc) ->
decltype(redfunc(objs.front(), objs.front()));
/// Reduce overload for a callable that consumes the whole vector at once.
/// \param objs    Values to reduce.
/// \param redfunc Callable invoked as redfunc(objs).
/// \return The type produced by redfunc(objs).
86 template<
class T,
class R>
auto Reduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Chunked Map for a callable without arguments.
/// \param func    Callable invoked without arguments.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable applied to the results of each chunk.
/// \param nChunks Requested chunk count.
/// \return A std::vector whose element type is the result type of F(); the definition
///         sizes it by the number of chunks actually used.
91 template<
class F,
class R,
class Cond = noReferenceCond<F>>
92 auto Map(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F()>
::type>;
93 template<
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
/// Chunked Map over a std::vector<T>.
/// \param func    Callable applied to the elements of args.
/// \param args    Input elements.
/// \param redfunc Reduction callable applied to the results of each chunk.
/// \param nChunks Requested chunk count.
/// \return A std::vector whose element type is the result type of F(T).
95 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
96 auto Map(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F(
T)>
::type>;
/// Chunked Map taking the arguments as a std::initializer_list<T>.
/// \param func    Callable applied to the elements of args.
/// \param args    Argument list; the definition builds a std::vector<T> from it.
/// \param redfunc Reduction callable.
/// \param nChunks Requested chunk count.
/// \return A std::vector whose element type is the result type of F(T).
97 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
98 auto Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F(
T)>
::type>;
/// Reduction helper: the definition returns redfunc(objs) directly.
/// \param objs    Values to reduce.
/// \param redfunc Callable invoked as redfunc(objs).
/// \return The type produced by redfunc(objs).
104 template<
class T,
class R>
105 auto SeqReduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Shared pointer to a ROOT::Internal::RTaskArenaWrapper, default-initialised to nullptr.
/// NOTE(review): where it is assigned is not visible in this view.
107 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper>
fTaskArenaW =
nullptr;
// Fragment of what appears to be the Foreach(func, nTimes, nChunks) definition;
// the signature, branch condition and closing braces are not in this view.
// Path without chunking: one ParallelFor index per call of func.
119 ParallelFor(0U, nTimes, 1, [&](
unsigned int){func();});
// Chunked path: step is nTimes / nChunks rounded up.
123 unsigned step = (nTimes + nChunks - 1) / nChunks;
124 auto lambda = [&](
unsigned int i)
// Each task walks at most step consecutive indices and stops at nTimes.
126 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
// Fragment of what appears to be the Foreach overload for an integer sequence
// (template parameters F and INTEGER); the signature is not in this view.
136 template<
class F,
class INTEGER>
// Bounds and stride are read from args and stored as unsigned.
142 unsigned start = *args.
begin();
143 unsigned end = *args.
end();
144 unsigned seqStep = args.
step();
// Chunk length: (end - start) / nChunks rounded up.
145 unsigned step = (end - start + nChunks - 1) / nChunks;
147 auto lambda = [&](
unsigned int i)
// Inner loop advances by the stride of the sequence and stops at end.
149 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
// Fragment of what appears to be the Foreach overload for std::initializer_list<T>;
// only the conversion of args into a std::vector<T> is visible.
160 template<
class F,
class T>
162 std::vector<T> vargs(std::move(args));
// Fragment of a Foreach overload over a vector (template parameters F and T);
// the signature and the branch condition are not in this view.
170 template<
class F,
class T>
172 unsigned int nToProcess = args.size();
// Path without chunking: one ParallelFor index per element.
174 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
// Chunked path: step is nToProcess / nChunks rounded up.
178 unsigned step = (nToProcess + nChunks - 1) / nChunks;
179 auto lambda = [&](
unsigned int i)
// Each task walks at most step consecutive elements and stops at nToProcess.
181 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
// Fragment of a second Foreach overload over a vector (template parameters F and T);
// the visible statements mirror the preceding overload. Signature not in this view.
190 template<
class F,
class T>
192 unsigned int nToProcess = args.size();
// Path without chunking: one ParallelFor index per element.
194 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
// Chunked path: step is nToProcess / nChunks rounded up.
198 unsigned step = (nToProcess + nChunks - 1) / nChunks;
199 auto lambda = [&](
unsigned int i)
// Each task walks at most step consecutive elements and stops at nToProcess.
201 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
// Fragment of what appears to be the Map(func, nTimes) definition; the signature
// and the lambda body are not in this view.
213 template<
class F,
class Cond>
// Result type is whatever func() returns; one result slot per invocation.
215 using retType =
decltype(func());
216 std::vector<retType> reslist(nTimes);
217 auto lambda = [&](
unsigned int i)
// The lambda is run for indices 0 .. nTimes-1 with stride 1.
221 ParallelFor(0U, nTimes, 1, lambda);
// Fragment of what appears to be the Map overload for an integer sequence
// (template parameters F, INTEGER, Cond); the signature is not in this view.
230 template<
class F,
class INTEGER,
class Cond>
232 unsigned start = *args.begin();
233 unsigned end = *args.end();
234 unsigned seqStep = args.step();
236 using retType =
decltype(func(start));
// One result slot per element of the sequence.
237 std::vector<retType> reslist(args.size());
238 auto lambda = [&](
unsigned int i)
// NOTE(review): i is used both as the argument of func and as the index into reslist.
// reslist holds args.size() slots, while ParallelFor below is given the range
// start .. end with stride seqStep. If i takes the sequence values, any start > 0 or
// seqStep > 1 can index past the end of reslist - confirm against ParallelFor.
240 reslist[i] = func(i);
242 ParallelFor(start, end, seqStep, lambda);
// Fragment of what appears to be the chunked Map(func, nTimes, redfunc, nChunks)
// definition; the signature, branch condition and braces are not in this view.
251 template<
class F,
class R,
class Cond>
// Delegation to the unchunked overload (guarding condition not visible).
255 return Map(func, nTimes);
// Chunk length: nTimes / nChunks rounded up.
258 unsigned step = (nTimes + nChunks - 1) / nChunks;
// Number of chunks actually needed for that chunk length.
260 unsigned actualChunks = (nTimes + step - 1) / step;
261 using retType =
decltype(func());
262 std::vector<retType> reslist(actualChunks);
263 auto lambda = [&](
unsigned int i)
// The last chunk may be shorter, hence the min with the remaining count.
265 std::vector<retType> partialResults(std::min(nTimes-i, step));
266 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
267 partialResults[j] = func();
// Each chunk is reduced on the spot and stored at its chunk index.
269 reslist[i / step] = Reduce(partialResults, redfunc);
271 ParallelFor(0U, nTimes, step, lambda);
// Fragment of what appears to be the Map(func, args) definition over a vector;
// the signature is not in this view.
282 template<
class F,
class T,
class Cond>
// Result type is deduced from calling func on the first element.
285 using retType =
decltype(func(args.front()));
287 unsigned int nToProcess = args.size();
288 std::vector<retType> reslist(nToProcess);
290 auto lambda = [&](
unsigned int i)
// Result i corresponds to input element i.
292 reslist[i] = func(args[i]);
295 ParallelFor(0U, nToProcess, 1, lambda);
// Fragment of what appears to be the chunked Map overload for an integer sequence
// (template parameters F, INTEGER, R, Cond); the signature is not in this view.
305 template<
class F,
class INTEGER,
class R,
class Cond>
// Delegation to the unchunked overload (guarding condition not visible).
309 return Map(func, args);
312 unsigned start = *args.begin();
313 unsigned end = *args.end();
314 unsigned seqStep = args.step();
// Chunk length: (end - start) / nChunks rounded up.
315 unsigned step = (end - start + nChunks - 1) / nChunks;
317 unsigned actualChunks = (end - start + step - 1) / step;
319 using retType =
decltype(func(start));
320 std::vector<retType> reslist(actualChunks);
321 auto lambda = [&](
unsigned int i)
323 std::vector<retType> partialResults(std::min(end-i, step));
// NOTE(review): j advances by seqStep but is also the index into partialResults, so
// with seqStep > 1 the slots in between are never assigned and still reach Reduce.
324 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
325 partialResults[j] = func(i + j);
// NOTE(review): reslist holds actualChunks slots, yet the index is i / step and
// ParallelFor below starts at start; for start >= step this can exceed the size - confirm.
327 reslist[i / step] = Reduce(partialResults, redfunc);
329 ParallelFor(start, end, step, lambda);
// Fragment of what appears to be the chunked Map overload over a vector
// (template parameters F, T, R, Cond); the signature is not in this view.
340 template<
class F,
class T,
class R,
class Cond>
// Delegation to the unchunked overload (guarding condition not visible).
344 return Map(func, args);
347 unsigned int nToProcess = args.size();
// Chunk length: nToProcess / nChunks rounded up.
348 unsigned step = (nToProcess + nChunks - 1) / nChunks;
350 unsigned actualChunks = (nToProcess + step - 1) / step;
352 using retType =
decltype(func(args.front()));
353 std::vector<retType> reslist(actualChunks);
354 auto lambda = [&](
unsigned int i)
// NOTE(review): the element type here is T (the argument type) although the values
// stored are results of func, whose type is retType; the other chunked overloads use
// std::vector<retType>. The size is also step rather than the remaining element count,
// so a shorter last chunk passes value-initialised extras to Reduce - confirm and fix.
356 std::vector<T> partialResults(step);
357 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
358 partialResults[j] = func(args[i + j]);
// Each chunk is reduced on the spot and stored at its chunk index.
360 reslist[i / step] = Reduce(partialResults, redfunc);
363 ParallelFor(0U, nToProcess, step, lambda);
// Chunked Map for a std::initializer_list<T>: builds a std::vector<T> from args and
// forwards to the vector overload. The remainder of the body is not in this view.
373 template<
class F,
class T,
class R,
class Cond>
374 auto TThreadExecutor::Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F(
T)>
::type> {
375 std::vector<T> vargs(std::move(args));
// The returned temporary is bound to a const reference.
376 const auto &reslist =
Map(func, vargs, redfunc, nChunks);
// Fragment of a MapReduce definition (signature not in this view): maps func nTimes
// without chunking, then reduces the results with redfunc.
389 template<
class F,
class R,
class Cond>
391 return Reduce(
Map(func, nTimes), redfunc);
// Fragment of a MapReduce definition (signature not in this view): the chunked Map
// receives redfunc and nChunks, and redfunc is applied again to the vector it returns.
394 template<
class F,
class R,
class Cond>
396 return Reduce(
Map(func, nTimes, redfunc, nChunks), redfunc);
// Fragment of a MapReduce definition with an INTEGER template parameter (signature not
// in this view): chunked Map over args, then redfunc applied to the vector it returns.
399 template<
class F,
class INTEGER,
class R,
class Cond>
401 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
// Fragment of a MapReduce definition with template parameters F, T, R, Cond (signature
// not in this view): chunked Map over args, then redfunc applied to the vector it returns.
404 template<
class F,
class T,
class R,
class Cond>
406 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
// Fragment of a MapReduce definition with template parameters F, T, R, Cond (signature
// not in this view): unchunked Map over args, then a single reduction with redfunc.
410 template<
class F,
class T,
class R,
class Cond>
412 return Reduce(
Map(func, args), redfunc);
// Fragment of a MapReduce definition with template parameters F, T, R, Cond (signature
// not in this view): chunked Map over args, then redfunc applied to the vector it returns.
415 template<
class F,
class T,
class R,
class Cond>
417 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
// Fragment of the Reduce definition for a binary operator (signature not in this view).
423 template<
class T,
class BINARYOP>
// Compile-time check: combining two elements must yield exactly T.
427 static_assert(std::is_same<
decltype(redfunc(objs.front(), objs.front())),
T>::value,
"redfunc does not have the correct signature");
// The actual work is delegated to ParallelReduce.
428 return ParallelReduce(objs, redfunc);
// Fragment of the Reduce definition for a whole-vector callable (signature not in this view).
434 template<
class T,
class R>
// Compile-time check: redfunc applied to the vector must yield exactly T.
438 static_assert(std::is_same<
decltype(redfunc(objs)),
T>::value,
"redfunc does not have the correct signature");
// The actual work is delegated to SeqReduce.
439 return SeqReduce(objs, redfunc);
// Fragment of the SeqReduce definition (signature not in this view): the reduction is a
// single direct call of redfunc on the whole vector.
442 template<
class T,
class R>
445 return redfunc(objs);
This class defines an interface to execute the same task multiple times in parallel, possibly with different arguments every time.
A pseudo container class which is a generator of indices.
This class provides a simple interface to execute the same task multiple times in parallel, possibly with different arguments every time.
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
TThreadExecutor & operator=(TThreadExecutor &)=delete
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
auto Map(F func, std::vector< T > &args, R redfunc, unsigned nChunks) -> std::vector< typename std::result_of< F(T)>::type >
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of< F()>::type
This method behaves just like Map, but an additional redfunc function must be provided.
auto Reduce(const std::vector< T > &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
"Reduce" an std::vector into a single object in parallel by passing a binary operator as the second argument to act on pairs of elements of the std::vector.
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
TThreadExecutor(TThreadExecutor &)=delete
TThreadExecutor(UInt_t nThreads=0u)
Class constructor.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
auto Map(F func, std::initializer_list< T > args, R redfunc, unsigned nChunks) -> std::vector< typename std::result_of< F(T)>::type >
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
auto Map(Args &&... args) -> decltype(ROOT::Detail::VecOps::MapFromTuple(std::forward_as_tuple(args...), std::make_index_sequence< sizeof...(args) - 1 >()))
Create new collection applying a callable to the elements of the input collection.
tbb::task_arena is an alias of tbb::interface7::task_arena, which does not allow forward-declaring tbb::task_arena without also forward-declaring tbb::interface7.