Logo ROOT  
Reference Guide
TThreadExecutor.hxx
Go to the documentation of this file.
1// @(#)root/thread:$Id$
2// Author: Xavier Valls March 2016
3
4/*************************************************************************
5 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
14
15#include "RConfigure.h"
16
17// exclude in case ROOT does not have IMT support
18#ifndef R__USE_IMT
19// No need to error out for dictionaries.
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22# endif
23#else
24
26#include "ROOT/TSeq.hxx"
27#include "RTaskArena.hxx"
28#include "TError.h"
29
30#include <functional> //std::function
31#include <initializer_list>
32#include <memory>
33#include <numeric> //std::accumulate
34#include <type_traits> //std::enable_if, std::result_of
35#include <utility> //std::move
36#include <vector>
37
38namespace ROOT {
39
/// Executor that runs the same task multiple times in parallel threads.
/// It derives from TExecutorCRTP<TThreadExecutor> and adds chunked Foreach/MapReduce
/// variants plus a parallel Reduce on top of what the base class declares.
/// NOTE(review): the member definitions further down call two-argument `Map(func, args)`
/// overloads that are not declared in this class; presumably they are inherited from
/// TExecutorCRTP and exposed through a using-declaration that this listing does not show — confirm.
40 class TThreadExecutor: public TExecutorCRTP<TThreadExecutor> {
42 public:
43
/// Class constructor.
/// \param nThreads Requested number of threads; defaults to 0 (the meaning of 0 is decided
///                 by the out-of-line definition, which is not part of this header).
44 explicit TThreadExecutor(UInt_t nThreads = 0u);
45
48
49 // ForEach
50 //
// Run `func` for its side effects only (no results are collected). `nChunks == 0` means
// "do not chunk": one parallel-loop iteration per call/element.
51 template<class F>
52 void Foreach(F func, unsigned nTimes, unsigned nChunks = 0);
53 template<class F, class INTEGER>
54 void Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks = 0);
55 template<class F, class T>
56 void Foreach(F func, std::initializer_list<T> args, unsigned nChunks = 0);
57 template<class F, class T>
58 void Foreach(F func, std::vector<T> &args, unsigned nChunks = 0);
59 template<class F, class T>
60 void Foreach(F func, const std::vector<T> &args, unsigned nChunks = 0);
61
62 // Map
63 //
65
66 // MapReduce
67 //
68 // We need to reimplement the MapReduce interfaces to allow for parallel reduction, defined in
69 // this class but not in the base class.
70 //
71 // the late return types also check at compile-time whether redfunc is compatible with func,
72 // other than checking that func is compatible with the type of arguments.
73 // a static_assert check in TThreadExecutor::Reduce is used to check that redfunc is compatible with the type returned by func
// Overloads taking `nChunks` first reduce each chunk to one partial result (private chunked
// Map below) and then reduce the partial results; the others reduce the full Map output.
75 template<class F, class R, class Cond = noReferenceCond<F>>
76 auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of<F()>::type;
77 template<class F, class R, class Cond = noReferenceCond<F>>
78 auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type;
79 template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
80 auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type;
81 template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
82 auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
83 template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
84 auto MapReduce(F func, std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type;
85 template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
86 auto MapReduce(F func, const std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type;
87 template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
88 auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
89 template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
90 auto MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
91
// Reduce: the first overload takes a callable applied to the whole vector and forwards to
// SeqReduce; the second takes a binary operation and forwards to ParallelReduce.
93 template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
94 template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
95
/// Returns the number of worker threads in the task arena.
96 unsigned GetPoolSize() const;
97
98 private:
99 // Implementation of the Map functions declared in the parent class (TExecutorCRTP)
100 //
101 template<class F, class Cond = noReferenceCond<F>>
102 auto MapImpl(F func, unsigned nTimes) -> std::vector<typename std::result_of<F()>::type>;
103 template<class F, class INTEGER, class Cond = noReferenceCond<F, INTEGER>>
104 auto MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<typename std::result_of<F(INTEGER)>::type>;
105 template<class F, class T, class Cond = noReferenceCond<F, T>>
106 auto MapImpl(F func, std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type>;
107 template<class F, class T, class Cond = noReferenceCond<F, T>>
108 auto MapImpl(F func, const std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type>;
109
110 // Extension of the Map interfaces with chunking, specific to this class and
111 // only available from a MapReduce call.
112 template<class F, class R, class Cond = noReferenceCond<F>>
113 auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type>;
114 template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
115 auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(INTEGER)>::type>;
116 template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
117 auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
118 template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
119 auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
120 template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
121 auto Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
122
123 // Functions that interface with the parallel library used as a backend
// ParallelFor: execute `f` in parallel over the indices of a loop (start, end, step).
// ParallelReduce: "reduce" in parallel a vector of double/float into a single value.
// Only double and float overloads are declared, so the binary-operation Reduce above
// can only forward element types that match one of them.
124 void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
125 double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
126 float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
// SeqReduce: applies `redfunc` to the whole vector on the calling thread.
127 template<class T, class R>
128 auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
129
130 /// Pointer to the TBB task arena wrapper
131 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper> fTaskArenaW = nullptr;
132 };
133
134 /************ TEMPLATE METHODS IMPLEMENTATION ******************/
135
136 //////////////////////////////////////////////////////////////////////////
137 /// \brief Execute a function without arguments several times in parallel, dividing the execution in nChunks.
138 ///
139 /// \param func Function to be executed.
140 /// \param nTimes Number of times function should be called.
141 /// \param nChunks Number of chunks to split the input data for processing.
142 template<class F>
143 void TThreadExecutor::Foreach(F func, unsigned nTimes, unsigned nChunks) {
144 if (nChunks == 0) {
145 ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
146 return;
147 }
148
149 unsigned step = (nTimes + nChunks - 1) / nChunks;
150 auto lambda = [&](unsigned int i)
151 {
152 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
153 func();
154 }
155 };
156 ParallelFor(0U, nTimes, step, lambda);
157 }
158
159 //////////////////////////////////////////////////////////////////////////
160 /// \brief Execute a function in parallel over a sequence of indexes, dividing the execution in nChunks.
161 ///
162 /// \param func Function to be executed. Must take an element of the sequence passed assecond argument as a parameter.
163 /// \param args Sequence of indexes to execute `func` on.
164 /// \param nChunks Number of chunks to split the input data for processing.
165 template<class F, class INTEGER>
166 void TThreadExecutor::Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks) {
167 if (nChunks == 0) {
168 ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
169 return;
170 }
171 unsigned start = *args.begin();
172 unsigned end = *args.end();
173 unsigned seqStep = args.step();
174 unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
175
176 auto lambda = [&](unsigned int i)
177 {
178 for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
179 func(i + j);
180 }
181 };
182 ParallelFor(start, end, step, lambda);
183 }
184
185 //////////////////////////////////////////////////////////////////////////
186 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks.
187 ///
188 /// \param func Function to be executed on the elements of the initializer_list passed as second parameter.
189 /// \param args initializer_list for a vector to apply `func` on.
190 /// \param nChunks Number of chunks to split the input data for processing.
191 template<class F, class T>
192 void TThreadExecutor::Foreach(F func, std::initializer_list<T> args, unsigned nChunks) {
193 std::vector<T> vargs(std::move(args));
194 Foreach(func, vargs, nChunks);
195 }
196
197 //////////////////////////////////////////////////////////////////////////
198 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks.
199 ///
200 /// \param func Function to be executed on the elements of the vector passed as second parameter.
201 /// \param args Vector of elements passed as an argument to `func`.
202 /// \param nChunks Number of chunks to split the input data for processing.
203 template<class F, class T>
204 void TThreadExecutor::Foreach(F func, std::vector<T> &args, unsigned nChunks) {
205 unsigned int nToProcess = args.size();
206 if (nChunks == 0) {
207 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
208 return;
209 }
210
211 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
212 auto lambda = [&](unsigned int i)
213 {
214 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
215 func(args[i + j]);
216 }
217 };
218 ParallelFor(0U, nToProcess, step, lambda);
219 }
220
221 //////////////////////////////////////////////////////////////////////////
222 /// \brief Execute a function in parallel over the elements of a immutable vector, dividing the execution in nChunks.
223 ///
224 /// \param func Function to be executed on the elements of the vector passed as second parameter.
225 /// \param args Immutable vector of elements passed as an argument to `func`.
226 /// \param nChunks Number of chunks to split the input data for processing.
227 template<class F, class T>
228 void TThreadExecutor::Foreach(F func, const std::vector<T> &args, unsigned nChunks) {
229 unsigned int nToProcess = args.size();
230 if (nChunks == 0) {
231 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
232 return;
233 }
234
235 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
236 auto lambda = [&](unsigned int i)
237 {
238 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
239 func(args[i + j]);
240 }
241 };
242 ParallelFor(0U, nToProcess, step, lambda);
243 }
244
245 //////////////////////////////////////////////////////////////////////////
246 /// \brief Execute a function without arguments several times in parallel.
247 /// Implementation of the Map method.
248 ///
249 /// \copydetails TExecutorCRTP::Map(F func,unsigned nTimes)
250 template<class F, class Cond>
251 auto TThreadExecutor::MapImpl(F func, unsigned nTimes) -> std::vector<typename std::result_of<F()>::type> {
252 using retType = decltype(func());
253 std::vector<retType> reslist(nTimes);
254 auto lambda = [&](unsigned int i)
255 {
256 reslist[i] = func();
257 };
258 ParallelFor(0U, nTimes, 1, lambda);
259
260 return reslist;
261 }
262
263 //////////////////////////////////////////////////////////////////////////
264 /// \brief Execute a function over a sequence of indexes in parallel.
265 /// Implementation of the Map method.
266 ///
267 /// \copydetails TExecutorCRTP::Map(F func,ROOT::TSeq<INTEGER> args)
268 template<class F, class INTEGER, class Cond>
269 auto TThreadExecutor::MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<typename std::result_of<F(INTEGER)>::type> {
270 unsigned start = *args.begin();
271 unsigned end = *args.end();
272 unsigned seqStep = args.step();
273
274 using retType = decltype(func(start));
275 std::vector<retType> reslist(args.size());
276 auto lambda = [&](unsigned int i)
277 {
278 reslist[i] = func(i);
279 };
280 ParallelFor(start, end, seqStep, lambda);
281
282 return reslist;
283 }
284
285 //////////////////////////////////////////////////////////////////////////
286 /// \brief Execute a function `nTimes` in parallel, dividing the execution in nChunks and
287 /// providing a result per chunk.
288 ///
289 /// \copydetails ROOT::Internal::TExecutor::Map(F func,unsigned nTimes,R redfunc,unsigned nChunks)
290 template<class F, class R, class Cond>
291 auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type> {
292 if (nChunks == 0)
293 {
294 return Map(func, nTimes);
295 }
296
297 unsigned step = (nTimes + nChunks - 1) / nChunks;
298 // Avoid empty chunks
299 unsigned actualChunks = (nTimes + step - 1) / step;
300 using retType = decltype(func());
301 std::vector<retType> reslist(actualChunks);
302 auto lambda = [&](unsigned int i)
303 {
304 std::vector<retType> partialResults(std::min(nTimes-i, step));
305 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
306 partialResults[j] = func();
307 }
308 reslist[i / step] = Reduce(partialResults, redfunc);
309 };
310 ParallelFor(0U, nTimes, step, lambda);
311
312 return reslist;
313 }
314
315 //////////////////////////////////////////////////////////////////////////
316 /// \brief Execute a function over the elements of a vector in parallel.
317 /// Implementation of the Map method.
318 ///
319 /// \copydetails TExecutorCRTP::Map(F func,std::vector<T> &args)
320 template<class F, class T, class Cond>
321 auto TThreadExecutor::MapImpl(F func, std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type> {
322 // //check whether func is callable
323 using retType = decltype(func(args.front()));
324
325 unsigned int nToProcess = args.size();
326 std::vector<retType> reslist(nToProcess);
327
328 auto lambda = [&](unsigned int i)
329 {
330 reslist[i] = func(args[i]);
331 };
332
333 ParallelFor(0U, nToProcess, 1, lambda);
334
335 return reslist;
336 }
337
338 //////////////////////////////////////////////////////////////////////////
339 /// \brief Execute a function over the elements of a vector in parallel.
340 /// Implementation of the Map method.
341 ///
342 /// \copydetails TExecutorCRTP::Map(F func,const std::vector<T> &args)
343 template<class F, class T, class Cond>
344 auto TThreadExecutor::MapImpl(F func, const std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type> {
345 // //check whether func is callable
346 using retType = decltype(func(args.front()));
347
348 unsigned int nToProcess = args.size();
349 std::vector<retType> reslist(nToProcess);
350
351 auto lambda = [&](unsigned int i)
352 {
353 reslist[i] = func(args[i]);
354 };
355
356 ParallelFor(0U, nToProcess, 1, lambda);
357
358 return reslist;
359 }
360
361 //////////////////////////////////////////////////////////////////////////
362 /// \brief Execute a function in parallel over the elements of a sequence, dividing the execution in nChunks and
363 /// providing a result per chunk.
364 ///
365 /// \copydetails ROOT::Internal::TExecutor::Map(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
366 template<class F, class INTEGER, class R, class Cond>
367 auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(INTEGER)>::type> {
368 if (nChunks == 0)
369 {
370 return Map(func, args);
371 }
372
373 unsigned start = *args.begin();
374 unsigned end = *args.end();
375 unsigned seqStep = args.step();
376 unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
377 // Avoid empty chunks
378 unsigned actualChunks = (end - start + step - 1) / step;
379
380 using retType = decltype(func(start));
381 std::vector<retType> reslist(actualChunks);
382 auto lambda = [&](unsigned int i)
383 {
384 std::vector<retType> partialResults(std::min(end-i, step));
385 for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
386 partialResults[j] = func(i + j);
387 }
388 reslist[i / step] = Reduce(partialResults, redfunc);
389 };
390 ParallelFor(start, end, step, lambda);
391
392 return reslist;
393 }
394
395 //////////////////////////////////////////////////////////////////////////
396 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks and
397 /// providing a result per chunk.
398 ///
399 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
400 template<class F, class T, class R, class Cond>
401 auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
402 if (nChunks == 0)
403 {
404 return Map(func, args);
405 }
406
407 unsigned int nToProcess = args.size();
408 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
409 // Avoid empty chunks
410 unsigned actualChunks = (nToProcess + step - 1) / step;
411
412 using retType = decltype(func(args.front()));
413 std::vector<retType> reslist(actualChunks);
414 auto lambda = [&](unsigned int i)
415 {
416 std::vector<T> partialResults(step);
417 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
418 partialResults[j] = func(args[i + j]);
419 }
420 reslist[i / step] = Reduce(partialResults, redfunc);
421 };
422
423 ParallelFor(0U, nToProcess, step, lambda);
424
425 return reslist;
426 }
427
428 //////////////////////////////////////////////////////////////////////////
429 /// \brief Execute a function in parallel over the elements of an immutable vector, dividing the execution in nChunks and
430 /// providing a result per chunk.
431 ///
432 /// \copydetails ROOT::Internal::TExecutor::Map(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
433 template<class F, class T, class R, class Cond>
434 auto TThreadExecutor::Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
435 if (nChunks == 0)
436 {
437 return Map(func, args);
438 }
439
440 unsigned int nToProcess = args.size();
441 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
442 // Avoid empty chunks
443 unsigned actualChunks = (nToProcess + step - 1) / step;
444
445 using retType = decltype(func(args.front()));
446 std::vector<retType> reslist(actualChunks);
447 auto lambda = [&](unsigned int i)
448 {
449 std::vector<T> partialResults(step);
450 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
451 partialResults[j] = func(args[i + j]);
452 }
453 reslist[i / step] = Reduce(partialResults, redfunc);
454 };
455
456 ParallelFor(0U, nToProcess, step, lambda);
457
458 return reslist;
459 }
460
461 //////////////////////////////////////////////////////////////////////////
462 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks and
463 /// providing a result per chunk.
464 ///
465 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
466 template<class F, class T, class R, class Cond>
467 auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
468 std::vector<T> vargs(std::move(args));
469 const auto &reslist = Map(func, vargs, redfunc, nChunks);
470 return reslist;
471 }
472
473 //////////////////////////////////////////////////////////////////////////
474 /// \brief Execute a function `nTimes` in parallel (Map) and accumulate the results into a single value (Reduce).
475 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc)
476 template<class F, class R, class Cond>
477 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of<F()>::type {
478 return Reduce(Map(func, nTimes), redfunc);
479 }
480
481 //////////////////////////////////////////////////////////////////////////
482 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
483 /// Benefits from partial reduction into `nChunks` intermediate results.
484 ///
485 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc,unsigned nChunks)
486 template<class F, class R, class Cond>
487 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type {
488 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
489 }
490
491 //////////////////////////////////////////////////////////////////////////
492 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
493 /// Benefits from partial reduction into `nChunks` intermediate results.
494 ///
495 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
496 template<class F, class INTEGER, class R, class Cond>
497 auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type {
498 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
499 }
500
501 //////////////////////////////////////////////////////////////////////////
502 /// \brief Execute a function in parallel over the elements of an initializer_list (Map) and accumulate the results into a single value (Reduce).
503 /// Benefits from partial reduction into `nChunks` intermediate results.
504 ///
505 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
506 template<class F, class T, class R, class Cond>
507 auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
508 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
509 }
510
511 //////////////////////////////////////////////////////////////////////////
512 /// \brief Execute a function over the elements of a vector in parallel (Map) and accumulate the results into a single value (Reduce).
513 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc)
514 template<class F, class T, class R, class Cond>
515 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type {
516 return Reduce(Map(func, args), redfunc);
517 }
518
519 //////////////////////////////////////////////////////////////////////////
520 /// \brief Execute a function over the elements of an immutable vector in parallel (Map) and accumulate the results into a single value (Reduce).
521 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc)
522 template<class F, class T, class R, class Cond>
523 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type {
524 return Reduce(Map(func, args), redfunc);
525 }
526
527 //////////////////////////////////////////////////////////////////////////
528 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
529 /// Benefits from partial reduction into `nChunks` intermediate results.
530 ///
531 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
532 template<class F, class T, class R, class Cond>
533 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
534 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
535 }
536
537 //////////////////////////////////////////////////////////////////////////
538 /// \brief Execute a function in parallel over the elements of an immutable vector (Map) and accumulate the results into a single value (Reduce).
539 /// Benefits from partial reduction into `nChunks` intermediate results.
540 ///
541 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
542 template<class F, class T, class R, class Cond>
543 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
544 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
545 }
546
547 //////////////////////////////////////////////////////////////////////////
548 /// \copydoc ROOT::Internal::TExecutor::Reduce(const std::vector<T> &objs,R redfunc)
549 template<class T, class R>
550 auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
551 {
552 // check we can apply reduce to objs
553 static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
554 return SeqReduce(objs, redfunc);
555 }
556
557 //////////////////////////////////////////////////////////////////////////
558 /// \brief "Reduce" an std::vector into a single object in parallel by passing a
559 /// binary function as the second argument defining the reduction operation.
560 ///
561 /// \param objs A vector of elements to combine.
562 /// \param redfunc Binary reduction function to combine the elements of the vector `objs`.
563 /// \return A value result of combining the vector elements into a single object of the same type.
564 template<class T, class BINARYOP>
565 auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
566 {
567 // check we can apply reduce to objs
568 static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
569 return ParallelReduce(objs, redfunc);
570 }
571
572 //////////////////////////////////////////////////////////////////////////
573 /// \brief "Reduce", sequentially, an std::vector into a single object
574 ///
575 /// \param objs A vector of elements to combine.
576 /// \param redfunc Reduction function to combine the elements of the vector `objs`.
577 /// \return A value result of combining the vector elements into a single object of the same type.
578 template<class T, class R>
579 auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
580 {
581 return redfunc(objs);
582 }
583
584} // namespace ROOT
585
586#endif // R__USE_IMT
587#endif
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
unsigned int UInt_t
Definition: RtypesCore.h:46
int type
Definition: TGX11.cxx:121
This class defines an interface to execute the same task multiple times, possibly in parallel and wit...
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
iterator begin() const
Definition: TSeq.hxx:163
T step() const
Definition: TSeq.hxx:184
iterator end() const
Definition: TSeq.hxx:166
This class provides a simple interface to execute the same task multiple times in parallel threads,...
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< typename std::result_of< F()>::type >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of< F()>::type
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
TThreadExecutor(UInt_t nThreads=0u)
Class constructor.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute a function without arguments several times in parallel.
auto Map(Args &&... args)
Create new collection applying a callable to the elements of the input collection.
Definition: RVec.hxx:2023
#define F(x, y, z)
double T(double x)
Definition: ChebyshevPol.h:34
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:150
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
auto * a
Definition: textangle.C:12