Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TThreadExecutor.hxx
Go to the documentation of this file.
1// @(#)root/thread:$Id$
2// Author: Xavier Valls March 2016
3
4/*************************************************************************
5 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
14
15#include "RConfigure.h"
16
17// exclude in case ROOT does not have IMT support
18#ifndef R__USE_IMT
19// No need to error out for dictionaries.
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22# endif
23#else
24
26#include "ROOT/TSeq.hxx"
27#include "ROOT/TypeTraits.hxx" // InvokeResult
28#include "RTaskArena.hxx"
29#include "TError.h"
30
31#include <functional> //std::function
32#include <initializer_list>
33#include <memory>
34#include <numeric> //std::accumulate
35#include <type_traits> //std::enable_if
36#include <utility> //std::move
37#include <vector>
38
39namespace ROOT {
40
41 class TThreadExecutor: public TExecutorCRTP<TThreadExecutor> {
43
44 public:
45
46 explicit TThreadExecutor(UInt_t nThreads = 0u);
47
50
51 // ForEach
52 //
53 template<class F>
54 void Foreach(F func, unsigned nTimes, unsigned nChunks = 0);
55 template<class F, class INTEGER>
56 void Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks = 0);
57 template<class F, class T>
58 void Foreach(F func, std::initializer_list<T> args, unsigned nChunks = 0);
59 template<class F, class T>
60 void Foreach(F func, std::vector<T> &args, unsigned nChunks = 0);
61 template<class F, class T>
62 void Foreach(F func, const std::vector<T> &args, unsigned nChunks = 0);
63
64 // Map
65 //
67
68 // Extension of the Map interfaces with chunking, specific to this class
69 template <class F, class R, class Cond = validMapReturnCond<F>>
70 auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
71 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
72 auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
73 -> std::vector<InvokeResult_t<F, INTEGER>>;
74 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
75 auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
76 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
77 auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
78 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
79 auto Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
80
81 // MapReduce
82 //
83 // We need to reimplement the MapReduce interfaces to allow for parallel reduction, defined in
84 // this class but not in the base class.
85 //
86 // the late return types also check at compile-time whether redfunc is compatible with func,
87 // other than checking that func is compatible with the type of arguments.
88 // a static_assert check in TThreadExecutor::Reduce is used to check that redfunc is compatible with the type returned by func
90 template <class F, class R, class Cond = validMapReturnCond<F>>
91 auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>;
92 template <class F, class R, class Cond = validMapReturnCond<F>>
93 auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>;
94 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
95 auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, INTEGER>;
96 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
97 auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
98 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
99 auto MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
100 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
101 auto MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
102 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
103 auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
104 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
105 auto MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
106
108 template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
109 template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
110
111 unsigned GetPoolSize() const;
112
113 private:
114 // Implementation of the Map functions declared in the parent class (TExecutorCRTP)
115 //
116 template <class F, class Cond = validMapReturnCond<F>>
117 auto MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
118 template <class F, class INTEGER, class Cond = validMapReturnCond<F, INTEGER>>
119 auto MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>;
120 template <class F, class T, class Cond = validMapReturnCond<F, T>>
121 auto MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
122 template <class F, class T, class Cond = validMapReturnCond<F, T>>
123 auto MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
124
125 // Functions that interface with the parallel library used as a backend
126 void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
127 double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
128 float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
129 template<class T, class R>
130 auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
131
132 /// Pointer to the TBB task arena wrapper
133 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper> fTaskArenaW = nullptr;
134 };
135
136 /************ TEMPLATE METHODS IMPLEMENTATION ******************/
137
138 //////////////////////////////////////////////////////////////////////////
139 /// \brief Execute a function without arguments several times in parallel, dividing the execution in nChunks.
140 ///
141 /// \param func Function to be executed.
142 /// \param nTimes Number of times function should be called.
143 /// \param nChunks Number of chunks to split the input data for processing.
144 template<class F>
145 void TThreadExecutor::Foreach(F func, unsigned nTimes, unsigned nChunks) {
146 if (nChunks == 0) {
147 ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
148 return;
149 }
150
151 unsigned step = (nTimes + nChunks - 1) / nChunks;
152 auto lambda = [&](unsigned int i)
153 {
154 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
155 func();
156 }
157 };
158 ParallelFor(0U, nTimes, step, lambda);
159 }
160
161 //////////////////////////////////////////////////////////////////////////
162 /// \brief Execute a function in parallel over a sequence of indexes, dividing the execution in nChunks.
163 ///
164 /// \param func Function to be executed. Must take an element of the sequence passed assecond argument as a parameter.
165 /// \param args Sequence of indexes to execute `func` on.
166 /// \param nChunks Number of chunks to split the input data for processing.
167 template<class F, class INTEGER>
168 void TThreadExecutor::Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks) {
169 if (nChunks == 0) {
170 ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
171 return;
172 }
173 unsigned start = *args.begin();
174 unsigned end = *args.end();
175 unsigned seqStep = args.step();
176 unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
177
178 auto lambda = [&](unsigned int i)
179 {
180 for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
181 func(i + j);
182 }
183 };
184 ParallelFor(start, end, step, lambda);
185 }
186
187 //////////////////////////////////////////////////////////////////////////
188 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks.
189 ///
190 /// \param func Function to be executed on the elements of the initializer_list passed as second parameter.
191 /// \param args initializer_list for a vector to apply `func` on.
192 /// \param nChunks Number of chunks to split the input data for processing.
193 template<class F, class T>
194 void TThreadExecutor::Foreach(F func, std::initializer_list<T> args, unsigned nChunks) {
195 std::vector<T> vargs(std::move(args));
196 Foreach(func, vargs, nChunks);
197 }
198
199 //////////////////////////////////////////////////////////////////////////
200 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks.
201 ///
202 /// \param func Function to be executed on the elements of the vector passed as second parameter.
203 /// \param args Vector of elements passed as an argument to `func`.
204 /// \param nChunks Number of chunks to split the input data for processing.
205 template<class F, class T>
206 void TThreadExecutor::Foreach(F func, std::vector<T> &args, unsigned nChunks) {
207 unsigned int nToProcess = args.size();
208 if (nChunks == 0) {
209 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
210 return;
211 }
212
213 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
214 auto lambda = [&](unsigned int i)
215 {
216 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
217 func(args[i + j]);
218 }
219 };
220 ParallelFor(0U, nToProcess, step, lambda);
221 }
222
223 //////////////////////////////////////////////////////////////////////////
224 /// \brief Execute a function in parallel over the elements of a immutable vector, dividing the execution in nChunks.
225 ///
226 /// \param func Function to be executed on the elements of the vector passed as second parameter.
227 /// \param args Immutable vector of elements passed as an argument to `func`.
228 /// \param nChunks Number of chunks to split the input data for processing.
229 template<class F, class T>
230 void TThreadExecutor::Foreach(F func, const std::vector<T> &args, unsigned nChunks) {
231 unsigned int nToProcess = args.size();
232 if (nChunks == 0) {
233 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
234 return;
235 }
236
237 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
238 auto lambda = [&](unsigned int i)
239 {
240 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
241 func(args[i + j]);
242 }
243 };
244 ParallelFor(0U, nToProcess, step, lambda);
245 }
246
247 //////////////////////////////////////////////////////////////////////////
248 /// \brief Execute a function without arguments several times in parallel.
249 /// Implementation of the Map method.
250 ///
251 /// \copydetails TExecutorCRTP::Map(F func,unsigned nTimes)
252 template <class F, class Cond>
253 auto TThreadExecutor::MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>
254 {
255 using retType = decltype(func());
256 std::vector<retType> reslist(nTimes);
257 auto lambda = [&](unsigned int i)
258 {
259 reslist[i] = func();
260 };
261 ParallelFor(0U, nTimes, 1, lambda);
262
263 return reslist;
264 }
265
266 //////////////////////////////////////////////////////////////////////////
267 /// \brief Execute a function over a sequence of indexes in parallel.
268 /// Implementation of the Map method.
269 ///
270 /// \copydetails TExecutorCRTP::Map(F func,ROOT::TSeq<INTEGER> args)
271 template <class F, class INTEGER, class Cond>
272 auto TThreadExecutor::MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>
273 {
274 using retType = decltype(func(*args.begin()));
275 std::vector<retType> reslist(args.size());
276 auto lambda = [&](unsigned int i) { reslist[i] = func(args[i]); };
277 ParallelFor(0U, args.size(), 1, lambda);
278
279 return reslist;
280 }
281
282 //////////////////////////////////////////////////////////////////////////
283 /// \brief Execute a function `nTimes` in parallel, dividing the execution in nChunks and
284 /// providing a result per chunk.
285 ///
286 /// \copydetails ROOT::Internal::TExecutor::Map(F func,unsigned nTimes,R redfunc,unsigned nChunks)
287 template <class F, class R, class Cond>
288 auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>
289 {
290 if (nChunks == 0)
291 {
292 return Map(func, nTimes);
293 }
294
295 unsigned step = (nTimes + nChunks - 1) / nChunks;
296 // Avoid empty chunks
297 unsigned actualChunks = (nTimes + step - 1) / step;
298 using retType = decltype(func());
299 std::vector<retType> reslist(actualChunks);
300 auto lambda = [&](unsigned int i)
301 {
302 std::vector<retType> partialResults(std::min(nTimes-i, step));
303 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
304 partialResults[j] = func();
305 }
306 reslist[i / step] = Reduce(partialResults, redfunc);
307 };
308 ParallelFor(0U, nTimes, step, lambda);
309
310 return reslist;
311 }
312
313 //////////////////////////////////////////////////////////////////////////
314 /// \brief Execute a function over the elements of a vector in parallel.
315 /// Implementation of the Map method.
316 ///
317 /// \copydetails TExecutorCRTP::Map(F func,std::vector<T> &args)
318 template <class F, class T, class Cond>
319 auto TThreadExecutor::MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
320 {
321 // //check whether func is callable
322 using retType = decltype(func(args.front()));
323
324 unsigned int nToProcess = args.size();
325 std::vector<retType> reslist(nToProcess);
326
327 auto lambda = [&](unsigned int i)
328 {
329 reslist[i] = func(args[i]);
330 };
331
332 ParallelFor(0U, nToProcess, 1, lambda);
333
334 return reslist;
335 }
336
337 //////////////////////////////////////////////////////////////////////////
338 /// \brief Execute a function over the elements of a vector in parallel.
339 /// Implementation of the Map method.
340 ///
341 /// \copydetails TExecutorCRTP::Map(F func,const std::vector<T> &args)
342 template <class F, class T, class Cond>
343 auto TThreadExecutor::MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
344 {
345 // //check whether func is callable
346 using retType = decltype(func(args.front()));
347
348 unsigned int nToProcess = args.size();
349 std::vector<retType> reslist(nToProcess);
350
351 auto lambda = [&](unsigned int i)
352 {
353 reslist[i] = func(args[i]);
354 };
355
356 ParallelFor(0U, nToProcess, 1, lambda);
357
358 return reslist;
359 }
360
361 //////////////////////////////////////////////////////////////////////////
362 /// \brief Execute a function in parallel over the elements of a sequence, dividing the execution in nChunks and
363 /// providing a result per chunk.
364 ///
365 /// \copydetails ROOT::Internal::TExecutor::Map(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
366 template <class F, class INTEGER, class R, class Cond>
367 auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
368 -> std::vector<InvokeResult_t<F, INTEGER>>
369 {
370 if (nChunks == 0)
371 {
372 return Map(func, args);
373 }
374
375 unsigned nToProcess = args.size();
376 unsigned step = (nToProcess + nChunks - 1) / nChunks; // ceiling the division
377 // Avoid empty chunks
378 unsigned actualChunks = (nToProcess + step - 1) / step;
379
380 using retType = decltype(func(*args.begin()));
381 std::vector<retType> reslist(actualChunks);
382 auto lambda = [&](unsigned int i) {
383 std::vector<retType> partialResults(std::min(step, nToProcess - i)); // last chunk might be smaller
384 for (unsigned j = 0; j < partialResults.size(); j++) {
385 partialResults[j] = func(args[i + j]);
386 }
387 reslist[i / step] = Reduce(partialResults, redfunc);
388 };
389
390 ParallelFor(0U, nToProcess, step, lambda);
391
392 return reslist;
393 }
394
395 //////////////////////////////////////////////////////////////////////////
396 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks and
397 /// providing a result per chunk.
398 ///
399 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
400 template <class F, class T, class R, class Cond>
401 auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks)
402 -> std::vector<InvokeResult_t<F, T>>
403 {
404 if (nChunks == 0)
405 {
406 return Map(func, args);
407 }
408
409 unsigned int nToProcess = args.size();
410 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
411 // Avoid empty chunks
412 unsigned actualChunks = (nToProcess + step - 1) / step;
413
414 using retType = decltype(func(args.front()));
415 std::vector<retType> reslist(actualChunks);
416 auto lambda = [&](unsigned int i) {
417 std::vector<retType> partialResults(std::min(step, nToProcess - i));
418 for (unsigned j = 0; j < partialResults.size(); j++) {
419 partialResults[j] = func(args[i + j]);
420 }
421 reslist[i / step] = Reduce(partialResults, redfunc);
422 };
423
424 ParallelFor(0U, nToProcess, step, lambda);
425
426 return reslist;
427 }
428
429 //////////////////////////////////////////////////////////////////////////
430 /// \brief Execute a function in parallel over the elements of an immutable vector, dividing the execution in nChunks and
431 /// providing a result per chunk.
432 ///
433 /// \copydetails ROOT::Internal::TExecutor::Map(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
434 template <class F, class T, class R, class Cond>
435 auto TThreadExecutor::Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
436 -> std::vector<InvokeResult_t<F, T>>
437 {
438 if (nChunks == 0)
439 {
440 return Map(func, args);
441 }
442
443 unsigned int nToProcess = args.size();
444 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
445 // Avoid empty chunks
446 unsigned actualChunks = (nToProcess + step - 1) / step;
447
448 using retType = decltype(func(args.front()));
449 std::vector<retType> reslist(actualChunks);
450 auto lambda = [&](unsigned int i) {
451 std::vector<retType> partialResults(std::min(step, nToProcess - i));
452 for (unsigned j = 0; j < partialResults.size(); j++) {
453 partialResults[j] = func(args[i + j]);
454 }
455 reslist[i / step] = Reduce(partialResults, redfunc);
456 };
457
458 ParallelFor(0U, nToProcess, step, lambda);
459
460 return reslist;
461 }
462
463 //////////////////////////////////////////////////////////////////////////
464 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks and
465 /// providing a result per chunk.
466 ///
467 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
468 template <class F, class T, class R, class Cond>
469 auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
470 -> std::vector<InvokeResult_t<F, T>>
471 {
472 std::vector<T> vargs(std::move(args));
473 const auto &reslist = Map(func, vargs, redfunc, nChunks);
474 return reslist;
475 }
476
477 //////////////////////////////////////////////////////////////////////////
478 /// \brief Execute a function `nTimes` in parallel (Map) and accumulate the results into a single value (Reduce).
479 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc)
480 template <class F, class R, class Cond>
481 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>
482 {
483 return Reduce(Map(func, nTimes), redfunc);
484 }
485
486 //////////////////////////////////////////////////////////////////////////
487 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
488 /// Benefits from partial reduction into `nChunks` intermediate results.
489 ///
490 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc,unsigned nChunks)
491 template <class F, class R, class Cond>
492 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>
493 {
494 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
495 }
496
497 //////////////////////////////////////////////////////////////////////////
498 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
499 /// Benefits from partial reduction into `nChunks` intermediate results.
500 ///
501 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
502 template <class F, class INTEGER, class R, class Cond>
503 auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
505 {
506 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
507 }
508
509 //////////////////////////////////////////////////////////////////////////
510 /// \brief Execute a function in parallel over the elements of an initializer_list (Map) and accumulate the results into a single value (Reduce).
511 /// Benefits from partial reduction into `nChunks` intermediate results.
512 ///
513 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
514 template <class F, class T, class R, class Cond>
515 auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
517 {
518 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
519 }
520
521 //////////////////////////////////////////////////////////////////////////
522 /// \brief Execute a function over the elements of a vector in parallel (Map) and accumulate the results into a single value (Reduce).
523 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc)
524 template <class F, class T, class R, class Cond>
525 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
526 {
527 return Reduce(Map(func, args), redfunc);
528 }
529
530 //////////////////////////////////////////////////////////////////////////
531 /// \brief Execute a function over the elements of an immutable vector in parallel (Map) and accumulate the results into a single value (Reduce).
532 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc)
533 template <class F, class T, class R, class Cond>
534 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
535 {
536 return Reduce(Map(func, args), redfunc);
537 }
538
539 //////////////////////////////////////////////////////////////////////////
540 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
541 /// Benefits from partial reduction into `nChunks` intermediate results.
542 ///
543 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
544 template <class F, class T, class R, class Cond>
545 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>
546 {
547 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
548 }
549
550 //////////////////////////////////////////////////////////////////////////
551 /// \brief Execute a function in parallel over the elements of an immutable vector (Map) and accumulate the results into a single value (Reduce).
552 /// Benefits from partial reduction into `nChunks` intermediate results.
553 ///
554 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
555 template <class F, class T, class R, class Cond>
556 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
558 {
559 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
560 }
561
562 //////////////////////////////////////////////////////////////////////////
563 /// \copydoc ROOT::Internal::TExecutor::Reduce(const std::vector<T> &objs,R redfunc)
564 template<class T, class R>
565 auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
566 {
567 // check we can apply reduce to objs
568 static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
569 return SeqReduce(objs, redfunc);
570 }
571
572 //////////////////////////////////////////////////////////////////////////
573 /// \brief "Reduce" an std::vector into a single object in parallel by passing a
574 /// binary function as the second argument defining the reduction operation.
575 ///
576 /// \param objs A vector of elements to combine.
577 /// \param redfunc Binary reduction function to combine the elements of the vector `objs`.
578 /// \return A value result of combining the vector elements into a single object of the same type.
579 template<class T, class BINARYOP>
580 auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
581 {
582 // check we can apply reduce to objs
583 static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
584 return ParallelReduce(objs, redfunc);
585 }
586
587 //////////////////////////////////////////////////////////////////////////
588 /// \brief "Reduce", sequentially, an std::vector into a single object
589 ///
590 /// \param objs A vector of elements to combine.
591 /// \param redfunc Reduction function to combine the elements of the vector `objs`.
592 /// \return A value result of combining the vector elements into a single object of the same type.
593 template<class T, class R>
594 auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
595 {
596 return redfunc(objs);
597 }
598
599} // namespace ROOT
600
601#endif // R__USE_IMT
602#endif
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define a(i)
Definition RSha256.hxx:99
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
This class defines an interface to execute the same task multiple times, possibly in parallel and wit...
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
A pseudo container class which is a generator of indices.
Definition TSeq.hxx:67
iterator begin() const
Definition TSeq.hxx:172
T step() const
Definition TSeq.hxx:193
iterator end() const
Definition TSeq.hxx:175
This class provides a simple interface to execute the same task multiple times in parallel threads,...
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
#define F(x, y, z)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...