Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TThreadExecutor.hxx
Go to the documentation of this file.
1// @(#)root/thread:$Id$
2// Author: Xavier Valls March 2016
3
4/*************************************************************************
5 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
14
15#include "RConfigure.h"
16
17// exclude in case ROOT does not have IMT support
18#ifndef R__USE_IMT
19// No need to error out for dictionaries.
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22# endif
23#else
24
#include "ROOT/TSeq.hxx"
#include "ROOT/TypeTraits.hxx" // InvokeResult
#include "RTaskArena.hxx"
#include "TError.h"

#include <algorithm> //std::min
#include <functional> //std::function
#include <initializer_list>
#include <memory>
#include <numeric> //std::accumulate
#include <type_traits> //std::enable_if
#include <utility> //std::move
#include <vector>
38
39namespace ROOT {
40
41 class TThreadExecutor: public TExecutorCRTP<TThreadExecutor> {
43
44 template <typename F, typename... Args>
45 using InvokeResult_t = ROOT::TypeTraits::InvokeResult_t<F, Args...>;
46
47 public:
48
49 explicit TThreadExecutor(UInt_t nThreads = 0u);
50
53
54 // ForEach
55 //
56 template<class F>
57 void Foreach(F func, unsigned nTimes, unsigned nChunks = 0);
58 template<class F, class INTEGER>
59 void Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks = 0);
60 template<class F, class T>
61 void Foreach(F func, std::initializer_list<T> args, unsigned nChunks = 0);
62 template<class F, class T>
63 void Foreach(F func, std::vector<T> &args, unsigned nChunks = 0);
64 template<class F, class T>
65 void Foreach(F func, const std::vector<T> &args, unsigned nChunks = 0);
66
67 // Map
68 //
70
71 // MapReduce
72 //
73 // We need to reimplement the MapReduce interfaces to allow for parallel reduction, defined in
74 // this class but not in the base class.
75 //
76 // the late return types also check at compile-time whether redfunc is compatible with func,
77 // other than checking that func is compatible with the type of arguments.
78 // a static_assert check in TThreadExecutor::Reduce is used to check that redfunc is compatible with the type returned by func
80 template <class F, class R, class Cond = noReferenceCond<F>>
81 auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>;
82 template <class F, class R, class Cond = noReferenceCond<F>>
83 auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>;
84 template <class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
85 auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, INTEGER>;
86 template <class F, class T, class R, class Cond = noReferenceCond<F, T>>
87 auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
88 template <class F, class T, class R, class Cond = noReferenceCond<F, T>>
89 auto MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
90 template <class F, class T, class R, class Cond = noReferenceCond<F, T>>
91 auto MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
92 template <class F, class T, class R, class Cond = noReferenceCond<F, T>>
93 auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
94 template <class F, class T, class R, class Cond = noReferenceCond<F, T>>
95 auto MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
96
98 template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
99 template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
100
101 unsigned GetPoolSize() const;
102
103 private:
104 // Implementation of the Map functions declared in the parent class (TExecutorCRTP)
105 //
106 template <class F, class Cond = noReferenceCond<F>>
107 auto MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
108 template <class F, class INTEGER, class Cond = noReferenceCond<F, INTEGER>>
109 auto MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>;
110 template <class F, class T, class Cond = noReferenceCond<F, T>>
111 auto MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
112 template <class F, class T, class Cond = noReferenceCond<F, T>>
113 auto MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
114
115 // Extension of the Map interfaces with chunking, specific to this class and
116 // only available from a MapReduce call.
117 template <class F, class R, class Cond = noReferenceCond<F>>
118 auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
119 template <class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
120 auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
121 -> std::vector<InvokeResult_t<F, INTEGER>>;
122 template <class F, class T, class R, class Cond = noReferenceCond<F, T>>
123 auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
124 template <class F, class T, class R, class Cond = noReferenceCond<F, T>>
125 auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
126 template <class F, class T, class R, class Cond = noReferenceCond<F, T>>
127 auto Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
128
129 // Functions that interface with the parallel library used as a backend
130 void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
131 double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
132 float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
133 template<class T, class R>
134 auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
135
136 /// Pointer to the TBB task arena wrapper
137 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper> fTaskArenaW = nullptr;
138 };
139
140 /************ TEMPLATE METHODS IMPLEMENTATION ******************/
141
142 //////////////////////////////////////////////////////////////////////////
143 /// \brief Execute a function without arguments several times in parallel, dividing the execution in nChunks.
144 ///
145 /// \param func Function to be executed.
146 /// \param nTimes Number of times function should be called.
147 /// \param nChunks Number of chunks to split the input data for processing.
148 template<class F>
149 void TThreadExecutor::Foreach(F func, unsigned nTimes, unsigned nChunks) {
150 if (nChunks == 0) {
151 ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
152 return;
153 }
154
155 unsigned step = (nTimes + nChunks - 1) / nChunks;
156 auto lambda = [&](unsigned int i)
157 {
158 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
159 func();
160 }
161 };
162 ParallelFor(0U, nTimes, step, lambda);
163 }
164
165 //////////////////////////////////////////////////////////////////////////
166 /// \brief Execute a function in parallel over a sequence of indexes, dividing the execution in nChunks.
167 ///
168 /// \param func Function to be executed. Must take an element of the sequence passed assecond argument as a parameter.
169 /// \param args Sequence of indexes to execute `func` on.
170 /// \param nChunks Number of chunks to split the input data for processing.
171 template<class F, class INTEGER>
172 void TThreadExecutor::Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks) {
173 if (nChunks == 0) {
174 ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
175 return;
176 }
177 unsigned start = *args.begin();
178 unsigned end = *args.end();
179 unsigned seqStep = args.step();
180 unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
181
182 auto lambda = [&](unsigned int i)
183 {
184 for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
185 func(i + j);
186 }
187 };
188 ParallelFor(start, end, step, lambda);
189 }
190
191 //////////////////////////////////////////////////////////////////////////
192 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks.
193 ///
194 /// \param func Function to be executed on the elements of the initializer_list passed as second parameter.
195 /// \param args initializer_list for a vector to apply `func` on.
196 /// \param nChunks Number of chunks to split the input data for processing.
197 template<class F, class T>
198 void TThreadExecutor::Foreach(F func, std::initializer_list<T> args, unsigned nChunks) {
199 std::vector<T> vargs(std::move(args));
200 Foreach(func, vargs, nChunks);
201 }
202
203 //////////////////////////////////////////////////////////////////////////
204 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks.
205 ///
206 /// \param func Function to be executed on the elements of the vector passed as second parameter.
207 /// \param args Vector of elements passed as an argument to `func`.
208 /// \param nChunks Number of chunks to split the input data for processing.
209 template<class F, class T>
210 void TThreadExecutor::Foreach(F func, std::vector<T> &args, unsigned nChunks) {
211 unsigned int nToProcess = args.size();
212 if (nChunks == 0) {
213 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
214 return;
215 }
216
217 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
218 auto lambda = [&](unsigned int i)
219 {
220 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
221 func(args[i + j]);
222 }
223 };
224 ParallelFor(0U, nToProcess, step, lambda);
225 }
226
227 //////////////////////////////////////////////////////////////////////////
228 /// \brief Execute a function in parallel over the elements of a immutable vector, dividing the execution in nChunks.
229 ///
230 /// \param func Function to be executed on the elements of the vector passed as second parameter.
231 /// \param args Immutable vector of elements passed as an argument to `func`.
232 /// \param nChunks Number of chunks to split the input data for processing.
233 template<class F, class T>
234 void TThreadExecutor::Foreach(F func, const std::vector<T> &args, unsigned nChunks) {
235 unsigned int nToProcess = args.size();
236 if (nChunks == 0) {
237 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
238 return;
239 }
240
241 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
242 auto lambda = [&](unsigned int i)
243 {
244 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
245 func(args[i + j]);
246 }
247 };
248 ParallelFor(0U, nToProcess, step, lambda);
249 }
250
251 //////////////////////////////////////////////////////////////////////////
252 /// \brief Execute a function without arguments several times in parallel.
253 /// Implementation of the Map method.
254 ///
255 /// \copydetails TExecutorCRTP::Map(F func,unsigned nTimes)
256 template <class F, class Cond>
257 auto TThreadExecutor::MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>
258 {
259 using retType = decltype(func());
260 std::vector<retType> reslist(nTimes);
261 auto lambda = [&](unsigned int i)
262 {
263 reslist[i] = func();
264 };
265 ParallelFor(0U, nTimes, 1, lambda);
266
267 return reslist;
268 }
269
270 //////////////////////////////////////////////////////////////////////////
271 /// \brief Execute a function over a sequence of indexes in parallel.
272 /// Implementation of the Map method.
273 ///
274 /// \copydetails TExecutorCRTP::Map(F func,ROOT::TSeq<INTEGER> args)
275 template <class F, class INTEGER, class Cond>
276 auto TThreadExecutor::MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>
277 {
278 using retType = decltype(func(*args.begin()));
279 std::vector<retType> reslist(args.size());
280 auto lambda = [&](unsigned int i) { reslist[i] = func(args[i]); };
281 ParallelFor(0U, args.size(), 1, lambda);
282
283 return reslist;
284 }
285
286 //////////////////////////////////////////////////////////////////////////
287 /// \brief Execute a function `nTimes` in parallel, dividing the execution in nChunks and
288 /// providing a result per chunk.
289 ///
290 /// \copydetails ROOT::Internal::TExecutor::Map(F func,unsigned nTimes,R redfunc,unsigned nChunks)
291 template <class F, class R, class Cond>
292 auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>
293 {
294 if (nChunks == 0)
295 {
296 return Map(func, nTimes);
297 }
298
299 unsigned step = (nTimes + nChunks - 1) / nChunks;
300 // Avoid empty chunks
301 unsigned actualChunks = (nTimes + step - 1) / step;
302 using retType = decltype(func());
303 std::vector<retType> reslist(actualChunks);
304 auto lambda = [&](unsigned int i)
305 {
306 std::vector<retType> partialResults(std::min(nTimes-i, step));
307 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
308 partialResults[j] = func();
309 }
310 reslist[i / step] = Reduce(partialResults, redfunc);
311 };
312 ParallelFor(0U, nTimes, step, lambda);
313
314 return reslist;
315 }
316
317 //////////////////////////////////////////////////////////////////////////
318 /// \brief Execute a function over the elements of a vector in parallel.
319 /// Implementation of the Map method.
320 ///
321 /// \copydetails TExecutorCRTP::Map(F func,std::vector<T> &args)
322 template <class F, class T, class Cond>
323 auto TThreadExecutor::MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
324 {
325 // //check whether func is callable
326 using retType = decltype(func(args.front()));
327
328 unsigned int nToProcess = args.size();
329 std::vector<retType> reslist(nToProcess);
330
331 auto lambda = [&](unsigned int i)
332 {
333 reslist[i] = func(args[i]);
334 };
335
336 ParallelFor(0U, nToProcess, 1, lambda);
337
338 return reslist;
339 }
340
341 //////////////////////////////////////////////////////////////////////////
342 /// \brief Execute a function over the elements of a vector in parallel.
343 /// Implementation of the Map method.
344 ///
345 /// \copydetails TExecutorCRTP::Map(F func,const std::vector<T> &args)
346 template <class F, class T, class Cond>
347 auto TThreadExecutor::MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
348 {
349 // //check whether func is callable
350 using retType = decltype(func(args.front()));
351
352 unsigned int nToProcess = args.size();
353 std::vector<retType> reslist(nToProcess);
354
355 auto lambda = [&](unsigned int i)
356 {
357 reslist[i] = func(args[i]);
358 };
359
360 ParallelFor(0U, nToProcess, 1, lambda);
361
362 return reslist;
363 }
364
365 //////////////////////////////////////////////////////////////////////////
366 /// \brief Execute a function in parallel over the elements of a sequence, dividing the execution in nChunks and
367 /// providing a result per chunk.
368 ///
369 /// \copydetails ROOT::Internal::TExecutor::Map(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
370 template <class F, class INTEGER, class R, class Cond>
371 auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
372 -> std::vector<InvokeResult_t<F, INTEGER>>
373 {
374 if (nChunks == 0)
375 {
376 return Map(func, args);
377 }
378
379 unsigned nToProcess = args.size();
380 unsigned step = (nToProcess + nChunks - 1) / nChunks; // ceiling the division
381 // Avoid empty chunks
382 unsigned actualChunks = (nToProcess + step - 1) / step;
383
384 using retType = decltype(func(*args.begin()));
385 std::vector<retType> reslist(actualChunks);
386 auto lambda = [&](unsigned int i) {
387 std::vector<retType> partialResults(std::min(step, nToProcess - i)); // last chunk might be smaller
388 for (unsigned j = 0; j < partialResults.size(); j++) {
389 partialResults[j] = func(args[i + j]);
390 }
391 reslist[i / step] = Reduce(partialResults, redfunc);
392 };
393
394 ParallelFor(0U, nToProcess, step, lambda);
395
396 return reslist;
397 }
398
399 //////////////////////////////////////////////////////////////////////////
400 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks and
401 /// providing a result per chunk.
402 ///
403 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
404 template <class F, class T, class R, class Cond>
405 auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks)
406 -> std::vector<InvokeResult_t<F, T>>
407 {
408 if (nChunks == 0)
409 {
410 return Map(func, args);
411 }
412
413 unsigned int nToProcess = args.size();
414 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
415 // Avoid empty chunks
416 unsigned actualChunks = (nToProcess + step - 1) / step;
417
418 using retType = decltype(func(args.front()));
419 std::vector<retType> reslist(actualChunks);
420 auto lambda = [&](unsigned int i) {
421 std::vector<retType> partialResults(std::min(step, nToProcess - i));
422 for (unsigned j = 0; j < partialResults.size(); j++) {
423 partialResults[j] = func(args[i + j]);
424 }
425 reslist[i / step] = Reduce(partialResults, redfunc);
426 };
427
428 ParallelFor(0U, nToProcess, step, lambda);
429
430 return reslist;
431 }
432
433 //////////////////////////////////////////////////////////////////////////
434 /// \brief Execute a function in parallel over the elements of an immutable vector, dividing the execution in nChunks and
435 /// providing a result per chunk.
436 ///
437 /// \copydetails ROOT::Internal::TExecutor::Map(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
438 template <class F, class T, class R, class Cond>
439 auto TThreadExecutor::Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
440 -> std::vector<InvokeResult_t<F, T>>
441 {
442 if (nChunks == 0)
443 {
444 return Map(func, args);
445 }
446
447 unsigned int nToProcess = args.size();
448 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
449 // Avoid empty chunks
450 unsigned actualChunks = (nToProcess + step - 1) / step;
451
452 using retType = decltype(func(args.front()));
453 std::vector<retType> reslist(actualChunks);
454 auto lambda = [&](unsigned int i) {
455 std::vector<retType> partialResults(std::min(step, nToProcess - i));
456 for (unsigned j = 0; j < partialResults.size(); j++) {
457 partialResults[j] = func(args[i + j]);
458 }
459 reslist[i / step] = Reduce(partialResults, redfunc);
460 };
461
462 ParallelFor(0U, nToProcess, step, lambda);
463
464 return reslist;
465 }
466
467 //////////////////////////////////////////////////////////////////////////
468 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks and
469 /// providing a result per chunk.
470 ///
471 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
472 template <class F, class T, class R, class Cond>
473 auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
474 -> std::vector<InvokeResult_t<F, T>>
475 {
476 std::vector<T> vargs(std::move(args));
477 const auto &reslist = Map(func, vargs, redfunc, nChunks);
478 return reslist;
479 }
480
481 //////////////////////////////////////////////////////////////////////////
482 /// \brief Execute a function `nTimes` in parallel (Map) and accumulate the results into a single value (Reduce).
483 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc)
484 template <class F, class R, class Cond>
485 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>
486 {
487 return Reduce(Map(func, nTimes), redfunc);
488 }
489
490 //////////////////////////////////////////////////////////////////////////
491 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
492 /// Benefits from partial reduction into `nChunks` intermediate results.
493 ///
494 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc,unsigned nChunks)
495 template <class F, class R, class Cond>
496 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>
497 {
498 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
499 }
500
501 //////////////////////////////////////////////////////////////////////////
502 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
503 /// Benefits from partial reduction into `nChunks` intermediate results.
504 ///
505 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
506 template <class F, class INTEGER, class R, class Cond>
507 auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
509 {
510 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
511 }
512
513 //////////////////////////////////////////////////////////////////////////
514 /// \brief Execute a function in parallel over the elements of an initializer_list (Map) and accumulate the results into a single value (Reduce).
515 /// Benefits from partial reduction into `nChunks` intermediate results.
516 ///
517 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
518 template <class F, class T, class R, class Cond>
519 auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
521 {
522 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
523 }
524
525 //////////////////////////////////////////////////////////////////////////
526 /// \brief Execute a function over the elements of a vector in parallel (Map) and accumulate the results into a single value (Reduce).
527 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc)
528 template <class F, class T, class R, class Cond>
529 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
530 {
531 return Reduce(Map(func, args), redfunc);
532 }
533
534 //////////////////////////////////////////////////////////////////////////
535 /// \brief Execute a function over the elements of an immutable vector in parallel (Map) and accumulate the results into a single value (Reduce).
536 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc)
537 template <class F, class T, class R, class Cond>
538 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
539 {
540 return Reduce(Map(func, args), redfunc);
541 }
542
543 //////////////////////////////////////////////////////////////////////////
544 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
545 /// Benefits from partial reduction into `nChunks` intermediate results.
546 ///
547 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
548 template <class F, class T, class R, class Cond>
549 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>
550 {
551 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
552 }
553
554 //////////////////////////////////////////////////////////////////////////
555 /// \brief Execute a function in parallel over the elements of an immutable vector (Map) and accumulate the results into a single value (Reduce).
556 /// Benefits from partial reduction into `nChunks` intermediate results.
557 ///
558 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
559 template <class F, class T, class R, class Cond>
560 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
562 {
563 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
564 }
565
566 //////////////////////////////////////////////////////////////////////////
567 /// \copydoc ROOT::Internal::TExecutor::Reduce(const std::vector<T> &objs,R redfunc)
568 template<class T, class R>
569 auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
570 {
571 // check we can apply reduce to objs
572 static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
573 return SeqReduce(objs, redfunc);
574 }
575
576 //////////////////////////////////////////////////////////////////////////
577 /// \brief "Reduce" an std::vector into a single object in parallel by passing a
578 /// binary function as the second argument defining the reduction operation.
579 ///
580 /// \param objs A vector of elements to combine.
581 /// \param redfunc Binary reduction function to combine the elements of the vector `objs`.
582 /// \return A value result of combining the vector elements into a single object of the same type.
583 template<class T, class BINARYOP>
584 auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
585 {
586 // check we can apply reduce to objs
587 static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
588 return ParallelReduce(objs, redfunc);
589 }
590
591 //////////////////////////////////////////////////////////////////////////
592 /// \brief "Reduce", sequentially, an std::vector into a single object
593 ///
594 /// \param objs A vector of elements to combine.
595 /// \param redfunc Reduction function to combine the elements of the vector `objs`.
596 /// \return A value result of combining the vector elements into a single object of the same type.
597 template<class T, class R>
598 auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
599 {
600 return redfunc(objs);
601 }
602
603} // namespace ROOT
604
605#endif // R__USE_IMT
606#endif
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define a(i)
Definition RSha256.hxx:99
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
This class defines an interface to execute the same task multiple times, possibly in parallel and wit...
A pseudo container class which is a generator of indices.
Definition TSeq.hxx:67
iterator begin() const
Definition TSeq.hxx:172
T step() const
Definition TSeq.hxx:193
iterator end() const
Definition TSeq.hxx:175
This class provides a simple interface to execute the same task multiple times in parallel threads,...
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
#define F(x, y, z)
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.