Logo ROOT  
Reference Guide
TThreadExecutor.hxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Xavier Valls March 2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_TThreadExecutor
13 #define ROOT_TThreadExecutor
14 
15 #include "RConfigure.h"
16 
17 // exclude in case ROOT does not have IMT support
18 #ifndef R__USE_IMT
19 // No need to error out for dictionaries.
20 # if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21 # error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22 # endif
23 #else
24 
25 #include "ROOT/TExecutorCRTP.hxx"
26 #include "ROOT/TSeq.hxx"
27 #include "RTaskArena.hxx"
28 #include "TError.h"
29 
#include <algorithm> //std::min
30 #include <functional> //std::function
31 #include <initializer_list>
32 #include <memory>
33 #include <numeric> //std::accumulate
34 #include <type_traits> //std::enable_if, std::result_of
35 #include <utility> //std::move
36 #include <vector>
37 
38 namespace ROOT {
39 
// Executor class deriving from TExecutorCRTP<TThreadExecutor>. The templated Foreach, MapImpl, Map,
// MapReduce, Reduce and SeqReduce members declared here are defined further down in this header;
// the constructor, GetPoolSize, ParallelFor and ParallelReduce are only declared in this file.
40  class TThreadExecutor: public TExecutorCRTP<TThreadExecutor> {
// The CRTP base is a friend so that it can reach the private MapImpl overloads declared below.
41  friend TExecutorCRTP;
42  public:
43 
44  explicit TThreadExecutor(UInt_t nThreads = 0u);
45 
// Copy construction is disabled.
46  TThreadExecutor(const TThreadExecutor &) = delete;
48 
49  // Foreach: run `func` for its side effects only; these overloads return void.
50  // nChunks defaults to 0, in which case every element is dispatched as its own ParallelFor iteration.
51  template<class F>
52  void Foreach(F func, unsigned nTimes, unsigned nChunks = 0);
53  template<class F, class INTEGER>
54  void Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks = 0);
55  template<class F, class T>
56  void Foreach(F func, std::initializer_list<T> args, unsigned nChunks = 0);
57  template<class F, class T>
58  void Foreach(F func, std::vector<T> &args, unsigned nChunks = 0);
59  template<class F, class T>
60  void Foreach(F func, const std::vector<T> &args, unsigned nChunks = 0);
61 
62  // Map
63  // (no Map declaration is visible at this point of the listing; the unchunked Map calls made by the
64  // definitions below presumably resolve to the base class -- TODO confirm against the original header)
65 
66  // MapReduce
67  //
68  // We need to reimplement the MapReduce interfaces to allow for parallel reduction, defined in
69  // this class but not in the base class.
70  //
71  // The trailing return types also check at compile time that func can be called with the type of the
72  // arguments.
73  // A static_assert in TThreadExecutor::Reduce checks that redfunc returns the element type of the vector it reduces.
75  template<class F, class R, class Cond = noReferenceCond<F>>
76  auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of<F()>::type;
77  template<class F, class R, class Cond = noReferenceCond<F>>
78  auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type;
79  template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
80  auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type;
81  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
82  auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
83  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
84  auto MapReduce(F func, std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type;
85  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
86  auto MapReduce(F func, const std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type;
87  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
88  auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
89  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
90  auto MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
91 
// Reduce: the first overload takes a callable applied to the whole vector (forwarded to SeqReduce),
// the second a binary operation on two elements (forwarded to ParallelReduce).
93  template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
94  template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
95 
96  unsigned GetPoolSize() const;
97 
98  private:
99  // Implementation of the Map functions declared in the parent class (TExecutorCRTP)
100  //
101  template<class F, class Cond = noReferenceCond<F>>
102  auto MapImpl(F func, unsigned nTimes) -> std::vector<typename std::result_of<F()>::type>;
103  template<class F, class INTEGER, class Cond = noReferenceCond<F, INTEGER>>
104  auto MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<typename std::result_of<F(INTEGER)>::type>;
105  template<class F, class T, class Cond = noReferenceCond<F, T>>
106  auto MapImpl(F func, std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type>;
107  template<class F, class T, class Cond = noReferenceCond<F, T>>
108  auto MapImpl(F func, const std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type>;
109 
110  // Extension of the Map interfaces with chunking, specific to this class and
111  // only available from a MapReduce call. Each overload returns one reduced value per chunk.
112  template<class F, class R, class Cond = noReferenceCond<F>>
113  auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type>;
114  template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
115  auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(INTEGER)>::type>;
116  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
117  auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
118  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
119  auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
120  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
121  auto Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
122 
123  // Functions that interface with the parallel library used as a backend.
// ParallelReduce is declared for double and float element types only.
124  void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
125  double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
126  float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
127  template<class T, class R>
128  auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
129 
130  /// Pointer to the TBB task arena wrapper
131  std::shared_ptr<ROOT::Internal::RTaskArenaWrapper> fTaskArenaW = nullptr;
132  };
133 
134  /************ TEMPLATE METHODS IMPLEMENTATION ******************/
135 
136  //////////////////////////////////////////////////////////////////////////
137  /// \brief Execute a function without arguments several times in parallel, dividing the execution in nChunks.
138  ///
139  /// \param func Function to be executed.
140  /// \param nTimes Number of times function should be called.
141  /// \param nChunks Number of chunks to split the input data for processing.
142  template<class F>
143  void TThreadExecutor::Foreach(F func, unsigned nTimes, unsigned nChunks) {
144  if (nChunks == 0) {
145  ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
146  return;
147  }
148 
149  unsigned step = (nTimes + nChunks - 1) / nChunks;
150  auto lambda = [&](unsigned int i)
151  {
152  for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
153  func();
154  }
155  };
156  ParallelFor(0U, nTimes, step, lambda);
157  }
158 
159  //////////////////////////////////////////////////////////////////////////
160  /// \brief Execute a function in parallel over a sequence of indexes, dividing the execution in nChunks.
161  ///
162  /// \param func Function to be executed. Must take an element of the sequence passed assecond argument as a parameter.
163  /// \param args Sequence of indexes to execute `func` on.
164  /// \param nChunks Number of chunks to split the input data for processing.
165  template<class F, class INTEGER>
166  void TThreadExecutor::Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks) {
167  if (nChunks == 0) {
168  ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
169  return;
170  }
171  unsigned start = *args.begin();
172  unsigned end = *args.end();
173  unsigned seqStep = args.step();
174  unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
175 
176  auto lambda = [&](unsigned int i)
177  {
178  for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
179  func(i + j);
180  }
181  };
182  ParallelFor(start, end, step, lambda);
183  }
184 
185  //////////////////////////////////////////////////////////////////////////
186  /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks.
187  ///
188  /// \param func Function to be executed on the elements of the initializer_list passed as second parameter.
189  /// \param args initializer_list for a vector to apply `func` on.
190  /// \param nChunks Number of chunks to split the input data for processing.
191  template<class F, class T>
192  void TThreadExecutor::Foreach(F func, std::initializer_list<T> args, unsigned nChunks) {
193  std::vector<T> vargs(std::move(args));
194  Foreach(func, vargs, nChunks);
195  }
196 
197  //////////////////////////////////////////////////////////////////////////
198  /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks.
199  ///
200  /// \param func Function to be executed on the elements of the vector passed as second parameter.
201  /// \param args Vector of elements passed as an argument to `func`.
202  /// \param nChunks Number of chunks to split the input data for processing.
203  template<class F, class T>
204  void TThreadExecutor::Foreach(F func, std::vector<T> &args, unsigned nChunks) {
205  unsigned int nToProcess = args.size();
206  if (nChunks == 0) {
207  ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
208  return;
209  }
210 
211  unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
212  auto lambda = [&](unsigned int i)
213  {
214  for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
215  func(args[i + j]);
216  }
217  };
218  ParallelFor(0U, nToProcess, step, lambda);
219  }
220 
221  //////////////////////////////////////////////////////////////////////////
222  /// \brief Execute a function in parallel over the elements of a immutable vector, dividing the execution in nChunks.
223  ///
224  /// \param func Function to be executed on the elements of the vector passed as second parameter.
225  /// \param args Immutable vector of elements passed as an argument to `func`.
226  /// \param nChunks Number of chunks to split the input data for processing.
227  template<class F, class T>
228  void TThreadExecutor::Foreach(F func, const std::vector<T> &args, unsigned nChunks) {
229  unsigned int nToProcess = args.size();
230  if (nChunks == 0) {
231  ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
232  return;
233  }
234 
235  unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
236  auto lambda = [&](unsigned int i)
237  {
238  for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
239  func(args[i + j]);
240  }
241  };
242  ParallelFor(0U, nToProcess, step, lambda);
243  }
244 
245  //////////////////////////////////////////////////////////////////////////
246  /// \brief Execute a function without arguments several times in parallel.
247  /// Implementation of the Map method.
248  ///
249  /// \copydetails TExecutorCRTP::Map(F func,unsigned nTimes)
250  template<class F, class Cond>
251  auto TThreadExecutor::MapImpl(F func, unsigned nTimes) -> std::vector<typename std::result_of<F()>::type> {
252  using retType = decltype(func());
253  std::vector<retType> reslist(nTimes);
254  auto lambda = [&](unsigned int i)
255  {
256  reslist[i] = func();
257  };
258  ParallelFor(0U, nTimes, 1, lambda);
259 
260  return reslist;
261  }
262 
263  //////////////////////////////////////////////////////////////////////////
264  /// \brief Execute a function over a sequence of indexes in parallel.
265  /// Implementation of the Map method.
266  ///
267  /// \copydetails TExecutorCRTP::Map(F func,ROOT::TSeq<INTEGER> args)
268  template<class F, class INTEGER, class Cond>
269  auto TThreadExecutor::MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<typename std::result_of<F(INTEGER)>::type> {
270  unsigned start = *args.begin();
271  unsigned end = *args.end();
272  unsigned seqStep = args.step();
273 
274  using retType = decltype(func(start));
275  std::vector<retType> reslist(args.size());
276  auto lambda = [&](unsigned int i)
277  {
278  reslist[i] = func(i);
279  };
280  ParallelFor(start, end, seqStep, lambda);
281 
282  return reslist;
283  }
284 
285  //////////////////////////////////////////////////////////////////////////
286  /// \brief Execute a function `nTimes` in parallel, dividing the execution in nChunks and
287  /// providing a result per chunk.
288  ///
289  /// \copydetails ROOT::Internal::TExecutor::Map(F func,unsigned nTimes,R redfunc,unsigned nChunks)
290  template<class F, class R, class Cond>
291  auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type> {
292  if (nChunks == 0)
293  {
294  return Map(func, nTimes);
295  }
296 
297  unsigned step = (nTimes + nChunks - 1) / nChunks;
298  // Avoid empty chunks
299  unsigned actualChunks = (nTimes + step - 1) / step;
300  using retType = decltype(func());
301  std::vector<retType> reslist(actualChunks);
302  auto lambda = [&](unsigned int i)
303  {
304  std::vector<retType> partialResults(std::min(nTimes-i, step));
305  for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
306  partialResults[j] = func();
307  }
308  reslist[i / step] = Reduce(partialResults, redfunc);
309  };
310  ParallelFor(0U, nTimes, step, lambda);
311 
312  return reslist;
313  }
314 
315  //////////////////////////////////////////////////////////////////////////
316  /// \brief Execute a function over the elements of a vector in parallel.
317  /// Implementation of the Map method.
318  ///
319  /// \copydetails TExecutorCRTP::Map(F func,std::vector<T> &args)
320  template<class F, class T, class Cond>
321  auto TThreadExecutor::MapImpl(F func, std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type> {
322  // //check whether func is callable
323  using retType = decltype(func(args.front()));
324 
325  unsigned int nToProcess = args.size();
326  std::vector<retType> reslist(nToProcess);
327 
328  auto lambda = [&](unsigned int i)
329  {
330  reslist[i] = func(args[i]);
331  };
332 
333  ParallelFor(0U, nToProcess, 1, lambda);
334 
335  return reslist;
336  }
337 
338  //////////////////////////////////////////////////////////////////////////
339  /// \brief Execute a function over the elements of a vector in parallel.
340  /// Implementation of the Map method.
341  ///
342  /// \copydetails TExecutorCRTP::Map(F func,const std::vector<T> &args)
343  template<class F, class T, class Cond>
344  auto TThreadExecutor::MapImpl(F func, const std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type> {
345  // //check whether func is callable
346  using retType = decltype(func(args.front()));
347 
348  unsigned int nToProcess = args.size();
349  std::vector<retType> reslist(nToProcess);
350 
351  auto lambda = [&](unsigned int i)
352  {
353  reslist[i] = func(args[i]);
354  };
355 
356  ParallelFor(0U, nToProcess, 1, lambda);
357 
358  return reslist;
359  }
360 
361  //////////////////////////////////////////////////////////////////////////
362  /// \brief Execute a function in parallel over the elements of a sequence, dividing the execution in nChunks and
363  /// providing a result per chunk.
364  ///
365  /// \copydetails ROOT::Internal::TExecutor::Map(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
366  template<class F, class INTEGER, class R, class Cond>
367  auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(INTEGER)>::type> {
368  if (nChunks == 0)
369  {
370  return Map(func, args);
371  }
372 
373  unsigned start = *args.begin();
374  unsigned end = *args.end();
375  unsigned seqStep = args.step();
376  unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
377  // Avoid empty chunks
378  unsigned actualChunks = (end - start + step - 1) / step;
379 
380  using retType = decltype(func(start));
381  std::vector<retType> reslist(actualChunks);
382  auto lambda = [&](unsigned int i)
383  {
384  std::vector<retType> partialResults(std::min(end-i, step));
385  for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
386  partialResults[j] = func(i + j);
387  }
388  reslist[i / step] = Reduce(partialResults, redfunc);
389  };
390  ParallelFor(start, end, step, lambda);
391 
392  return reslist;
393  }
394 
395  //////////////////////////////////////////////////////////////////////////
396  /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks and
397  /// providing a result per chunk.
398  ///
399  /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
400  template<class F, class T, class R, class Cond>
401  auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
402  if (nChunks == 0)
403  {
404  return Map(func, args);
405  }
406 
407  unsigned int nToProcess = args.size();
408  unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
409  // Avoid empty chunks
410  unsigned actualChunks = (nToProcess + step - 1) / step;
411 
412  using retType = decltype(func(args.front()));
413  std::vector<retType> reslist(actualChunks);
414  auto lambda = [&](unsigned int i)
415  {
416  std::vector<T> partialResults(step);
417  for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
418  partialResults[j] = func(args[i + j]);
419  }
420  reslist[i / step] = Reduce(partialResults, redfunc);
421  };
422 
423  ParallelFor(0U, nToProcess, step, lambda);
424 
425  return reslist;
426  }
427 
428  //////////////////////////////////////////////////////////////////////////
429  /// \brief Execute a function in parallel over the elements of an immutable vector, dividing the execution in nChunks and
430  /// providing a result per chunk.
431  ///
432  /// \copydetails ROOT::Internal::TExecutor::Map(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
433  template<class F, class T, class R, class Cond>
434  auto TThreadExecutor::Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
435  if (nChunks == 0)
436  {
437  return Map(func, args);
438  }
439 
440  unsigned int nToProcess = args.size();
441  unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
442  // Avoid empty chunks
443  unsigned actualChunks = (nToProcess + step - 1) / step;
444 
445  using retType = decltype(func(args.front()));
446  std::vector<retType> reslist(actualChunks);
447  auto lambda = [&](unsigned int i)
448  {
449  std::vector<T> partialResults(step);
450  for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
451  partialResults[j] = func(args[i + j]);
452  }
453  reslist[i / step] = Reduce(partialResults, redfunc);
454  };
455 
456  ParallelFor(0U, nToProcess, step, lambda);
457 
458  return reslist;
459  }
460 
461  //////////////////////////////////////////////////////////////////////////
462  /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks and
463  /// providing a result per chunk.
464  ///
465  /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
466  template<class F, class T, class R, class Cond>
467  auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
468  std::vector<T> vargs(std::move(args));
469  const auto &reslist = Map(func, vargs, redfunc, nChunks);
470  return reslist;
471  }
472 
473  //////////////////////////////////////////////////////////////////////////
474  /// \brief Execute a function `nTimes` in parallel (Map) and accumulate the results into a single value (Reduce).
475  /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc)
476  template<class F, class R, class Cond>
477  auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of<F()>::type {
478  return Reduce(Map(func, nTimes), redfunc);
479  }
480 
481  //////////////////////////////////////////////////////////////////////////
482  /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
483  /// Benefits from partial reduction into `nChunks` intermediate results.
484  ///
485  /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc,unsigned nChunks)
486  template<class F, class R, class Cond>
487  auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type {
488  return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
489  }
490 
491  //////////////////////////////////////////////////////////////////////////
492  /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
493  /// Benefits from partial reduction into `nChunks` intermediate results.
494  ///
495  /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
496  template<class F, class INTEGER, class R, class Cond>
497  auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type {
498  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
499  }
500 
501  //////////////////////////////////////////////////////////////////////////
502  /// \brief Execute a function in parallel over the elements of an initializer_list (Map) and accumulate the results into a single value (Reduce).
503  /// Benefits from partial reduction into `nChunks` intermediate results.
504  ///
505  /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
506  template<class F, class T, class R, class Cond>
507  auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
508  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
509  }
510 
511  //////////////////////////////////////////////////////////////////////////
512  /// \brief Execute a function over the elements of a vector in parallel (Map) and accumulate the results into a single value (Reduce).
513  /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc)
514  template<class F, class T, class R, class Cond>
515  auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type {
516  return Reduce(Map(func, args), redfunc);
517  }
518 
519  //////////////////////////////////////////////////////////////////////////
520  /// \brief Execute a function over the elements of an immutable vector in parallel (Map) and accumulate the results into a single value (Reduce).
521  /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc)
522  template<class F, class T, class R, class Cond>
523  auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type {
524  return Reduce(Map(func, args), redfunc);
525  }
526 
527  //////////////////////////////////////////////////////////////////////////
528  /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
529  /// Benefits from partial reduction into `nChunks` intermediate results.
530  ///
531  /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
532  template<class F, class T, class R, class Cond>
533  auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
534  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
535  }
536 
537  //////////////////////////////////////////////////////////////////////////
538  /// \brief Execute a function in parallel over the elements of an immutable vector (Map) and accumulate the results into a single value (Reduce).
539  /// Benefits from partial reduction into `nChunks` intermediate results.
540  ///
541  /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
542  template<class F, class T, class R, class Cond>
543  auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
544  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
545  }
546 
547  //////////////////////////////////////////////////////////////////////////
548  /// \copydoc ROOT::Internal::TExecutor::Reduce(const std::vector<T> &objs,R redfunc)
549  template<class T, class R>
550  auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
551  {
552  // check we can apply reduce to objs
553  static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
554  return SeqReduce(objs, redfunc);
555  }
556 
557  //////////////////////////////////////////////////////////////////////////
558  /// \brief "Reduce" an std::vector into a single object in parallel by passing a
559  /// binary function as the second argument defining the reduction operation.
560  ///
561  /// \param objs A vector of elements to combine.
562  /// \param redfunc Binary reduction function to combine the elements of the vector `objs`.
563  /// \return A value result of combining the vector elements into a single object of the same type.
564  template<class T, class BINARYOP>
565  auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
566  {
567  // check we can apply reduce to objs
568  static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
569  return ParallelReduce(objs, redfunc);
570  }
571 
572  //////////////////////////////////////////////////////////////////////////
573  /// \brief "Reduce", sequentially, an std::vector into a single object
574  ///
575  /// \param objs A vector of elements to combine; it is handed to `redfunc` as a whole.
576  /// \param redfunc Reduction function; it is called exactly once, with `objs` as its only argument.
577  /// \return Whatever `redfunc(objs)` returns (the return type is deduced from that expression).
578  template<class T, class R>
579  auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
580  {
// No work is done here besides the call itself: the entire reduction is performed by redfunc.
581  return redfunc(objs);
582  }
583 
584 } // namespace ROOT
585 
586 #endif // R__USE_IMT
587 #endif
ROOT::VecOps::Map
auto Map(Args &&... args) -> decltype(ROOT::Detail::VecOps::MapFromTuple(std::forward_as_tuple(args...), std::make_index_sequence< sizeof...(args) - 1 >()))
Create new collection applying a callable to the elements of the input collection.
Definition: RVec.hxx:911
ROOT::TExecutorCRTP
This class defines an interface to execute the same task multiple times, possibly in parallel and wit...
Definition: TExecutorCRTP.hxx:102
f
#define f(i)
Definition: RSha256.hxx:104
ROOT::TThreadExecutor::SeqReduce
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
Definition: TThreadExecutor.hxx:579
ROOT::TThreadExecutor::TExecutorCRTP
friend TExecutorCRTP
Definition: TThreadExecutor.hxx:41
ROOT::TThreadExecutor::ParallelFor
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
Definition: TThreadExecutor.cxx:158
ROOT::TSeq::begin
iterator begin() const
Definition: TSeq.hxx:163
F
#define F(x, y, z)
ROOT::TThreadExecutor::TThreadExecutor
TThreadExecutor(UInt_t nThreads=0u)
Class constructor.
Definition: TThreadExecutor.cxx:146
ROOT::TThreadExecutor
This class provides a simple interface to execute the same task multiple times in parallel threads,...
Definition: TThreadExecutor.hxx:40
ROOT::TThreadExecutor::ParallelReduce
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
Definition: TThreadExecutor.cxx:180
ROOT::TThreadExecutor::Map
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< typename std::result_of< F()>::type >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
Definition: TThreadExecutor.hxx:291
ROOT::TThreadExecutor::Foreach
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
Definition: TThreadExecutor.hxx:143
TExecutorCRTP.hxx
b
#define b(i)
Definition: RSha256.hxx:100
ROOT::TThreadExecutor::TThreadExecutor
TThreadExecutor(const TThreadExecutor &)=delete
R
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
ROOT::TThreadExecutor::Reduce
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
Definition: TThreadExecutor.hxx:550
a
auto * a
Definition: textangle.C:12
TSeq.hxx
ROOT::R::function
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:151
ROOT::TThreadExecutor::fTaskArenaW
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
Definition: TThreadExecutor.hxx:131
ROOT::TThreadExecutor::operator=
TThreadExecutor & operator=(const TThreadExecutor &)=delete
RTaskArena.hxx
ROOT::TSeq::step
T step() const
Definition: TSeq.hxx:184
unsigned int
ROOT::TSeq::end
iterator end() const
Definition: TSeq.hxx:166
ROOT::TThreadExecutor::GetPoolSize
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
Definition: TThreadExecutor.cxx:213
ROOT::TThreadExecutor::MapImpl
auto MapImpl(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute a function without arguments several times in parallel.
Definition: TThreadExecutor.hxx:251
ROOT::Math::Chebyshev::T
double T(double x)
Definition: ChebyshevPol.h:34
ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
type
int type
Definition: TGX11.cxx:121
ROOT
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: EExecutionPolicy.hxx:4
ROOT::TThreadExecutor::MapReduce
auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of< F()>::type
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
Definition: TThreadExecutor.hxx:477
TError.h