Logo ROOT   6.12/07
Reference Guide
TThreadExecutor.hxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Xavier Valls March 2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2006, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_TThreadExecutor
13 #define ROOT_TThreadExecutor
14 
15 #include "RConfigure.h"
16 
17 // exclude in case ROOT does not have IMT support
18 #ifndef R__USE_IMT
19 // No need to error out for dictionaries.
20 # if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21 # error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22 # endif
23 #else
24 
25 #include "ROOT/TExecutor.hxx"
26 #include "ROOT/TPoolManager.hxx"
27 #include "TROOT.h"
28 #include "TError.h"
29 #include <functional>
30 #include <memory>
31 #include <numeric>
32 
33 namespace ROOT {
34 
35  class TThreadExecutor: public TExecutor<TThreadExecutor> {
36  public:
37  explicit TThreadExecutor();
38 
39  explicit TThreadExecutor(UInt_t nThreads);
40 
41  TThreadExecutor(TThreadExecutor &) = delete;
43 
44  template<class F>
45  void Foreach(F func, unsigned nTimes);
46  template<class F, class INTEGER>
47  void Foreach(F func, ROOT::TSeq<INTEGER> args);
48  /// \cond
49  template<class F, class T>
50  void Foreach(F func, std::initializer_list<T> args);
51  /// \endcond
52  template<class F, class T>
53  void Foreach(F func, std::vector<T> &args);
54 
56  template<class F, class Cond = noReferenceCond<F>>
57  auto Map(F func, unsigned nTimes) -> std::vector<typename std::result_of<F()>::type>;
58  template<class F, class INTEGER, class Cond = noReferenceCond<F, INTEGER>>
60  template<class F, class T, class Cond = noReferenceCond<F, T>>
61  auto Map(F func, std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type>;
62 
63  // // MapReduce
64  // // the late return types also check at compile-time whether redfunc is compatible with func,
65  // // other than checking that func is compatible with the type of arguments.
66  // // a static_assert check in TThreadExecutor::Reduce is used to check that redfunc is compatible with the type returned by func
68  template<class F, class R, class Cond = noReferenceCond<F>>
69  auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of<F()>::type;
70  template<class F, class R, class Cond = noReferenceCond<F>>
71  auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type;
72  template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
73  auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type;
74  /// \cond
75  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
76  auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
77  /// \endcond
78  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
79  auto MapReduce(F func, std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type;
80  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
81  auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
82 
84  template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
85  template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
86 
87  protected:
88  template<class F, class R, class Cond = noReferenceCond<F>>
89  auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type>;
90  template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
91  auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(INTEGER)>::type>;
92  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
93  auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
94  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
95  auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
96 
97  private:
98  void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
99  double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
100  float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
101  template<class T, class R>
102  auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
103 
104  std::shared_ptr<ROOT::Internal::TPoolManager> fSched = nullptr;
105  };
106 
107  /************ TEMPLATE METHODS IMPLEMENTATION ******************/
108 
109  //////////////////////////////////////////////////////////////////////////
110  /// Execute func (with no arguments) nTimes in parallel.
111  /// Functions that take more than zero arguments can be executed (with
112  /// fixed arguments) by wrapping them in a lambda or with std::bind.
113  template<class F>
114  void TThreadExecutor::Foreach(F func, unsigned nTimes) {
115  ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
116  }
117 
118  //////////////////////////////////////////////////////////////////////////
119  /// Execute func in parallel, taking an element of a
120  /// sequence as argument.
121  template<class F, class INTEGER>
123  ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
124  }
125 
126  /// \cond
127  //////////////////////////////////////////////////////////////////////////
128  /// Execute func in parallel, taking an element of a
129  /// initializer_list as argument.
130  template<class F, class T>
131  void TThreadExecutor::Foreach(F func, std::initializer_list<T> args) {
132  std::vector<T> vargs(std::move(args));
133  Foreach(func, vargs);
134  }
135  /// \endcond
136 
137  //////////////////////////////////////////////////////////////////////////
138  /// Execute func in parallel, taking an element of an
139  /// std::vector as argument.
140  template<class F, class T>
141  void TThreadExecutor::Foreach(F func, std::vector<T> &args) {
142  unsigned int nToProcess = args.size();
143  ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
144  }
145 
146  //////////////////////////////////////////////////////////////////////////
147  /// Execute func (with no arguments) nTimes in parallel.
148  /// A vector containg executions' results is returned.
149  /// Functions that take more than zero arguments can be executed (with
150  /// fixed arguments) by wrapping them in a lambda or with std::bind.
151  template<class F, class Cond>
153  using retType = decltype(func());
154  std::vector<retType> reslist(nTimes);
155  auto lambda = [&](unsigned int i)
156  {
157  reslist[i] = func();
158  };
159  ParallelFor(0U, nTimes, 1, lambda);
160 
161  return reslist;
162  }
163 
164  //////////////////////////////////////////////////////////////////////////
165  /// Execute func in parallel, taking an element of a
166  /// sequence as argument.
167  /// A vector containg executions' results is returned.
168  template<class F, class INTEGER, class Cond>
170  unsigned start = *args.begin();
171  unsigned end = *args.end();
172  unsigned seqStep = args.step();
173 
174  using retType = decltype(func(start));
175  std::vector<retType> reslist(end - start);
176  auto lambda = [&](unsigned int i)
177  {
178  reslist[i] = func(i);
179  };
180  ParallelFor(start, end, seqStep, lambda);
181 
182  return reslist;
183  }
184 
185  //////////////////////////////////////////////////////////////////////////
186  /// Execute func (with no arguments) nTimes in parallel.
187  /// Divides and groups the executions in nChunks (if it doesn't make sense will reduce the number of chunks) with partial reduction;
188  /// A vector containg partial reductions' results is returned.
189  template<class F, class R, class Cond>
190  auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type> {
191  if (nChunks == 0)
192  {
193  return Map(func, nTimes);
194  }
195 
196  unsigned step = (nTimes + nChunks - 1) / nChunks;
197  // Avoid empty chunks
198  unsigned actualChunks = (nTimes + step - 1) / step;
199  using retType = decltype(func());
200  std::vector<retType> reslist(actualChunks);
201  auto lambda = [&](unsigned int i)
202  {
203  std::vector<retType> partialResults(std::min(nTimes-i, step));
204  for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
205  partialResults[j] = func();
206  }
207  reslist[i / step] = redfunc(partialResults);
208  };
209  ParallelFor(0U, nTimes, step, lambda);
210 
211  return reslist;
212  }
213 
214  //////////////////////////////////////////////////////////////////////////
215  /// Execute func in parallel, taking an element of an
216  /// std::vector as argument.
217  /// A vector containg executions' results is returned.
218  // actual implementation of the Map method. all other calls with arguments eventually
219  // call this one
220  template<class F, class T, class Cond>
222  // //check whether func is callable
223  using retType = decltype(func(args.front()));
224 
225  unsigned int nToProcess = args.size();
226  std::vector<retType> reslist(nToProcess);
227 
228  auto lambda = [&](unsigned int i)
229  {
230  reslist[i] = func(args[i]);
231  };
232 
233  ParallelFor(0U, nToProcess, 1, lambda);
234 
235  return reslist;
236  }
237 
238  //////////////////////////////////////////////////////////////////////////
239  /// Execute func in parallel, taking an element of a
240  /// sequence as argument.
241  /// Divides and groups the executions in nChunks (if it doesn't make sense will reduce the number of chunks) with partial reduction\n
242  /// A vector containg partial reductions' results is returned.
243  template<class F, class INTEGER, class R, class Cond>
245  if (nChunks == 0)
246  {
247  return Map(func, args);
248  }
249 
250  unsigned start = *args.begin();
251  unsigned end = *args.end();
252  unsigned seqStep = args.step();
253  unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
254  // Avoid empty chunks
255  unsigned actualChunks = (end - start + step - 1) / step;
256 
257  using retType = decltype(func(start));
258  std::vector<retType> reslist(actualChunks);
259  auto lambda = [&](unsigned int i)
260  {
261  std::vector<retType> partialResults(std::min(end-i, step));
262  for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
263  partialResults[j] = func(i + j);
264  }
265  reslist[i / step] = redfunc(partialResults);
266  };
267  ParallelFor(start, end, step, lambda);
268 
269  return reslist;
270  }
271 
272 /// \cond
273  //////////////////////////////////////////////////////////////////////////
274  /// Execute func in parallel, taking an element of an
275  /// std::vector as argument. Divides and groups the executions in nChunks with partial reduction.
276  /// If it doesn't make sense will reduce the number of chunks.\n
277  /// A vector containg partial reductions' results is returned.
278  template<class F, class T, class R, class Cond>
279  auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
280  if (nChunks == 0)
281  {
282  return Map(func, args);
283  }
284 
285  unsigned int nToProcess = args.size();
286  unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
287  // Avoid empty chunks
288  unsigned actualChunks = (nToProcess + step - 1) / step;
289 
290  using retType = decltype(func(args.front()));
291  std::vector<retType> reslist(actualChunks);
292  auto lambda = [&](unsigned int i)
293  {
294  std::vector<T> partialResults(step);
295  for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
296  partialResults[j] = func(args[i + j]);
297  }
298  reslist[i / step] = redfunc(partialResults);
299  };
300 
301  ParallelFor(0U, nToProcess, step, lambda);
302 
303  return reslist;
304  }
305 
306  //////////////////////////////////////////////////////////////////////////
307  /// Execute func in parallel, taking an element of an
308  /// std::initializer_list as an argument. Divides and groups the executions in nChunks with partial reduction.
309  /// If it doesn't make sense will reduce the number of chunks.\n
310  /// A vector containg partial reductions' results is returned.
311  template<class F, class T, class R, class Cond>
312  auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
313  std::vector<T> vargs(std::move(args));
314  const auto &reslist = Map(func, vargs, redfunc, nChunks);
315  return reslist;
316  }
317 /// \endcond
318 
319 
320  //////////////////////////////////////////////////////////////////////////
321  /// This method behaves just like Map, but an additional redfunc function
322  /// must be provided. redfunc is applied to the vector Map would return and
323  /// must return the same type as func. In practice, redfunc can be used to
324  /// "squash" the vector returned by Map into a single object by merging,
325  /// adding, mixing the elements of the vector.\n
326  /// The fourth argument indicates the number of chunks we want to divide our work in.
327  template<class F, class R, class Cond>
328  auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of<F()>::type {
329  return Reduce(Map(func, nTimes), redfunc);
330  }
331 
332  template<class F, class R, class Cond>
333  auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type {
334  return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
335  }
336 
337  template<class F, class INTEGER, class R, class Cond>
338  auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type {
339  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
340  }
341  /// \cond
342  template<class F, class T, class R, class Cond>
343  auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
344  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
345  }
346  /// \endcond
347 
348  template<class F, class T, class R, class Cond>
349  auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type {
350  return Reduce(Map(func, args), redfunc);
351  }
352 
353  template<class F, class T, class R, class Cond>
354  auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
355  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
356  }
357 
358  //////////////////////////////////////////////////////////////////////////
359  /// "Reduce" an std::vector into a single object in parallel by passing a
360  /// binary operator as the second argument to act on pairs of elements of the std::vector.
361  template<class T, class BINARYOP>
362  auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
363  {
364  // check we can apply reduce to objs
365  static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
366  return ParallelReduce(objs, redfunc);
367  }
368 
369  //////////////////////////////////////////////////////////////////////////
370  /// "Reduce" an std::vector into a single object by passing a
371  /// function as the second argument defining the reduction operation.
372  template<class T, class R>
373  auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
374  {
375  // check we can apply reduce to objs
376  static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
377  return SeqReduce(objs, redfunc);
378  }
379 
380  template<class T, class R>
381  auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
382  {
383  return redfunc(objs);
384  }
385 
386 } // namespace ROOT
387 
388 #endif // R__USE_IMT
389 #endif
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
double T(double x)
Definition: ChebyshevPol.h:34
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
This class defines an interface to execute the same task multiple times in parallel, possibly with different arguments every time.
Definition: TExecutor.hxx:61
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
auto Reduce(const std::vector< T > &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
"Reduce" an std::vector into a single object in parallel by passing a binary operator as the second a...
This class provides a simple interface to execute the same task multiple times in parallel...
#define F(x, y, z)
TThreadExecutor & operator=(TThreadExecutor &)=delete
auto * a
Definition: textangle.C:12
unsigned int UInt_t
Definition: RtypesCore.h:42
TThreadExecutor()
Class constructor.
T step() const
Definition: TSeq.hxx:184
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
int type
Definition: TGX11.cxx:120
iterator end() const
Definition: TSeq.hxx:166
auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of< F()>::type
This method behaves just like Map, but an additional redfunc function must be provided.
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
std::shared_ptr< ROOT::Internal::TPoolManager > fSched
constexpr Double_t R()
Definition: TMath.h:213
iterator begin() const
Definition: TSeq.hxx:163