Logo ROOT   master
Reference Guide
Arithmetic.hxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 20/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////////////
13 // Implementation of Helper arithmetic functions for the //
14 // multi-threaded CPU implementation of DNNs. //
15 ////////////////////////////////////////////////////////////
16 
18 
19 #ifdef R__HAS_TMVACPU
21 #else
23 #include "TVectorT.h"
24 #endif
25 
26 #if defined(__GNUC__)
27 #pragma GCC diagnostic push
28 #pragma GCC diagnostic ignored "-Wshadow"
29 
30 //#include "tbb/tbb.h"
31 
32 #pragma GCC diagnostic pop
33 #endif
34 
35 namespace TMVA
36 {
37 namespace DNN
38 {
39 
40 //____________________________________________________________________________
41 template<typename AReal>
43  const TCpuMatrix<AReal> &A,
44  const TCpuMatrix<AReal> &B)
45 {
46  int m = (int) A.GetNrows();
47  int k = (int) A.GetNcols();
48  int n = (int) B.GetNcols();
49 
50  R__ASSERT((int) C.GetNrows() == m);
51  R__ASSERT((int) C.GetNcols() == n);
52  R__ASSERT((int) B.GetNrows() == k);
53 
54 #ifdef R__HAS_TMVACPU
55 
56  char transa = 'N';
57  char transb = 'N';
58 
59  AReal alpha = 1.0;
60  AReal beta = 0.0;
61 
62  const AReal * APointer = A.GetRawDataPointer();
63  const AReal * BPointer = B.GetRawDataPointer();
64  AReal * CPointer = C.GetRawDataPointer();
65 
66  ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
67  APointer, &m, BPointer, &k, &beta, CPointer, &m);
68 #else
69  TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
70  tmp.Mult(A,B);
71  C = tmp;
72 #endif
73 }
74 
75 //____________________________________________________________________________
76 template<typename AReal>
78  const TCpuMatrix<AReal> &A,
79  const TCpuMatrix<AReal> &B,
80  AReal alpha, AReal beta)
81 {
82 #ifdef R__HAS_TMVACPU
83  int m = (int) A.GetNcols();
84  int k = (int) A.GetNrows();
85  int n = (int) B.GetNcols();
86 
87  R__ASSERT((int) C.GetNrows() == m);
88  R__ASSERT((int) C.GetNcols() == n);
89  R__ASSERT((int) B.GetNrows() == k);
90 
91  char transa = 'T';
92  char transb = 'N';
93 
94  //AReal alpha = 1.0;
95  //AReal beta = 0.0;
96 
97  const AReal *APointer = A.GetRawDataPointer();
98  const AReal *BPointer = B.GetRawDataPointer();
99  AReal *CPointer = C.GetRawDataPointer();
100 
101  ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
102  APointer, &k, BPointer, &k, &beta, CPointer, &m);
103 #else
104  TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
105  tmp.TMult(A, B);
106  tmp = alpha * tmp;
107  if (beta != 0.0) {
108  TMatrixT<AReal> tmp0(C);
109  tmp = tmp + beta * tmp0;
110  }
111  C = tmp;
112 #endif
113 }
114 
115 //____________________________________________________________________________
116 template<typename AReal>
118  const TCpuMatrix<AReal> &A)
119 {
120  const AReal *dataA = A.GetRawDataPointer();
121  AReal *dataB = B.GetRawDataPointer();
122 
123  size_t nElements = A.GetNoElements();
124  R__ASSERT(B.GetNoElements() == nElements);
125  size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
126 
127  auto f = [&](UInt_t workerID)
128  {
129  for (size_t j = 0; j < nSteps; ++j) {
130  size_t idx = workerID+j;
131  if (idx >= nElements) break;
132  dataB[idx] *= dataA[idx];
133  }
134  return 0;
135  };
136 
137  if (nSteps < nElements) {
138 #ifdef DL_USE_MTE
139  B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
140 #else
141  for (size_t i = 0; i < nElements ; i+= nSteps)
142  f(i);
143 #endif
144  }
145  else {
146  f(0);
147  }
148 }
149 
150 //____________________________________________________________________________
151 template<typename AReal>
153  const TCpuTensor<AReal> &A)
154 {
155  const AReal *dataA = A.GetRawDataPointer();
156  AReal *dataB = B.GetRawDataPointer();
157 
158  size_t nElements = A.GetNoElements();
159  R__ASSERT(B.GetNoElements() == nElements);
160  size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
161 
162  auto f = [&](UInt_t workerID)
163  {
164  for (size_t j = 0; j < nSteps; ++j) {
165  size_t idx = workerID+j;
166  if (idx >= nElements) break;
167  dataB[idx] *= dataA[idx];
168  }
169  return 0;
170  };
171 
172  if (nSteps < nElements) {
173 #ifdef DL_USE_MTE
175 #else
176  for (size_t i = 0; i < nElements ; i+= nSteps)
177  f(i);
178 #endif
179  }
180  else {
181  f(0);
182  }
183 }
184 
185 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
186 /// \brief Checks two matrices for element-wise equality.
187 /// \tparam AReal An architecture-specific floating point number type.
188 /// \param A The first matrix.
189 /// \param B The second matrix.
190 /// \param epsilon Equality tolerance, needed to address floating point arithmetic.
191 /// \return Whether the two matrices can be considered equal element-wise
192 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
193 template<typename AReal>
195 {
196  if (A.GetNrows() != B.GetNrows() || A.GetNcols() != B.GetNcols()) {
197  Fatal("AlmostEquals", "The passed matrices have unequal shapes.");
198  }
199 
200  const AReal *dataA = A.GetRawDataPointer();
201  const AReal *dataB = B.GetRawDataPointer();
202  size_t nElements = A.GetNoElements();
203 
204  for(size_t i = 0; i < nElements; i++) {
205  if(fabs(dataA[i] - dataB[i]) > epsilon) return false;
206  }
207  return true;
208 }
209 
210 //____________________________________________________________________________
211 template<typename AReal>
213  const TCpuMatrix<AReal> &A,
214  AReal alpha, AReal beta)
215 {
216 
217  int m = (int) A.GetNrows();
218  int n = (int) A.GetNcols();
219 
220  assert((int) B.GetNoElements() >= n);
221 
222 #ifdef R__HAS_TMVACPU
223  int inc = 1;
224  char trans = 'T';
225 
226  const AReal * APointer = A.GetRawDataPointer();
227  AReal * BPointer = B.GetRawDataPointer();
228 
229  // compute B = alpha * A * I + beta * B
230 
231  ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
233  &beta, BPointer, &inc);
234 #else
235  TMatrixT<AReal> tA(A);
236  tA.T();
238  TVectorT<AReal> tmp(n, B.GetRawDataPointer());
239  assert(B.GetNrows() == 1 || B.GetNcols() == 1);
240  tmp = alpha * tA * ones + beta * tmp;
241  // copy result buffer in B matrix
242  std::copy(tmp.GetMatrixArray(), tmp.GetMatrixArray() + n, B.GetRawDataPointer());
243 #endif
244 }
245 
246 //____________________________________________________________________________
247 template<typename AReal>
249  const TCpuMatrix<AReal> &A,
250  AReal alpha)
251 {
252 #ifdef R__HAS_TMVACPU
253  int n = (int) (A.GetNcols() * A.GetNrows());
254  int inc = 1;
255 
256  const AReal *x = A.GetRawDataPointer();
257  AReal *y = B.GetRawDataPointer();
258 
259  ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
260 #else
261  TMatrixT<AReal> tmp(B);
262  TReference<AReal>::ScaleAdd(tmp, A, alpha);
263  B = tmp;
264 #endif
265 }
266 
267 //____________________________________________________________________________
268 template<typename AReal>
270  const TCpuMatrix<AReal> &A)
271 {
272  auto f = [](AReal x) {return x;};
273  B.MapFrom(f, A);
274 }
275 
276 
277 //____________________________________________________________________________
278 template<typename AReal>
280  const TCpuTensor<AReal> &A,
281  AReal alpha)
282 {
283  // should re-implemented at tensor level
284  for (size_t i = 0; i < B.GetFirstSize(); ++i) {
285  TCpuMatrix<AReal> B_m = B.At(i).GetMatrix();
286  ScaleAdd(B_m, A.At(i).GetMatrix(), alpha);
287  }
288 }
289 
290 //____________________________________________________________________________
291 template<typename AReal>
293  const TCpuTensor<AReal> &A)
294 {
295 
296  auto f = [](AReal x) {return x;};
297  B.MapFrom(f, A);
298 }
299 
300 //____________________________________________________________________________
301 template <typename AReal>
303 {
304  auto f = [beta](AReal x) { return x + beta; };
305  A.Map(f);
306 }
307 
308 //____________________________________________________________________________
309 template <typename AReal>
311 {
312  auto f = [beta](AReal x) { return x * beta; };
313  A.Map(f);
314 }
315 
316 //____________________________________________________________________________
317 template <typename AReal>
319 {
320  auto f = [](AReal x) { return 1.0 / x; };
321  A.Map(f);
322 }
323 
324 //____________________________________________________________________________
325 template <typename AReal>
327 {
328  auto f = [](AReal x) { return x * x; };
329  A.Map(f);
330 }
331 
332 //____________________________________________________________________________
333 template <typename AReal>
335 {
336  auto f = [](AReal x) { return sqrt(x); };
337  A.Map(f);
338 }
339 
340 /// Adam updates
341 //____________________________________________________________________________
342 template<typename AReal>
343 void TCpu<AReal>::AdamUpdate(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & M, const TCpuMatrix<AReal> & V, AReal alpha, AReal eps)
344 {
345  // ADAM update the weights.
346  // Weight = Weight - alpha * M / (sqrt(V) + epsilon)
347  AReal * a = A.GetRawDataPointer();
348  const AReal * m = M.GetRawDataPointer();
349  const AReal * v = V.GetRawDataPointer();
350  for (size_t index = 0; index < A.GetNoElements() ; ++index) {
351  a[index] = a[index] - alpha * m[index]/( sqrt(v[index]) + eps);
352  }
353 }
354 
355 //____________________________________________________________________________
356 template<typename AReal>
358 {
359  // First momentum weight gradient update for ADAM
360  // Mt = beta1 * Mt-1 + (1-beta1) * WeightGradients
361  AReal * a = A.GetRawDataPointer();
362  const AReal * b = B.GetRawDataPointer();
363  for (size_t index = 0; index < A.GetNoElements() ; ++index) {
364  a[index] = beta * a[index] + (1.-beta) * b[index];
365  }
366 }
367 //____________________________________________________________________________
368 template<typename AReal>
370 {
371  // Second momentum weight gradient update for ADAM
372  // Vt = beta2 * Vt-1 + (1-beta2) * WeightGradients^2
373  AReal * a = A.GetRawDataPointer();
374  const AReal * b = B.GetRawDataPointer();
375  for (size_t index = 0; index < A.GetNoElements() ; ++index) {
376  a[index] = beta * a[index] + (1.-beta) * b[index] * b[index];
377  }
378 }
379 
380 
381 } // DNN
382 } // TMVA
static double B[]
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition: Arithmetic.hxx:357
The TCpuMatrix class.
Definition: CpuMatrix.h:86
auto * m
Definition: textangle.C:8
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:334
void Fatal(const char *location, const char *msgfmt,...)
unsigned int UInt_t
Definition: CPyCppyy.h:44
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:105
TVectorT.
Definition: TMatrixTBase.h:79
#define R__ASSERT(e)
Definition: TError.h:96
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
Definition: Arithmetic.hxx:248
TMatrixT< Element > & T()
Definition: TMatrixT.h:150
#define f(i)
Definition: RSha256.hxx:104
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:191
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C...
Definition: Arithmetic.hxx:77
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:318
static double A[]
double beta(double x, double y)
Calculates the beta function.
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:326
void Axpy(const int *n, const AReal *alpha, const AReal *x, const int *incx, AReal *y, const int *incy)
Add the vector x scaled by alpha to y scaled by .
double sqrt(double)
Double_t x[n]
Definition: legend1.C:17
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition: Arithmetic.hxx:369
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition: Config.h:83
void TMult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
Create a matrix C such that C = A&#39; * B.
Definition: TMatrixT.cxx:855
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
static double C[]
auto * a
Definition: textangle.C:12
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrixx A and write the results into the first m elements in A...
Definition: Arithmetic.hxx:212
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const AReal *alpha, const AReal *A, const int *lda, const AReal *B, const int *ldb, const AReal *beta, AReal *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:166
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A...
Definition: Arithmetic.hxx:152
void Gemv(const char *trans, const int *m, const int *n, const AReal *alpha, const AReal *A, const int *lda, const AReal *x, const int *incx, const AReal *beta, AReal *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
Definition: Arithmetic.hxx:343
REAL epsilon
Definition: triangle.c:617
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:110
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Double_t y[n]
Definition: legend1.C:17
static void Copy(Matrix_t &B, const Matrix_t &A)
Definition: Arithmetic.hxx:269
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
Definition: Propagation.hxx:76
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A...
Definition: Arithmetic.hxx:310
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
Definition: Arithmetic.hxx:194
create variable transformations
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
void Mult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
General matrix multiplication. Create a matrix C such that C = A * B.
Definition: TMatrixT.cxx:651
const Int_t n
Definition: legend1.C:16
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C...
Definition: Arithmetic.hxx:42
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.hxx:302