Logo ROOT  
Reference Guide
Arithmetic.hxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12////////////////////////////////////////////////////////////
13// Implementation of Helper arithmetic functions for the //
14// multi-threaded CPU implementation of DNNs. //
15////////////////////////////////////////////////////////////
16
18
19#ifdef R__HAS_TMVACPU
21#else
23#endif
24
25#pragma GCC diagnostic push
26#pragma GCC diagnostic ignored "-Wshadow"
27
28//#include "tbb/tbb.h"
29
30#pragma GCC diagnostic pop
31
32namespace TMVA
33{
34namespace DNN
35{
36
37//____________________________________________________________________________
38template<typename AReal>
40 const TCpuMatrix<AReal> &A,
41 const TCpuMatrix<AReal> &B)
42{
43 int m = (int) A.GetNrows();
44 int k = (int) A.GetNcols();
45 int n = (int) B.GetNcols();
46
47 R__ASSERT((int) C.GetNrows() == m);
48 R__ASSERT((int) C.GetNcols() == n);
49 R__ASSERT((int) B.GetNrows() == k);
50
51#ifdef R__HAS_TMVACPU
52
53 char transa = 'N';
54 char transb = 'N';
55
56 AReal alpha = 1.0;
57 AReal beta = 0.0;
58
59 const AReal * APointer = A.GetRawDataPointer();
60 const AReal * BPointer = B.GetRawDataPointer();
61 AReal * CPointer = C.GetRawDataPointer();
62
63 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
64 APointer, &m, BPointer, &k, &beta, CPointer, &m);
65#else
66 TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
67 tmp.Mult(A,B);
68 C = tmp;
69#endif
70}
71
72//____________________________________________________________________________
73template<typename AReal>
75 const TCpuMatrix<AReal> &A,
76 const TCpuMatrix<AReal> &B,
77 AReal alpha, AReal beta)
78{
79#ifdef R__HAS_TMVACPU
80 int m = (int) A.GetNcols();
81 int k = (int) A.GetNrows();
82 int n = (int) B.GetNcols();
83
84 R__ASSERT((int) C.GetNrows() == m);
85 R__ASSERT((int) C.GetNcols() == n);
86 R__ASSERT((int) B.GetNrows() == k);
87
88 char transa = 'T';
89 char transb = 'N';
90
91 //AReal alpha = 1.0;
92 //AReal beta = 0.0;
93
94 const AReal *APointer = A.GetRawDataPointer();
95 const AReal *BPointer = B.GetRawDataPointer();
96 AReal *CPointer = C.GetRawDataPointer();
97
98 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
99 APointer, &k, BPointer, &k, &beta, CPointer, &m);
100#else
101 TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
102 tmp.TMult(A,B);
103 tmp = alpha*tmp + beta;
104 C = tmp;
105#endif
106}
107
108//____________________________________________________________________________
109template<typename AReal>
111 const TCpuMatrix<AReal> &A)
112{
113 const AReal *dataA = A.GetRawDataPointer();
114 AReal *dataB = B.GetRawDataPointer();
115
116 size_t nElements = A.GetNoElements();
117 R__ASSERT(B.GetNoElements() == nElements);
118 size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
119
120 auto f = [&](UInt_t workerID)
121 {
122 for (size_t j = 0; j < nSteps; ++j) {
123 size_t idx = workerID+j;
124 if (idx >= nElements) break;
125 dataB[idx] *= dataA[idx];
126 }
127 return 0;
128 };
129
130 if (nSteps < nElements) {
131#ifdef DL_USE_MTE
132 B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
133#else
134 for (size_t i = 0; i < nElements ; i+= nSteps)
135 f(i);
136#endif
137 }
138 else {
139 f(0);
140 }
141}
142
143//____________________________________________________________________________
144template<typename AReal>
146 const TCpuTensor<AReal> &A)
147{
148 const AReal *dataA = A.GetRawDataPointer();
149 AReal *dataB = B.GetRawDataPointer();
150
151 size_t nElements = A.GetNoElements();
152 R__ASSERT(B.GetNoElements() == nElements);
153 size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
154
155 auto f = [&](UInt_t workerID)
156 {
157 for (size_t j = 0; j < nSteps; ++j) {
158 size_t idx = workerID+j;
159 if (idx >= nElements) break;
160 dataB[idx] *= dataA[idx];
161 }
162 return 0;
163 };
164
165 if (nSteps < nElements) {
166#ifdef DL_USE_MTE
168#else
169 for (size_t i = 0; i < nElements ; i+= nSteps)
170 f(i);
171#endif
172 }
173 else {
174 f(0);
175 }
176}
177
178////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
179/// \brief Checks two matrices for element-wise equality.
180/// \tparam AReal An architecture-specific floating point number type.
181/// \param A The first matrix.
182/// \param B The second matrix.
183/// \param epsilon Equality tolerance, needed to address floating point arithmetic.
184/// \return Whether the two matrices can be considered equal element-wise
185////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
186template<typename AReal>
188{
189 if (A.GetNrows() != B.GetNrows() || A.GetNcols() != B.GetNcols()) {
190 Fatal("AlmostEquals", "The passed matrices have unequal shapes.");
191 }
192
193 const AReal *dataA = A.GetRawDataPointer();
194 const AReal *dataB = B.GetRawDataPointer();
195 size_t nElements = A.GetNoElements();
196
197 for(size_t i = 0; i < nElements; i++) {
198 if(fabs(dataA[i] - dataB[i]) > epsilon) return false;
199 }
200 return true;
201}
202
203//____________________________________________________________________________
204template<typename AReal>
206 const TCpuMatrix<AReal> &A,
207 AReal alpha, AReal beta)
208{
209#ifdef R__HAS_TMVACPU
210 int m = (int) A.GetNrows();
211 int n = (int) A.GetNcols();
212 int inc = 1;
213
214 // AReal alpha = 1.0;
215 //AReal beta = 0.0;
216 char trans = 'T';
217
218 const AReal * APointer = A.GetRawDataPointer();
219 AReal * BPointer = B.GetRawDataPointer();
220
221 ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
223 &beta, BPointer, &inc);
224#else
225 TMatrixT<AReal> tmp(B.GetNrows(), B.GetNcols());
227 tmp = alpha*tmp + beta;
228 B = tmp;
229#endif
230}
231
232//____________________________________________________________________________
233template<typename AReal>
235 const TCpuMatrix<AReal> &A,
236 AReal alpha)
237{
238#ifdef R__HAS_TMVACPU
239 int n = (int) (A.GetNcols() * A.GetNrows());
240 int inc = 1;
241
242 const AReal *x = A.GetRawDataPointer();
243 AReal *y = B.GetRawDataPointer();
244
245 ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
246#else
247 TMatrixT<AReal> tmp;
248 TReference<AReal>::ScaleAdd(tmp, A, alpha);
249 B = tmp;
250#endif
251}
252
253//____________________________________________________________________________
254template<typename AReal>
256 const TCpuMatrix<AReal> &A)
257{
258 auto f = [](AReal x) {return x;};
259 B.MapFrom(f, A);
260}
261
262
263//____________________________________________________________________________
264template<typename AReal>
266 const TCpuTensor<AReal> &A,
267 AReal alpha)
268{
269 // should re-implemented at tensor level
270 for (size_t i = 0; i < B.GetFirstSize(); ++i) {
271 TCpuMatrix<AReal> B_m = B.At(i).GetMatrix();
272 ScaleAdd(B_m, A.At(i).GetMatrix(), alpha);
273 }
274}
275
276//____________________________________________________________________________
277template<typename AReal>
279 const TCpuTensor<AReal> &A)
280{
281
282 auto f = [](AReal x) {return x;};
283 B.MapFrom(f, A);
284}
285
286//____________________________________________________________________________
287template <typename AReal>
289{
290 auto f = [beta](AReal x) { return x + beta; };
291 A.Map(f);
292}
293
294//____________________________________________________________________________
295template <typename AReal>
297{
298 auto f = [beta](AReal x) { return x * beta; };
299 A.Map(f);
300}
301
302//____________________________________________________________________________
303template <typename AReal>
305{
306 auto f = [](AReal x) { return 1.0 / x; };
307 A.Map(f);
308}
309
310//____________________________________________________________________________
311template <typename AReal>
313{
314 auto f = [](AReal x) { return x * x; };
315 A.Map(f);
316}
317
318//____________________________________________________________________________
319template <typename AReal>
321{
322 auto f = [](AReal x) { return sqrt(x); };
323 A.Map(f);
324}
325
326/// Adam updates
327//____________________________________________________________________________
328template<typename AReal>
329void TCpu<AReal>::AdamUpdate(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & M, const TCpuMatrix<AReal> & V, AReal alpha, AReal eps)
330{
331 // ADAM update the weights.
332 // Weight = Weight - alpha * M / (sqrt(V) + epsilon)
333 AReal * a = A.GetRawDataPointer();
334 const AReal * m = M.GetRawDataPointer();
335 const AReal * v = V.GetRawDataPointer();
336 for (size_t index = 0; index < A.GetNoElements() ; ++index) {
337 a[index] = a[index] - alpha * m[index]/( sqrt(v[index]) + eps);
338 }
339}
340
341//____________________________________________________________________________
342template<typename AReal>
344{
345 // First momentum weight gradient update for ADAM
346 // Mt = beta1 * Mt-1 + (1-beta1) * WeightGradients
347 AReal * a = A.GetRawDataPointer();
348 const AReal * b = B.GetRawDataPointer();
349 for (size_t index = 0; index < A.GetNoElements() ; ++index) {
350 a[index] = beta * a[index] + (1.-beta) * b[index];
351 }
352}
353//____________________________________________________________________________
354template<typename AReal>
356{
357 // Second momentum weight gradient update for ADAM
358 // Vt = beta2 * Vt-1 + (1-beta2) * WeightGradients^2
359 AReal * a = A.GetRawDataPointer();
360 const AReal * b = B.GetRawDataPointer();
361 for (size_t index = 0; index < A.GetNoElements() ; ++index) {
362 a[index] = beta * a[index] + (1.-beta) * b[index] * b[index];
363 }
364}
365
366
367} // DNN
368} // TMVA
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
unsigned int UInt_t
Definition: RtypesCore.h:42
#define R__ASSERT(e)
Definition: TError.h:96
void Fatal(const char *location, const char *msgfmt,...)
double sqrt(double)
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition: Config.h:83
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:107
The TCpuMatrix class.
Definition: CpuMatrix.h:87
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:162
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:187
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
Definition: Arithmetic.hxx:74
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
Definition: Arithmetic.hxx:234
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.hxx:288
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrixx A and write the results into the first m elements in A.
Definition: Arithmetic.hxx:205
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
Definition: Arithmetic.hxx:187
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
Definition: Arithmetic.hxx:145
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:320
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
Definition: Arithmetic.hxx:39
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition: Arithmetic.hxx:355
static void Copy(Matrix_t &B, const Matrix_t &A)
Definition: Arithmetic.hxx:255
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:312
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition: Arithmetic.hxx:343
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.hxx:296
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:304
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
Definition: Arithmetic.hxx:329
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
Definition: Propagation.hxx:76
static void SumColumns(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
Sum columns of (m x n) matrixx A and write the results into the first m elements in A.
Definition: Arithmetic.hxx:25
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:110
void TMult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
Create a matrix C such that C = A' * B.
Definition: TMatrixT.cxx:852
void Mult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
General matrix multiplication. Create a matrix C such that C = A * B.
Definition: TMatrixT.cxx:648
double beta(double x, double y)
Calculates the beta function.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
static double B[]
static double A[]
static double C[]
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
void Axpy(const int *n, const AReal *alpha, const AReal *x, const int *incx, AReal *y, const int *incy)
Add the vector x scaled by alpha to y scaled by \beta.
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const AReal *alpha, const AReal *A, const int *lda, const AReal *B, const int *ldb, const AReal *beta, AReal *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
void Gemv(const char *trans, const int *m, const int *n, const AReal *alpha, const AReal *A, const int *lda, const AReal *x, const int *incx, const AReal *beta, AReal *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.
create variable transformations
auto * m
Definition: textangle.C:8
auto * a
Definition: textangle.C:12
REAL epsilon
Definition: triangle.c:617