doc/v620/Cpu_2Arithmetic_8hxx_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 20/07/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


////////////////////////////////////////////////////////////

//  Implementation of Helper arithmetic functions for the //

// multi-threaded CPU implementation of DNNs.             //

////////////////////////////////////////////////////////////


#include "TMVA/DNN/Architectures/Cpu.h"


#ifdef R__HAS_TMVACPU

#include "TMVA/DNN/Architectures/Cpu/Blas.h"

#else

#include "TMVA/DNN/Architectures/Reference.h"

#endif


#pragma GCC diagnostic push

#pragma GCC diagnostic ignored "-Wshadow"


//#include "tbb/tbb.h"


#pragma GCC diagnostic pop


namespace TMVA

{

namespace DNN

{


//____________________________________________________________________________

// Computes the matrix product C = A * B.
//
// With A of shape (m x k) and B of shape (k x n), C must already have shape
// (m x n); all three shape relations are verified with R__ASSERT first.
template<typename AReal>
void TCpu<AReal>::Multiply(TCpuMatrix<AReal> &C,
                            const TCpuMatrix<AReal> &A,
                            const TCpuMatrix<AReal> &B)
{
    // BLAS-style dimension names: C(m x n) = A(m x k) * B(k x n).
    int m = static_cast<int>(A.GetNrows());
    int k = static_cast<int>(A.GetNcols());
    int n = static_cast<int>(B.GetNcols());

    R__ASSERT((int) C.GetNrows() == m);
    R__ASSERT((int) C.GetNcols() == n);
    R__ASSERT((int) B.GetNrows() == k);

#ifdef R__HAS_TMVACPU

    // Neither operand is transposed.
    char noTranspose = 'N';

    // Plain product: scale A*B by one and scale the previous C by zero.
    AReal productScale = 1.0;
    AReal outputScale  = 0.0;

    const AReal *rawA = A.GetRawDataPointer();
    const AReal *rawB = B.GetRawDataPointer();
    AReal       *rawC = C.GetRawDataPointer();

    // Leading dimensions equal the row counts of the stored matrices
    // (m for A and C, k for B).
    ::TMVA::DNN::Blas::Gemm(&noTranspose, &noTranspose, &m, &n, &k, &productScale,
                            rawA, &m, rawB, &k, &outputScale, rawC, &m);
#else
   // Fallback without BLAS: form the product in a temporary TMatrixT and
   // assign it to C.
   TMatrixT<AReal> product(C.GetNrows(), C.GetNcols());
   product.Mult(A,B);
   C = product;
#endif
}


//____________________________________________________________________________

// Computes C = alpha * A^T * B + beta * C.
//
// A has shape (k x m), B has shape (k x n) and C must have shape (m x n).
// alpha scales the product A^T * B, beta scales the previous contents of C
// (use beta = 1 to accumulate into C).
//
// Both build configurations implement the same GEMM contract. In particular
// the non-BLAS fallback adds beta times the previous C, not the scalar beta.
template<typename AReal>
void TCpu<AReal>::TransposeMultiply(TCpuMatrix<AReal> &C,
                                     const TCpuMatrix<AReal> &A,
                                     const TCpuMatrix<AReal> &B,
                                     AReal alpha, AReal beta)
{
#ifdef R__HAS_TMVACPU
    // Dimensions of the operation as seen by GEMM: op(A) = A^T is (m x k).
    int m = (int) A.GetNcols();
    int k = (int) A.GetNrows();
    int n = (int) B.GetNcols();

    R__ASSERT((int) C.GetNrows() == m);
    R__ASSERT((int) C.GetNcols() == n);
    R__ASSERT((int) B.GetNrows() == k);

    char transa = 'T';
    char transb = 'N';

    const AReal *APointer = A.GetRawDataPointer();
    const AReal *BPointer = B.GetRawDataPointer();
          AReal *CPointer = C.GetRawDataPointer();

    // A and B are both stored with k rows, hence leading dimension k for
    // each; C is stored with m rows.
    ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
                            APointer, &k, BPointer, &k, &beta, CPointer, &m);
#else
   TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
   tmp.TMult(A,B);
   if (beta != AReal(0)) {
      // Scale the previous contents of C by beta, as GEMM does.
      const TMatrixT<AReal> previous = C;
      tmp = alpha*tmp + beta*previous;
   } else {
      // With beta == 0 the old C is not read at all, so stale NaN/Inf
      // values in C cannot leak into the result.
      tmp = alpha*tmp;
   }
   C = tmp;
#endif
}


//____________________________________________________________________________

// In-place element-wise (Hadamard) product on matrices: B[i] *= A[i].
//
// A and B must hold the same number of elements (checked with R__ASSERT).
// The element range is cut into chunks of GetNWorkItems(nElements) entries;
// when more than one chunk is needed they are dispatched through the thread
// executor of B (DL_USE_MTE) or processed one after another.
template<typename AReal>
void TCpu<AReal>::Hadamard(TCpuMatrix<AReal> &B,
                            const TCpuMatrix<AReal> &A)
{
   const AReal *factors = A.GetRawDataPointer();
   AReal *values = B.GetRawDataPointer();

   size_t nElements = A.GetNoElements();
   R__ASSERT(B.GetNoElements() == nElements);
   size_t chunkSize = TCpuMatrix<AReal>::GetNWorkItems(nElements);

   // Multiplies the elements in [first, first + chunkSize), clipped to the
   // end of the data.
   auto multiplyChunk = [&](UInt_t first)
   {
      const size_t limit = first + chunkSize;
      const size_t stop = (limit < nElements) ? limit : nElements;
      for (size_t pos = first; pos < stop; ++pos) {
         values[pos] *= factors[pos];
      }
      return 0;
   };

   // A single chunk covers everything: run it directly.
   if (!(chunkSize < nElements)) {
      multiplyChunk(0);
      return;
   }

#ifdef DL_USE_MTE
   B.GetThreadExecutor().Foreach(multiplyChunk, ROOT::TSeqI(0,nElements,chunkSize));
#else
   for (size_t first = 0; first < nElements; first += chunkSize)
      multiplyChunk(first);
#endif
}


//____________________________________________________________________________

// In-place element-wise (Hadamard) product on tensors: B[i] *= A[i].
//
// A and B must hold the same number of elements (checked with R__ASSERT).
// The element range is cut into chunks of GetNWorkItems(nElements) entries;
// when more than one chunk is needed they are dispatched through the global
// TMVA thread executor (DL_USE_MTE) or processed one after another.
template<typename AReal>
void TCpu<AReal>::Hadamard(TCpuTensor<AReal> &B,
                            const TCpuTensor<AReal> &A)
{
   const AReal *factors = A.GetRawDataPointer();
   AReal *values = B.GetRawDataPointer();

   size_t nElements = A.GetNoElements();
   R__ASSERT(B.GetNoElements() == nElements);
   size_t chunkSize = TCpuMatrix<AReal>::GetNWorkItems(nElements);

   // Multiplies the elements in [first, first + chunkSize), clipped to the
   // end of the data.
   auto multiplyChunk = [&](UInt_t first)
   {
      const size_t limit = first + chunkSize;
      const size_t stop = (limit < nElements) ? limit : nElements;
      for (size_t pos = first; pos < stop; ++pos) {
         values[pos] *= factors[pos];
      }
      return 0;
   };

   // A single chunk covers everything: run it directly.
   if (!(chunkSize < nElements)) {
      multiplyChunk(0);
      return;
   }

#ifdef DL_USE_MTE
   TMVA::Config::Instance().GetThreadExecutor().Foreach(multiplyChunk, ROOT::TSeqI(0,nElements,chunkSize));
#else
   for (size_t first = 0; first < nElements; first += chunkSize)
      multiplyChunk(first);
#endif
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// \brief Element-wise comparison of two matrices within an absolute tolerance.
/// \tparam AReal An architecture-specific floating point number type.
/// \param A The first matrix.
/// \param B The second matrix; a shape different from A is reported through Fatal().
/// \param epsilon Largest absolute difference |A_i - B_i| that still counts as equal.
/// \return false as soon as one pair of elements differs by more than epsilon, true otherwise.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<typename AReal>
bool TCpu<AReal>::AlmostEquals(const TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B, double epsilon)
{
    const bool sameShape = A.GetNrows() == B.GetNrows() && A.GetNcols() == B.GetNcols();
    if (!sameShape) {
        Fatal("AlmostEquals", "The passed matrices have unequal shapes.");
    }

    const AReal *lhs = A.GetRawDataPointer();
    const AReal *rhs = B.GetRawDataPointer();
    const size_t count = A.GetNoElements();

    for (size_t pos = 0; pos != count; ++pos) {
        // Only a difference strictly greater than epsilon is a mismatch.
        const auto delta = fabs(lhs[pos] - rhs[pos]);
        if (delta > epsilon) {
            return false;
        }
    }
    return true;
}


//____________________________________________________________________________

// Sums each column of the (m x n) matrix A and combines the n sums with the
// previous contents of B:  B = alpha * colsum(A) + beta * B.
//
// alpha scales the column sums, beta scales the previous contents of B (use
// beta = 1 to accumulate into B).
//
// Both build configurations implement the same GEMV contract. In particular
// the non-BLAS fallback adds beta times the previous B, not the scalar beta.
template<typename AReal>
void TCpu<AReal>::SumColumns(TCpuMatrix<AReal> &B,
                              const TCpuMatrix<AReal> &A,
                              AReal alpha, AReal beta)
{
#ifdef R__HAS_TMVACPU
   int m = (int) A.GetNrows();
   int n = (int) A.GetNcols();
   int inc = 1;

   // 'T': multiply A^T with the vector behind GetOnePointer(), i.e.
   // y = alpha * A^T * x + beta * y.
   char   trans   = 'T';

   const AReal * APointer = A.GetRawDataPointer();
         AReal * BPointer = B.GetRawDataPointer();

   // NOTE(review): GetOnePointer() presumably refers to a vector of ones
   // with at least m entries - confirm against TCpuMatrix.
   ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
                           TCpuMatrix<AReal>::GetOnePointer(), &inc,
                           &beta, BPointer, &inc);
#else
   TMatrixT<AReal> tmp(B.GetNrows(), B.GetNcols());
   TReference<AReal>::SumColumns(tmp,A);
   if (beta != AReal(0)) {
      // Scale the previous contents of B by beta, as GEMV does.
      const TMatrixT<AReal> previous = B;
      tmp = alpha*tmp + beta*previous;
   } else {
      // With beta == 0 the old B is not read at all, so stale NaN/Inf
      // values in B cannot leak into the result.
      tmp = alpha*tmp;
   }
   B = tmp;
#endif
}


//____________________________________________________________________________

// Adds the elements of A, scaled by alpha, to the elements of B:
// B = B + alpha * A.
//
// The BLAS path performs an AXPY over A.GetNrows() * A.GetNcols() contiguous
// elements; B is expected to hold at least that many elements.
template<typename AReal>
void TCpu<AReal>::ScaleAdd(TCpuMatrix<AReal> &B,
                            const TCpuMatrix<AReal> &A,
                            AReal alpha)
{
#ifdef R__HAS_TMVACPU
   int n = (int) (A.GetNcols() * A.GetNrows());
   int inc = 1;

   const AReal *x = A.GetRawDataPointer();
   AReal *y = B.GetRawDataPointer();

   // y <- alpha * x + y
   ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
#else
   // Start from the current contents of B so that the reference
   // implementation accumulates alpha * A onto them. An empty temporary
   // would discard B's values and leave B empty after the assignment.
   TMatrixT<AReal> tmp = B;
   TReference<AReal>::ScaleAdd(tmp, A, alpha);
   B = tmp;
#endif
}


//____________________________________________________________________________

// Copies the elements of matrix A into matrix B by mapping the identity
// function from A onto B.
template<typename AReal>
void TCpu<AReal>::Copy(TCpuMatrix<AReal> &B,
                        const TCpuMatrix<AReal> &A)
{
   // Kept as a named local: it is handed to MapFrom as an lvalue.
   auto identity = [](AReal value) { return value; };
   B.MapFrom(identity, A);
}


//____________________________________________________________________________

// Tensor version of ScaleAdd: applies the matrix ScaleAdd to every slice
// along the first dimension, i.e. B.At(i) += alpha * A.At(i).
template<typename AReal>
void TCpu<AReal>::ScaleAdd(TCpuTensor<AReal> &B,
                            const TCpuTensor<AReal> &A,
                            AReal alpha)
{
   // TODO: should be re-implemented at tensor level.
   const auto nSlices = B.GetFirstSize();
   for (size_t slice = 0; slice < nSlices; ++slice) {
      // NOTE(review): presumably GetMatrix() yields a view on the tensor's
      // storage, so updating it updates B - confirm against TCpuTensor.
      TCpuMatrix<AReal> target = B.At(slice).GetMatrix();
      ScaleAdd(target, A.At(slice).GetMatrix(), alpha);
   }
}


//____________________________________________________________________________

// Copies the elements of tensor A into tensor B by mapping the identity
// function from A onto B.
template<typename AReal>
void TCpu<AReal>::Copy(TCpuTensor<AReal> &B,
                            const TCpuTensor<AReal> &A)
{
   // Kept as a named local: it is handed to MapFrom as an lvalue.
   auto identity = [](AReal value) { return value; };
   B.MapFrom(identity, A);
}


//____________________________________________________________________________

// Adds the constant beta to every element of A, in place.
template <typename AReal>
void TCpu<AReal>::ConstAdd(TCpuMatrix<AReal> &A, AReal beta)
{
   // beta is captured by value, so the functor is self-contained.
   auto addConstant = [beta](AReal element) { return element + beta; };
   A.Map(addConstant);
}


//____________________________________________________________________________

// Multiplies every element of A by the constant beta, in place.
template <typename AReal>
void TCpu<AReal>::ConstMult(TCpuMatrix<AReal> &A, AReal beta)
{
   // beta is captured by value, so the functor is self-contained.
   auto scaleByConstant = [beta](AReal element) { return element * beta; };
   A.Map(scaleByConstant);
}


//____________________________________________________________________________

// Replaces every element x of A by its reciprocal 1 / x, in place.
// Zero elements are not treated specially.
template <typename AReal>
void TCpu<AReal>::ReciprocalElementWise(TCpuMatrix<AReal> &A)
{
   // The division uses the double literal 1.0, as before.
   auto reciprocal = [](AReal element) { return 1.0 / element; };
   A.Map(reciprocal);
}


//____________________________________________________________________________

// Replaces every element x of A by x * x, in place.
template <typename AReal>
void TCpu<AReal>::SquareElementWise(TCpuMatrix<AReal> &A)
{
   auto square = [](AReal element) { return element * element; };
   A.Map(square);
}


//____________________________________________________________________________

// Replaces every element x of A by sqrt(x), in place.
// Negative elements are not treated specially.
template <typename AReal>
void TCpu<AReal>::SqrtElementWise(TCpuMatrix<AReal> &A)
{
   auto squareRoot = [](AReal element) { return sqrt(element); };
   A.Map(squareRoot);
}


/// Adam updates

//____________________________________________________________________________

// ADAM weight update, applied in place to A:
//    A = A - alpha * M / (sqrt(V) + eps)     (element-wise)
// The loop runs over A.GetNoElements() entries; M and V are read at the same
// indices and are not checked for size here.
template<typename AReal>
void TCpu<AReal>::AdamUpdate(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & M, const TCpuMatrix<AReal> & V, AReal alpha, AReal eps)
{
   AReal *weights = A.GetRawDataPointer();
   const AReal *firstMoment = M.GetRawDataPointer();
   const AReal *secondMoment = V.GetRawDataPointer();

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      // eps is added to the denominator after taking the square root.
      weights[i] -= alpha * firstMoment[i] / (sqrt(secondMoment[i]) + eps);
   }
}


//____________________________________________________________________________

// ADAM first-moment update, applied in place to A:
//    A = beta * A + (1 - beta) * B           (element-wise)
// The loop runs over A.GetNoElements() entries; B is read at the same
// indices and is not checked for size here.
template<typename AReal>
void TCpu<AReal>::AdamUpdateFirstMom(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & B, AReal beta)
{
   AReal *moment = A.GetRawDataPointer();
   const AReal *gradient = B.GetRawDataPointer();

   // Loop-invariant weight of the gradient term, formed from the double
   // literal 1. exactly as in the per-element expression.
   const auto gradientWeight = 1. - beta;

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      moment[i] = beta * moment[i] + gradientWeight * gradient[i];
   }
}

//____________________________________________________________________________

// ADAM second-moment update, applied in place to A:
//    A = beta * A + (1 - beta) * B^2         (element-wise)
// The loop runs over A.GetNoElements() entries; B is read at the same
// indices and is not checked for size here.
template<typename AReal>
void TCpu<AReal>::AdamUpdateSecondMom(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & B, AReal beta)
{
   AReal *moment = A.GetRawDataPointer();
   const AReal *gradient = B.GetRawDataPointer();

   // Loop-invariant weight of the squared-gradient term, formed from the
   // double literal 1. exactly as in the per-element expression.
   const auto gradientWeight = 1. - beta;

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      // Left-to-right: (weight * g) * g, the same order as before.
      moment[i] = beta * moment[i] + gradientWeight * gradient[i] * gradient[i];
   }
}


} // DNN

} // TMVA

Blas.h

Cpu.h

b
#define b(i)
Definition: RSha256.hxx:100

f
#define f(i)
Definition: RSha256.hxx:104

Reference.h

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:96

Fatal
void Fatal(const char *location, const char *msgfmt,...)

sqrt
double sqrt(double)

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66

TMVA::Config::GetThreadExecutor
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition: Config.h:83

TMVA::Config::Instance
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:107

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:87

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:162

TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:187

TMVA::DNN::TCpuTensor
Definition: CpuTensor.h:41

TMVA::DNN::TCpu::TransposeMultiply
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of the transposed matrix A^T with the matrix B, with the result being written into C.
Definition: Arithmetic.hxx:74

TMVA::DNN::TCpu::ScaleAdd
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements of matrix B, scaled by beta, to the elements of matrix A.
Definition: Arithmetic.hxx:234

TMVA::DNN::TCpu::ConstAdd
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.hxx:288

TMVA::DNN::TCpu::SumColumns
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum the columns of the (m x n) matrix A and write the results into the matrix B.
Definition: Arithmetic.hxx:205

TMVA::DNN::TCpu::AlmostEquals
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
Definition: Arithmetic.hxx:187

TMVA::DNN::TCpu::Hadamard
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
Definition: Arithmetic.hxx:145

TMVA::DNN::TCpu::SqrtElementWise
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:320

TMVA::DNN::TCpu::Multiply
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
Definition: Arithmetic.hxx:39

TMVA::DNN::TCpu::AdamUpdateSecondMom
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition: Arithmetic.hxx:355

TMVA::DNN::TCpu::Copy
static void Copy(Matrix_t &B, const Matrix_t &A)
Definition: Arithmetic.hxx:255

TMVA::DNN::TCpu::SquareElementWise
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:312

TMVA::DNN::TCpu::AdamUpdateFirstMom
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition: Arithmetic.hxx:343

TMVA::DNN::TCpu::ConstMult
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.hxx:296

TMVA::DNN::TCpu::ReciprocalElementWise
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:304

TMVA::DNN::TCpu::AdamUpdate
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
Definition: Arithmetic.hxx:329

TMVA::DNN::TReference::ScaleAdd
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
Definition: Propagation.hxx:76

TMVA::DNN::TReference::SumColumns
static void SumColumns(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
Sum columns of (m x n) matrixx A and write the results into the first m elements in A.
Definition: Arithmetic.hxx:25

TMVA::Executor::Foreach
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:110

TMatrixT< AReal >

TMatrixT::TMult
void TMult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
Create a matrix C such that C = A' * B.
Definition: TMatrixT.cxx:852

TMatrixT::Mult
void Mult(const TMatrixT< Element > &a, const TMatrixT< Element > &b)
General matrix multiplication. Create a matrix C such that C = A * B.
Definition: TMatrixT.cxx:648

ROOT::Math::beta
double beta(double x, double y)
Calculates the beta function.
Definition: SpecFuncMathCore.cxx:111

y
Double_t y[n]
Definition: legend1.C:17

x
Double_t x[n]
Definition: legend1.C:17

n
const Int_t n
Definition: legend1.C:16

ROOT::Math::Cephes::B
static double B[]
Definition: SpecFuncCephes.cxx:178

ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170

ROOT::Math::Cephes::C
static double C[]
Definition: SpecFuncCephes.cxx:187

ROOT::Math::fabs
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Definition: UnaryOperators.h:131

TMVA::DNN::Blas::Axpy
void Axpy(const int *n, const AReal *alpha, const AReal *x, const int *incx, AReal *y, const int *incy)
Add the vector x scaled by alpha to y scaled by \beta.

TMVA::DNN::Blas::Gemm
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const AReal *alpha, const AReal *A, const int *lda, const AReal *B, const int *ldb, const AReal *beta, AReal *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.

TMVA::DNN::Blas::Gemv
void Gemv(const char *trans, const int *m, const int *n, const AReal *alpha, const AReal *A, const int *lda, const AReal *x, const int *incx, const AReal *beta, AReal *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.

TMVA
create variable transformations
Definition: GeneticMinimizer.h:21

v
@ v
Definition: rootcling_impl.cxx:3622

m
auto * m
Definition: textangle.C:8

a
auto * a
Definition: textangle.C:12

epsilon
REAL epsilon
Definition: triangle.c:617