doc/master/Cpu_2Arithmetic_8hxx_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 20/07/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


////////////////////////////////////////////////////////////

//  Implementation of Helper arithmetic functions for the //

// multi-threaded CPU implementation of DNNs.             //

////////////////////////////////////////////////////////////


#include "TMVA/DNN/Architectures/Cpu.h"


#ifdef R__HAS_TMVACPU

#include "Blas.h"

#else

#include "TMVA/DNN/Architectures/Reference.h"

#include "TVectorT.h"

#endif


#if defined(__GNUC__)

#pragma GCC diagnostic push

#pragma GCC diagnostic ignored "-Wshadow"


//#include "tbb/tbb.h"


#pragma GCC diagnostic pop

#endif


namespace TMVA

{

namespace DNN

{


//____________________________________________________________________________

/// \brief Matrix product C = A * B.
///
/// With R__HAS_TMVACPU defined the product is delegated to Blas::Gemm on the
/// raw data buffers (no transposition, alpha = 1, beta = 0). Otherwise it is
/// evaluated into a temporary TMatrixT which is then assigned to C.
///
/// \param C Output matrix; asserted to have A.GetNrows() rows and B.GetNcols() columns.
/// \param A Left operand.
/// \param B Right operand; asserted to have A.GetNcols() rows.
template <typename AReal>
void TCpu<AReal>::Multiply(TCpuMatrix<AReal> &C, const TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B)
{
   int nRowsA = static_cast<int>(A.GetNrows());
   int nColsA = static_cast<int>(A.GetNcols());
   int nColsB = static_cast<int>(B.GetNcols());

   // The three operands must have conformable shapes.
   R__ASSERT(static_cast<int>(C.GetNrows()) == nRowsA);
   R__ASSERT(static_cast<int>(C.GetNcols()) == nColsB);
   R__ASSERT(static_cast<int>(B.GetNrows()) == nColsA);

#ifdef R__HAS_TMVACPU
   char opA = 'N';
   char opB = 'N';

   AReal scaleProduct = 1.0;
   AReal scaleOutput = 0.0;

   const AReal *rawA = A.GetRawDataPointer();
   const AReal *rawB = B.GetRawDataPointer();
   AReal *rawC = C.GetRawDataPointer();

   // Leading dimensions are the row counts of A, B and C respectively.
   ::TMVA::DNN::Blas::Gemm(&opA, &opB, &nRowsA, &nColsB, &nColsA, &scaleProduct, rawA, &nRowsA, rawB, &nColsA,
                           &scaleOutput, rawC, &nRowsA);
#else
   // Fallback without BLAS: compute into a TMatrixT shaped like C, then copy back.
   TMatrixT<AReal> product(C.GetNrows(), C.GetNcols());
   product.Mult(A, B);
   C = product;
#endif
}


//____________________________________________________________________________

/// \brief Scaled transposed product C = alpha * A^T * B + beta * C.
///
/// With R__HAS_TMVACPU defined the operation is delegated to Blas::Gemm with
/// A flagged as transposed. Otherwise it is evaluated with TMatrixT::TMult into
/// a temporary; the previous content of C is only read when beta is non-zero.
///
/// The shape checks run on both code paths, so a mismatch is reported by
/// R__ASSERT regardless of whether the BLAS backend is available.
///
/// \param C Output (and, for beta != 0, input) matrix of shape A.GetNcols() x B.GetNcols().
/// \param A Left operand, used transposed.
/// \param B Right operand; must have A.GetNrows() rows.
/// \param alpha Scale applied to the product A^T * B.
/// \param beta Scale applied to the previous content of C.
template <typename AReal>
void TCpu<AReal>::TransposeMultiply(TCpuMatrix<AReal> &C, const TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B,
                                    AReal alpha, AReal beta)
{
   // Dimensions of the product: (m x k) * (k x n) with the (m x k) factor being A^T.
   int m = (int)A.GetNcols();
   int k = (int)A.GetNrows();
   int n = (int)B.GetNcols();

   // Validate shapes before either implementation touches the data.
   R__ASSERT((int)C.GetNrows() == m);
   R__ASSERT((int)C.GetNcols() == n);
   R__ASSERT((int)B.GetNrows() == k);

#ifdef R__HAS_TMVACPU
   char transa = 'T';
   char transb = 'N';

   const AReal *APointer = A.GetRawDataPointer();
   const AReal *BPointer = B.GetRawDataPointer();
   AReal *CPointer = C.GetRawDataPointer();

   // A and B are both stored with k rows, C with m rows.
   ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha, APointer, &k, BPointer, &k, &beta, CPointer, &m);
#else
   TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
   tmp.TMult(A, B);
   tmp = alpha * tmp;
   // Skip reading C entirely when it does not contribute to the result.
   if (beta != 0.0) {
      TMatrixT<AReal> tmp0(C);
      tmp = tmp + beta * tmp0;
   }
   C = tmp;
#endif
}


//____________________________________________________________________________

/// \brief In-place element-wise (Hadamard) product on matrices: B[i] *= A[i].
///
/// The flat element range is cut into chunks of GetNWorkItems(nElements)
/// entries. When more than one chunk is needed they are dispatched through
/// B's thread executor if DL_USE_MTE is defined, and processed one after the
/// other otherwise.
///
/// \param B Matrix multiplied in place; asserted to hold as many elements as A.
/// \param A Matrix providing the factors.
template <typename AReal>
void TCpu<AReal>::Hadamard(TCpuMatrix<AReal> &B, const TCpuMatrix<AReal> &A)
{
   const AReal *factors = A.GetRawDataPointer();
   AReal *target = B.GetRawDataPointer();

   const size_t nElements = A.GetNoElements();
   R__ASSERT(B.GetNoElements() == nElements);
   const size_t chunkSize = TCpuMatrix<AReal>::GetNWorkItems(nElements);

   // Multiplies the chunk starting at `workerID`, clipped to the end of the buffer.
   auto multiplyChunk = [&](UInt_t workerID) {
      const size_t first = workerID;
      const size_t last = (first + chunkSize < nElements) ? first + chunkSize : nElements;
      for (size_t idx = first; idx < last; ++idx) {
         target[idx] *= factors[idx];
      }
      return 0;
   };

   // A single chunk covers everything: no need to go through the executor.
   if (chunkSize >= nElements) {
      multiplyChunk(0);
      return;
   }

#ifdef DL_USE_MTE
   B.GetThreadExecutor().Foreach(multiplyChunk, ROOT::TSeqI(0, nElements, chunkSize));
#else
   for (size_t start = 0; start < nElements; start += chunkSize) {
      multiplyChunk(start);
   }
#endif
}


//____________________________________________________________________________

/// \brief In-place element-wise (Hadamard) product on tensors: B[i] *= A[i].
///
/// The flat element range is cut into chunks of GetNWorkItems(nElements)
/// entries. When more than one chunk is needed they are dispatched through
/// the executor obtained from TMVA::Config if DL_USE_MTE is defined, and
/// processed one after the other otherwise.
///
/// \param B Tensor multiplied in place; asserted to hold as many elements as A.
/// \param A Tensor providing the factors.
template <typename AReal>
void TCpu<AReal>::Hadamard(TCpuTensor<AReal> &B, const TCpuTensor<AReal> &A)
{
   const AReal *factors = A.GetRawDataPointer();
   AReal *target = B.GetRawDataPointer();

   const size_t nElements = A.GetNoElements();
   R__ASSERT(B.GetNoElements() == nElements);
   const size_t chunkSize = TCpuMatrix<AReal>::GetNWorkItems(nElements);

   // Multiplies the chunk starting at `workerID`, clipped to the end of the buffer.
   auto multiplyChunk = [&](UInt_t workerID) {
      const size_t first = workerID;
      const size_t last = (first + chunkSize < nElements) ? first + chunkSize : nElements;
      for (size_t idx = first; idx < last; ++idx) {
         target[idx] *= factors[idx];
      }
      return 0;
   };

   // A single chunk covers everything: no need to go through the executor.
   if (chunkSize >= nElements) {
      multiplyChunk(0);
      return;
   }

#ifdef DL_USE_MTE
   TMVA::Config::Instance().GetThreadExecutor().Foreach(multiplyChunk, ROOT::TSeqI(0, nElements, chunkSize));
#else
   for (size_t start = 0; start < nElements; start += chunkSize) {
      multiplyChunk(start);
   }
#endif
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// \brief Element-wise comparison of two matrices within an absolute tolerance.
/// \tparam AReal An architecture-specific floating point number type.
/// \param A The first matrix.
/// \param B The second matrix; a shape different from A is reported through Fatal().
/// \param epsilon Largest absolute difference between corresponding elements still counted as equal.
/// \return false as soon as one pair of elements differs by more than epsilon, true otherwise.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template <typename AReal>
bool TCpu<AReal>::AlmostEquals(const TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B, double epsilon)
{
   const bool sameShape = A.GetNrows() == B.GetNrows() && A.GetNcols() == B.GetNcols();
   if (!sameShape) {
      Fatal("AlmostEquals", "The passed matrices have unequal shapes.");
   }

   const AReal *lhs = A.GetRawDataPointer();
   const AReal *rhs = B.GetRawDataPointer();
   const size_t count = A.GetNoElements();

   // Stop at the first pair that is further apart than the tolerance.
   for (size_t pos = 0; pos < count; ++pos) {
      if (fabs(lhs[pos] - rhs[pos]) > epsilon) {
         return false;
      }
   }
   return true;
}


//____________________________________________________________________________

/// \brief Accumulates scaled column sums of A into B: B = alpha * A^T * ones + beta * B.
///
/// `ones` is the buffer returned by TCpuMatrix::GetOnePointer() (presumably
/// filled with 1 -- confirm in TCpuMatrix). The first A.GetNcols() elements of
/// B receive the result. With R__HAS_TMVACPU defined the computation is a
/// single Blas::Gemv call; otherwise it goes through TMatrixT / TVectorT
/// temporaries.
///
/// \param B Output buffer with at least A.GetNcols() elements; the fallback
///          path additionally asserts that it is a single row or column.
/// \param A Input matrix.
/// \param alpha Scale applied to the column sums.
/// \param beta Scale applied to the previous content of B.
template <typename AReal>
void TCpu<AReal>::SumColumns(TCpuMatrix<AReal> &B, const TCpuMatrix<AReal> &A, AReal alpha, AReal beta)
{
   int nRows = (int)A.GetNrows();
   int nCols = (int)A.GetNcols();

   assert((int)B.GetNoElements() >= nCols);

#ifdef R__HAS_TMVACPU
   int stride = 1;
   char opA = 'T';

   const AReal *rawA = A.GetRawDataPointer();
   AReal *rawB = B.GetRawDataPointer();

   // y = alpha * A^T * x + beta * y with x taken from the GetOnePointer() buffer.
   ::TMVA::DNN::Blas::Gemv(&opA, &nRows, &nCols, &alpha, rawA, &nRows, TCpuMatrix<AReal>::GetOnePointer(), &stride,
                           &beta, rawB, &stride);
#else
   assert(B.GetNrows() == 1 || B.GetNcols() == 1);

   // Work on a transposed copy so that a matrix-vector product yields one entry per column of A.
   TMatrixT<AReal> transposedA(A);
   transposedA.T();
   TVectorT<AReal> ones(nRows, TCpuMatrix<AReal>::GetOnePointer());
   TVectorT<AReal> result(nCols, B.GetRawDataPointer());

   result = alpha * transposedA * ones + beta * result;

   // Write the result back into B's own buffer.
   std::copy(result.GetMatrixArray(), result.GetMatrixArray() + nCols, B.GetRawDataPointer());
#endif
}


//____________________________________________________________________________

/// \brief Scaled accumulation on matrices: B = B + alpha * A.
///
/// With R__HAS_TMVACPU defined this is a Blas::Axpy call over all
/// A.GetNrows() * A.GetNcols() elements; otherwise the update is delegated to
/// TReference::ScaleAdd on a TMatrixT copy of B, which is then assigned back.
///
/// \param B Matrix updated in place.
/// \param A Matrix that is scaled and added.
/// \param alpha Scale factor applied to A.
template <typename AReal>
void TCpu<AReal>::ScaleAdd(TCpuMatrix<AReal> &B, const TCpuMatrix<AReal> &A, AReal alpha)
{
#ifdef R__HAS_TMVACPU
   int count = static_cast<int>(A.GetNcols() * A.GetNrows());
   int stride = 1;

   const AReal *source = A.GetRawDataPointer();
   AReal *destination = B.GetRawDataPointer();

   ::TMVA::DNN::Blas::Axpy(&count, &alpha, source, &stride, destination, &stride);
#else
   // Fallback without BLAS: update a TMatrixT copy and write it back.
   TMatrixT<AReal> updated(B);
   TReference<AReal>::ScaleAdd(updated, A, alpha);
   B = updated;
#endif
}


//____________________________________________________________________________

/// \brief Copies the elements of matrix A into matrix B.
///
/// Implemented by mapping the identity function from A onto B via
/// TCpuMatrix::MapFrom.
///
/// \param B Destination matrix.
/// \param A Source matrix.
template <typename AReal>
void TCpu<AReal>::Copy(TCpuMatrix<AReal> &B, const TCpuMatrix<AReal> &A)
{
   auto identity = [](AReal value) { return value; };
   B.MapFrom(identity, A);
}


//____________________________________________________________________________

/// \brief Scaled accumulation on tensors: B = B + alpha * A, one first-dimension slice at a time.
///
/// Each slice B.At(i) / A.At(i) is turned into a matrix and handed to the
/// matrix overload of ScaleAdd.
///
/// \param B Tensor updated in place; its first dimension drives the loop.
/// \param A Tensor that is scaled and added.
/// \param alpha Scale factor applied to A.
template <typename AReal>
void TCpu<AReal>::ScaleAdd(TCpuTensor<AReal> &B, const TCpuTensor<AReal> &A, AReal alpha)
{
   // TODO: should be re-implemented at tensor level.
   for (size_t slice = 0; slice < B.GetFirstSize(); ++slice) {
      // NOTE(review): the matrix is presumably a view on B's storage, otherwise
      // the update below would not reach B -- confirm in TCpuTensor::GetMatrix.
      TCpuMatrix<AReal> targetSlice = B.At(slice).GetMatrix();
      ScaleAdd(targetSlice, A.At(slice).GetMatrix(), alpha);
   }
}


//____________________________________________________________________________

/// \brief Copies the elements of tensor A into tensor B.
///
/// Implemented by mapping the identity function from A onto B via
/// TCpuTensor::MapFrom.
///
/// \param B Destination tensor.
/// \param A Source tensor.
template <typename AReal>
void TCpu<AReal>::Copy(TCpuTensor<AReal> &B, const TCpuTensor<AReal> &A)
{
   auto identity = [](AReal value) { return value; };
   B.MapFrom(identity, A);
}


//____________________________________________________________________________

/// \brief Adds the constant beta to every element of A, in place.
/// \param A Matrix modified in place.
/// \param beta Value added to each element.
template <typename AReal>
void TCpu<AReal>::ConstAdd(TCpuMatrix<AReal> &A, AReal beta)
{
   auto addConstant = [beta](AReal value) { return value + beta; };
   A.Map(addConstant);
}


//____________________________________________________________________________

/// \brief Multiplies every element of A by the constant beta, in place.
/// \param A Matrix modified in place.
/// \param beta Factor applied to each element.
template <typename AReal>
void TCpu<AReal>::ConstMult(TCpuMatrix<AReal> &A, AReal beta)
{
   auto scaleByConstant = [beta](AReal value) { return value * beta; };
   A.Map(scaleByConstant);
}


//____________________________________________________________________________

/// \brief Replaces every element x of A by 1.0 / x, in place.
///
/// No guard against zero elements is applied here.
///
/// \param A Matrix modified in place.
template <typename AReal>
void TCpu<AReal>::ReciprocalElementWise(TCpuMatrix<AReal> &A)
{
   auto reciprocal = [](AReal value) { return 1.0 / value; };
   A.Map(reciprocal);
}


//____________________________________________________________________________

/// \brief Replaces every element x of A by x * x, in place.
/// \param A Matrix modified in place.
template <typename AReal>
void TCpu<AReal>::SquareElementWise(TCpuMatrix<AReal> &A)
{
   auto square = [](AReal value) { return value * value; };
   A.Map(square);
}


//____________________________________________________________________________

/// \brief Replaces every element x of A by sqrt(x), in place.
///
/// No guard against negative elements is applied here.
///
/// \param A Matrix modified in place.
template <typename AReal>
void TCpu<AReal>::SqrtElementWise(TCpuMatrix<AReal> &A)
{
   auto squareRoot = [](AReal value) { return sqrt(value); };
   A.Map(squareRoot);
}


/// Adam updates

//____________________________________________________________________________

/// \brief ADAM weight update: A = A - alpha * M / (sqrt(V) + eps), element by element.
///
/// The loop runs over A.GetNoElements() entries; M and V are read at the same
/// flat indices and are not checked for matching size here.
///
/// \param A Weights, updated in place.
/// \param M First-moment estimates.
/// \param V Second-moment estimates.
/// \param alpha Step size multiplying the update.
/// \param eps Constant added to sqrt(V) in the denominator.
template <typename AReal>
void TCpu<AReal>::AdamUpdate(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &M, const TCpuMatrix<AReal> &V,
                             AReal alpha, AReal eps)
{
   AReal *weights = A.GetRawDataPointer();
   const AReal *firstMoment = M.GetRawDataPointer();
   const AReal *secondMoment = V.GetRawDataPointer();

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      weights[i] -= alpha * firstMoment[i] / (sqrt(secondMoment[i]) + eps);
   }
}


//____________________________________________________________________________

/// \brief ADAM first-moment update: A = beta * A + (1 - beta) * B, element by element.
///
/// The loop runs over A.GetNoElements() entries; B is read at the same flat
/// indices and is not checked for matching size here.
///
/// \param A First-moment estimates, updated in place.
/// \param B Gradients.
/// \param beta Decay rate of the running average.
template <typename AReal>
void TCpu<AReal>::AdamUpdateFirstMom(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B, AReal beta)
{
   AReal *moment = A.GetRawDataPointer();
   const AReal *gradient = B.GetRawDataPointer();

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      moment[i] = beta * moment[i] + (1. - beta) * gradient[i];
   }
}


//____________________________________________________________________________

/// \brief ADAM second-moment update: A = beta * A + (1 - beta) * B^2, element by element.
///
/// The loop runs over A.GetNoElements() entries; B is read at the same flat
/// indices and is not checked for matching size here.
///
/// \param A Second-moment estimates, updated in place.
/// \param B Gradients (squared element-wise inside the update).
/// \param beta Decay rate of the running average.
template <typename AReal>
void TCpu<AReal>::AdamUpdateSecondMom(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B, AReal beta)
{
   AReal *moment = A.GetRawDataPointer();
   const AReal *gradient = B.GetRawDataPointer();

   const size_t count = A.GetNoElements();
   for (size_t i = 0; i < count; ++i) {
      moment[i] = beta * moment[i] + (1. - beta) * gradient[i] * gradient[i];
   }
}


} // DNN

} // TMVA

Blas.h

Cpu.h

b
#define b(i)
Definition RSha256.hxx:100

f
#define f(i)
Definition RSha256.hxx:104

a
#define a(i)
Definition RSha256.hxx:99

Reference.h

TRangeDynCast
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Definition TCollection.h:360

R__ASSERT
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125

Fatal
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Definition TError.cxx:267

index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Definition TGWin32VirtualXProxy.cxx:168

TVectorT.h

AReal

ROOT::Detail::TRangeCast
Definition TCollection.h:313

TMVA::Config::Instance
static Config & Instance()
static function: returns TMVA instance
Definition Config.cxx:97

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition CpuMatrix.h:86

TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition CpuMatrix.h:191

TMVA::DNN::TCpu::TransposeMultiply
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of A^T (transposed) and B, scaled by alpha and added to beta times C, with the result being written into C.
Definition Arithmetic.hxx:77

TMVA::DNN::TCpu::ScaleAdd
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
Definition Arithmetic.hxx:248

TMVA::DNN::TCpu::ConstAdd
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition Arithmetic.hxx:302

TMVA::DNN::TCpu::SumColumns
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first n elements in B.
Definition Arithmetic.hxx:212

TMVA::DNN::TCpu::AlmostEquals
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
Definition Arithmetic.hxx:194

TMVA::DNN::TCpu::Hadamard
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
Definition Arithmetic.hxx:152

TMVA::DNN::TCpu::SqrtElementWise
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
Definition Arithmetic.hxx:334

TMVA::DNN::TCpu::Multiply
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
Definition Arithmetic.hxx:42

TMVA::DNN::TCpu::AdamUpdateSecondMom
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition Arithmetic.hxx:369

TMVA::DNN::TCpu::Copy
static void Copy(Matrix_t &B, const Matrix_t &A)
Definition Arithmetic.hxx:269

TMVA::DNN::TCpu::SquareElementWise
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
Definition Arithmetic.hxx:326

TMVA::DNN::TCpu::AdamUpdateFirstMom
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition Arithmetic.hxx:357

TMVA::DNN::TCpu::ConstMult
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
Definition Arithmetic.hxx:310

TMVA::DNN::TCpu::ReciprocalElementWise
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
Definition Arithmetic.hxx:318

TMVA::DNN::TCpu::AdamUpdate
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
Definition Arithmetic.hxx:343

TMVA::DNN::TReference::ScaleAdd
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
Definition Propagation.hxx:76

int

unsigned int

y
Double_t y[n]
Definition legend1.C:17

x
Double_t x[n]
Definition legend1.C:17

n
const Int_t n
Definition legend1.C:16

TMVA::DNN::Blas::Axpy
void Axpy(const int *n, const AReal *alpha, const AReal *x, const int *incx, AReal *y, const int *incy)
Add the vector x scaled by alpha to the vector y.

TMVA::DNN::Blas::Gemm
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const AReal *alpha, const AReal *A, const int *lda, const AReal *B, const int *ldb, const AReal *beta, AReal *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.

TMVA::DNN::Blas::Gemv
void Gemv(const char *trans, const int *m, const int *n, const AReal *alpha, const AReal *A, const int *lda, const AReal *x, const int *incx, const AReal *beta, AReal *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.

TMVA
create variable transformations
Definition GeneticMinimizer.h:22

v
@ v
Definition rootcling_impl.cxx:3554

m
TMarker m
Definition textangle.C:8