doc/v614/Arithmetic_8cxx_source.html

 // @(#)root/tmva/tmva/dnn:$Id$
 // Author: Simon Pfreundschuh 20/07/16

 /*************************************************************************
  * Copyright (C) 2016, Simon Pfreundschuh                                *
  * All rights reserved.                                                  *
  *                                                                       *
  * For the licensing terms see $ROOTSYS/LICENSE.                         *
  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
  *************************************************************************/

 ////////////////////////////////////////////////////////////
 //  Implementation of Helper arithmetic functions for the //
 // multi-threaded CPU implementation of DNNs.             //
 ////////////////////////////////////////////////////////////

 #include "TMVA/DNN/Architectures/Cpu.h"
 #include "TMVA/DNN/Architectures/Cpu/Blas.h"
 #include "tbb/tbb.h"

 namespace TMVA
 {
 namespace DNN
 {

 //____________________________________________________________________________
 // Standard matrix product C = A * B, evaluated by one BLAS Gemm call.
 //
 // A is read as (m x k) and B as (k x n); C has to be (m x n). All three shape
 // conditions are enforced with R__ASSERT before the Gemm call is issued.
 template<typename Real_t>
 void TCpu<Real_t>::Multiply(TCpuMatrix<Real_t> &C,
                             const TCpuMatrix<Real_t> &A,
                             const TCpuMatrix<Real_t> &B)
 {
     // Gemm takes its dimensions by pointer to int, so keep them in locals.
     int m = static_cast<int>(A.GetNrows());
     int k = static_cast<int>(A.GetNcols());
     int n = static_cast<int>(B.GetNcols());

     R__ASSERT((int) C.GetNrows() == m);
     R__ASSERT((int) C.GetNcols() == n);
     R__ASSERT((int) B.GetNrows() == k);

     // Neither operand is transposed.
     const char noTranspose = 'N';

     // Scaling factors handed to Gemm: factor 1 for the product and factor 0
     // for the incoming content of C.
     const Real_t one  = 1.0;
     const Real_t zero = 0.0;

     const Real_t *lhs = A.GetRawDataPointer();
     const Real_t *rhs = B.GetRawDataPointer();
     Real_t *out = C.GetRawDataPointer();

     // Leading dimensions are the row counts of A (m), B (k) and C (m).
     ::TMVA::DNN::Blas::Gemm(&noTranspose, &noTranspose, &m, &n, &k, &one,
                             lhs, &m, rhs, &k, &zero, out, &m);
 }

 //____________________________________________________________________________
 // Multiplies the transpose of A with B through one BLAS Gemm call, using the
 // caller-supplied factors alpha and beta.
 //
 // A is stored as (k x m) and used transposed, B is (k x n) and C has to be
 // (m x n); the shapes are enforced with R__ASSERT.
 // Assuming Blas::Gemm follows the standard BLAS GEMM contract, the result is
 // C = alpha * A^T * B + beta * C.
 template<typename Real_t>
 void TCpu<Real_t>::TransposeMultiply(TCpuMatrix<Real_t> &C,
                                      const TCpuMatrix<Real_t> &A,
                                      const TCpuMatrix<Real_t> &B,
                                      Real_t alpha, Real_t beta)
 {
     // Dimensions of the product as seen by Gemm, i.e. after transposing A.
     int m = static_cast<int>(A.GetNcols());
     int k = static_cast<int>(A.GetNrows());
     int n = static_cast<int>(B.GetNcols());

     R__ASSERT((int) C.GetNrows() == m);
     R__ASSERT((int) C.GetNcols() == n);
     R__ASSERT((int) B.GetNrows() == k);

     // Only the left operand is transposed.
     const char opA = 'T';
     const char opB = 'N';

     const Real_t *lhs = A.GetRawDataPointer();
     const Real_t *rhs = B.GetRawDataPointer();
     Real_t *out = C.GetRawDataPointer();

     // Leading dimensions are the stored row counts: k for A and B, m for C.
     ::TMVA::DNN::Blas::Gemm(&opA, &opB, &m, &n, &k, &alpha,
                             lhs, &k, rhs, &k, &beta, out, &m);
 }

 //____________________________________________________________________________
 // In-place element-wise (Hadamard) product: every element of B is multiplied
 // by the element of A at the same raw-data index.
 //
 // A and B must hold the same number of elements (enforced with R__ASSERT).
 // The index range is cut into chunks of GetNWorkItems(nElements) elements;
 // the chunks are dispatched through the thread executor when DL_USE_MTE is
 // defined and processed one after another otherwise.
 template<typename Real_t>
 void TCpu<Real_t>::Hadamard(TCpuMatrix<Real_t> &B,
                             const TCpuMatrix<Real_t> &A)
 {
    const Real_t *factors = A.GetRawDataPointer();
    Real_t *values = B.GetRawDataPointer();

    size_t nElements =  A.GetNElements();
    R__ASSERT(B.GetNElements() == nElements);
    size_t chunkSize = TCpuMatrix<Real_t>::GetNWorkItems(nElements);

    // Handles one chunk: the elements [firstIndex, firstIndex + chunkSize),
    // clipped to the end of the data.
    auto multiplyChunk = [&](UInt_t firstIndex)
    {
       const size_t begin = firstIndex;
       const size_t end = (begin + chunkSize < nElements) ? begin + chunkSize
                                                          : nElements;
       for (size_t pos = begin; pos < end; ++pos) {
          values[pos] *= factors[pos];
       }
       return 0;
    };

 #ifdef DL_USE_MTE
    B.GetThreadExecutor().Foreach(multiplyChunk, ROOT::TSeqI(0,nElements,chunkSize));
 #else
    size_t chunkStart = 0;
    while (chunkStart < nElements) {
       multiplyChunk(chunkStart);
       chunkStart += chunkSize;
    }
 #endif
 }

 //____________________________________________________________________________
 // Column sums of A, obtained as a transposed matrix-vector product (BLAS Gemv)
 // between A and the vector returned by TCpuMatrix::GetOnePointer().
 //
 // Assuming Blas::Gemv follows the standard BLAS GEMV contract and the "one"
 // buffer holds ones, element j of B becomes
 //    alpha * (sum of column j of A) + beta * B[j].
 // NOTE(review): nothing here checks that B holds at least A.GetNcols()
 // elements or that the "one" buffer holds at least A.GetNrows() entries;
 // both are left to the caller / TCpuMatrix.
 template<typename Real_t>
 void TCpu<Real_t>::SumColumns(TCpuMatrix<Real_t> &B,
                               const TCpuMatrix<Real_t> &A,
                               Real_t alpha, Real_t beta)
 {
    // Gemv receives all scalars by pointer, so they live in named locals.
    int nRows = static_cast<int>(A.GetNrows());
    int nCols = static_cast<int>(A.GetNcols());
    int unitStride = 1;

    // 'T': the matrix is applied transposed.
    const char op = 'T';

    const Real_t *matrix = A.GetRawDataPointer();
    Real_t *sums = B.GetRawDataPointer();

    // alpha and beta are forwarded unchanged from the caller.
    ::TMVA::DNN::Blas::Gemv(&op, &nRows, &nCols, &alpha, matrix, &nRows,
                            TCpuMatrix<Real_t>::GetOnePointer(), &unitStride,
                            &beta, sums, &unitStride);
 }

 //____________________________________________________________________________
 // Adds the elements of A, scaled by alpha, to the elements of B in place,
 // using a single BLAS Axpy call over the raw data of both matrices.
 //
 // The number of elements processed is taken from A (rows * columns). B must
 // provide room for all of them: Axpy writes that many entries into B's
 // buffer, so a smaller B would be written out of bounds. The requirement is
 // checked with R__ASSERT, in line with the shape checks of the other
 // routines in this file.
 template<typename Real_t>
 void TCpu<Real_t>::ScaleAdd(TCpuMatrix<Real_t> &B,
                             const TCpuMatrix<Real_t> &A,
                             Real_t alpha)
 {
    const size_t nElementsA = A.GetNcols() * A.GetNrows();
    R__ASSERT(B.GetNElements() >= nElementsA);

    // Axpy takes the length and the strides by pointer to int.
    int n = (int) nElementsA;
    int inc = 1;

    const Real_t *x = A.GetRawDataPointer();
    Real_t *y = B.GetRawDataPointer();

    ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
 }

 //____________________________________________________________________________
 // Copies the elements of A into B by mapping the identity function over A
 // with TCpuMatrix::MapFrom.
 template<typename Real_t>
 void TCpu<Real_t>::Copy(TCpuMatrix<Real_t> &B,
                         const TCpuMatrix<Real_t> &A)
 {
    // MapFrom takes the callable by non-const reference, so it needs a name.
    auto identity = [](Real_t value) { return value; };
    B.MapFrom(identity, A);
 }


 //____________________________________________________________________________
 // Applies the matrix version of ScaleAdd pairwise: for every index i of B,
 // A[i] scaled by alpha is added to B[i].
 //
 // The loop runs over B.size() and indexes A with the same index, so A must
 // hold at least as many matrices as B; otherwise A would be read out of
 // bounds. This is checked with R__ASSERT. Surplus entries of A are ignored.
 template<typename Real_t>
 void TCpu<Real_t>::ScaleAdd(std::vector<TCpuMatrix<Real_t>> &B,
                             const std::vector<TCpuMatrix<Real_t>> &A,
                             Real_t alpha)
 {
    R__ASSERT(A.size() >= B.size());

    const size_t nMatrices = B.size();
    for (size_t i = 0; i < nMatrices; ++i) {
       ScaleAdd(B[i], A[i], alpha);
    }
 }

 //____________________________________________________________________________
 // Applies the matrix version of Copy pairwise: for every index i of B, the
 // content of A[i] is copied into B[i].
 //
 // The loop runs over B.size() and indexes A with the same index, so A must
 // hold at least as many matrices as B; otherwise A would be read out of
 // bounds. This is checked with R__ASSERT. Surplus entries of A are ignored.
 template<typename Real_t>
 void TCpu<Real_t>::Copy(std::vector<TCpuMatrix<Real_t>> &B,
                             const std::vector<TCpuMatrix<Real_t>> &A)
 {
    R__ASSERT(A.size() >= B.size());

    const size_t nMatrices = B.size();
    for (size_t i = 0; i < nMatrices; ++i) {
       Copy(B[i], A[i]);
    }
 }


 } // DNN
 } // TMVA
ROOT::TThreadExecutor::Foreach
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
Definition: TThreadExecutor.hxx:116

ROOT::Math::Cephes::B
static double B[]
Definition: SpecFuncCephes.cxx:178

TMVA::DNN::TCpu::ScaleAdd
static void ScaleAdd(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements in matrix B, scaled by beta, to the elements in matrix A.

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:72

m
auto * m
Definition: textangle.C:8

Cpu.h

TMVA::DNN::TCpu::SumColumns
static void SumColumns(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into B...
Definition: Arithmetic.cxx:115

TMVA::DNN::TCpu::Copy
static void Copy(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)

TMVA::DNN::TCpuMatrix::GetNcols
size_t GetNcols() const
Definition: CpuMatrix.h:127

R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:96

Blas.h

f
#define f(i)
Definition: RSha256.hxx:104

TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:164

ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170

ROOT::Math::beta
double beta(double x, double y)
Calculates the beta function.
Definition: SpecFuncMathCore.cxx:111

TMVA::DNN::TCpuMatrix::GetNElements
size_t GetNElements() const
Definition: CpuMatrix.h:128

TMVA::DNN::Blas::Gemm
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.

x
Double_t x[n]
Definition: legend1.C:17

TMVA::DNN::TCpu::Multiply
static void Multiply(TCpuMatrix< Scalar_t > &C, const TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
Standard multiplication of two matrices A and B with the result being written into C...
Definition: Arithmetic.cxx:28

TMVA::DNN::TCpu::Hadamard
static void Hadamard(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A...
Definition: Arithmetic.cxx:85

TMVA::DNN::Blas::Axpy
void Axpy(const int *n, const Real_t *alpha, const Real_t *x, const int *incx, Real_t *y, const int *incy)
Add the vector x scaled by alpha to y.

ROOT::Math::Cephes::C
static double C[]
Definition: SpecFuncCephes.cxx:187

TMVA::DNN::TCpuMatrix::MapFrom
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as maps but takes the input values from the matrix A and writes the results in this matrix...
Definition: CpuMatrix.h:205

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

TMVA::DNN::TCpuMatrix::GetThreadExecutor
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:139

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:136

TMVA::DNN::Blas::Gemv
void Gemv(const char *trans, const int *m, const int *n, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *x, const int *incx, const Real_t *beta, Real_t *y, const int *incy)
Multiply the vector x with the matrix A and store the result in y.

ROOT::Math::GSLSimAn::Copy
void Copy(void *source, void *dest)
Definition: GSLSimAnnealing.cxx:149

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66

y
Double_t y[n]
Definition: legend1.C:17

Real_t
float Real_t
Definition: RtypesCore.h:64

TMVA
Abstract ClassifierFactory template that handles arbitrary types.
Definition: GeneticMinimizer.h:21

TMVA::DNN::TCpuMatrix::GetNrows
size_t GetNrows() const
Definition: CpuMatrix.h:126

TMVA::DNN::TCpu::TransposeMultiply
static void TransposeMultiply(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A^T (transposed) and B with the result being written into C...
Definition: Arithmetic.cxx:56

n
const Int_t n
Definition: legend1.C:16