Logo ROOT  
Reference Guide
CpuMatrix.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 20/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////
13 // Definition of the CpuMatrix class used to represent //
14 // weight and bias matrices in neural nets. //
15 //////////////////////////////////////////////////////////
16 
17 #ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
18 #define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
19 
20 #ifdef R__USE_IMT
21 #define DL_USE_MTE // use MT with tbb
22 #endif
23 
24 #include <cstddef>
25 #include <vector>
26 
27 #include "TMatrix.h"
28 #include "TMVA/Config.h"
29 #include "CpuBuffer.h"
30 
// #define DEBUG_TMVA_TCPUMATRIX
#if defined(DEBUG_TMVA_TCPUMATRIX)
// The debug macro below streams to std::cout and calls std::exit, so pull in
// the headers it needs instead of relying on the including translation unit.
#include <cstdlib>
#include <iostream>
/*
 * Debug(!) helper for printing matrices.
 *
 * Prints a header line containing `text`, the stringified argument (`#mat`)
 * and the matrix dimensions, followed by one line per row with the elements
 * separated by commas. If the raw data pointer of `mat` is null, e.g. for
 * `TMVA_DNN_PrintTCpuMatrix(matA, "Test")`, it prints "matA is null pointer"
 * and terminates the process with exit code 1.
 *
 * Note: This is a preprocessor macro. It does _not_ respect namespaces and it
 * evaluates `mat` several times, so pass a plain object, not an expression
 * with side effects.
 *
 * Note: The expansion is a braced block (not `do { } while (0)`) so that call
 * sites written with or without a trailing semicolon both compile.
 *
 * @param mat Matrix to print; must provide GetRawDataPointer(), GetNrows(),
 *            GetNcols() and operator()(i, j).
 * @param text Label printed in front of the matrix name.
 */
#define TMVA_DNN_PrintTCpuMatrix(mat, text)                                                                     \
   {                                                                                                             \
      auto _dpointer = (mat).GetRawDataPointer();                                                                \
      if (_dpointer == nullptr) {                                                                                \
         std::cout << #mat << " is null pointer" << std::endl;                                                   \
         std::exit(1);                                                                                           \
      }                                                                                                          \
      auto _nrows = (mat).GetNrows();                                                                            \
      auto _ncols = (mat).GetNcols();                                                                            \
      std::cout << "---------------------" << (text) << " " << #mat << "(" << _nrows << "," << _ncols << ")"     \
                << "--------------------" << std::endl;                                                          \
      for (size_t _i = 0; _i < _nrows; _i++) {                                                                   \
         for (size_t _j = 0; _j < _ncols; _j++) {                                                                \
            std::cout << (mat)(_i, _j);                                                                          \
            if (_j < _ncols - 1)                                                                                 \
               std::cout << ",";                                                                                 \
         }                                                                                                       \
         std::cout << std::endl;                                                                                 \
      }                                                                                                          \
   }
#else
// Debugging disabled: the macro expands to nothing.
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
#endif
68 
69 namespace TMVA {
70 namespace DNN {
71 
72 /** The TCpuMatrix class.
73  *
74  * Matrix class for multi-threaded CPU architectures. Uses the TCpuBuffer
75  * class to store the matrices in column-major format for compatibility with
76  * BLAS. Provides Map and MapFrom member functions to simplify the application of
77  * activation functions and derivatives to matrices.
78  *
79  * Copying and assignment of TCpuMatrix objects only performs shallow copies, i.e.
80  * copying is fast and the resulting objects share the element data.
81  *
82  * \tparam AFloat The floating point type used to represent the matrix elements.
83  */
84 //______________________________________________________________________________
85 template <typename AFloat>
86 class TCpuMatrix {
87 private:
88  static std::vector<AFloat> fOnes; ///< Vector filled with ones used for BLAS calls.
89 
90 public:
91  TCpuBuffer<AFloat> fBuffer; ///< The buffer holding the matrix elements
92  ///< in column-major format.
93 private:
94  size_t fNCols;
95  size_t fNRows;
96 
97 public:
98  // friend class TCpuTensor<AFloat>;
99 
100  /** Returns pointer to a vector holding only ones with a guaranteed length
101  * of the number of columns of every instantiated CpuMatrix object. */
102 
103 
105  const TCpuBuffer<AFloat>& GetBuffer() const {return fBuffer;}
106  // for compatible API with Tensor and Matrix in Cuda
108  const TCpuBuffer<AFloat> &GetDeviceBuffer() const { return fBuffer; }
109 
110  static const AFloat *GetOnePointer() { return fOnes.data(); }
111 
112  static size_t GetOnePointerSize() { return fOnes.size(); }
113 
114  static void InitializeOneVector(size_t n);
115 
116  TCpuMatrix() : fNCols(0), fNRows(0) {}
117 
118  /** Construct matrix and allocate space for its elements. */
119  TCpuMatrix(size_t nRows, size_t nCols);
120  /** Construct a TCpuMatrix object by (deeply) copying from a
121  * TMatrixT<Double_t> matrix. */
122  TCpuMatrix(const TMatrixT<AFloat> &);
123  /** Construct a m-times-n matrix from the given buffer. The size must of
124  * course match. */
125  TCpuMatrix(const TCpuBuffer<AFloat> &buffer, size_t m, size_t n);
126 
127  /** copy from a TMAtrixT . Deep copy without re-creating a new buffer */
129 
130  // N.B the default copy constructor does a shallow copy (NOT a deep one) !
131  TCpuMatrix(const TCpuMatrix &) = default;
132  TCpuMatrix(TCpuMatrix &&) = default;
133  TCpuMatrix &operator=(const TCpuMatrix &) = default;
134  TCpuMatrix &operator=(TCpuMatrix &&) = default;
135  ~TCpuMatrix() = default;
136 
137  /** Clear content of the matrix and initialize to zero elements
138  */
139  void Zero();
140 
141  /** Convert to a TMatrixT<AFloat_t> object. Performs a deep copy of the matrix
142  * elements. */
143  operator TMatrixT<AFloat>() const;
144 
145  /** Map the given function over the matrix elements. Executed in parallel
146  * using TThreadExecutor. */
147  template <typename Function_t>
148  void Map(Function_t &f);
149 
150  /** Same as maps but takes the input values from the matrix \p A and writes
151  * the results in this matrix. */
152  template <typename Function_t>
153  void MapFrom(Function_t &f, const TCpuMatrix &A);
154 
155  size_t GetNrows() const { return fNRows; }
156  size_t GetNcols() const { return fNCols; }
157  size_t GetNoElements() const { return fNRows * fNCols; }
158  size_t GetSize() const { return fNRows * fNCols; }
159 
160  /** Return matrix element in row \p i and column \p j. */
161  AFloat operator()(size_t i, size_t j) const { return fBuffer[j * fNRows + i]; }
162  AFloat &operator()(size_t i, size_t j) { return fBuffer[j * fNRows + i]; }
163 
164  /** Return raw pointer to the elements stored contiguously in column-major
165  * order. */
166  AFloat *GetRawDataPointer() { return fBuffer; }
167  const AFloat *GetRawDataPointer() const { return fBuffer; }
168 
170 
171  // static function to get the number of elements for task
172  static size_t GetNWorkItems(size_t nelements);
173 
174  // print matrix
175  void Print() const
176  {
177  TCpuMatrix cpuMatrix = *this;
178  TMVA_DNN_PrintTCpuMatrix(cpuMatrix, "CpuMatrix");
179  }
180 
181 private:
182  void Initialize();
183 };
184 
185 template <typename AFloat>
186 std::vector<AFloat> TCpuMatrix<AFloat>::fOnes{};
187 
188 // Inline Functions.
189 //______________________________________________________________________________
190 template <typename AFloat>
191 size_t TCpuMatrix<AFloat>::GetNWorkItems(size_t nElements)
192 {
193  // nElements should have at least 100
194  // const size_t nWorkers = TMVA::Config::Instance().GetNCpu();
195  // return (nElements > nWorkers) ? (int) nElements/nWorkers : 1;
196  const size_t minElements = 1000;
197  const size_t nCpu = TMVA::Config::Instance().GetNCpu();
198  if (nElements <= minElements)
199  return nElements;
200  if (nElements < nCpu * minElements) {
201  size_t nt = nElements / minElements;
202  return nElements / nt;
203  }
204  return nElements / nCpu;
205  // if (nElements < nCpu*20) return nElements/nCpu;
206  // return nElements/(nCpu*10);
207 }
208 
209 //______________________________________________________________________________
210 template <typename AFloat>
211 template <typename Function_t>
212 inline void TCpuMatrix<AFloat>::Map(Function_t &f)
213 {
214  AFloat *data = GetRawDataPointer();
215  size_t nelements = GetNoElements();
216  size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
217 
218  auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
219  size_t jMax = std::min(workerID + nsteps, nelements);
220  for (size_t j = workerID; j < jMax; ++j) {
221  data[j] = f(data[j]);
222  }
223  return 0;
224  };
225 
226  if (nsteps < nelements) {
227  TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
228 
229  // for (size_t i = 0; i < nelements; i+=nsteps)
230  // ff(i);
231 
232  } else {
233  R__ASSERT(nelements == nsteps);
234  ff(0);
235  }
236 }
237 
238 //______________________________________________________________________________
239 template <typename AFloat>
240 template <typename Function_t>
241 inline void TCpuMatrix<AFloat>::MapFrom(Function_t &f, const TCpuMatrix &A)
242 {
243  AFloat *dataB = GetRawDataPointer();
244  const AFloat *dataA = A.GetRawDataPointer();
245 
246  size_t nelements = GetNoElements();
247  R__ASSERT(nelements == A.GetNoElements());
248  size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
249 
250  auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
251  size_t jMax = std::min(workerID + nsteps, nelements);
252  for (size_t j = workerID; j < jMax; ++j) {
253  dataB[j] = f(dataA[j]);
254  }
255  return 0;
256  };
257  if (nsteps < nelements) {
258  TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
259  // for (size_t i = 0; i < nelements; i+=nsteps)
260  // ff(i);
261 
262  } else {
263  R__ASSERT(nelements == nsteps);
264  ff(0);
265  }
266 }
267 //______________________________________________________________________________
268 template <typename AFloat>
270 {
271  for (size_t j = 0; j < fNCols; j++) {
272  for (size_t i = 0; i < fNRows; i++) {
273  (*this)(i, j) = 0;
274  }
275  }
276 }
277 
278 } // namespace DNN
279 } // namespace TMVA
280 
281 #endif
m
auto * m
Definition: textangle.C:8
CpuBuffer.h
n
const Int_t n
Definition: legend1.C:16
TMVA::DNN::TCpuMatrix::GetNcols
size_t GetNcols() const
Definition: CpuMatrix.h:156
TMVA::DNN::TCpuMatrix::MapFrom
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as Map, but takes the input values from the matrix A and writes the results into this matrix.
Definition: CpuMatrix.h:241
TMVA::DNN::TCpuMatrix::~TCpuMatrix
~TCpuMatrix()=default
TMVA::DNN::TCpuMatrix::GetNrows
size_t GetNrows() const
Definition: CpuMatrix.h:155
f
#define f(i)
Definition: RSha256.hxx:122
TMVA::DNN::TCpuMatrix::Zero
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:269
TMVA::DNN::TCpuMatrix::Initialize
void Initialize()
Definition: CpuMatrix.cxx:97
TMVA::DNN::TCpuMatrix::Print
void Print() const
Definition: CpuMatrix.h:175
TMVA::Config::GetThreadExecutor
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition: Config.h:106
TMVA::DNN::TCpuMatrix::GetBuffer
const TCpuBuffer< AFloat > & GetBuffer() const
Definition: CpuMatrix.h:105
TMVA::Executor
Base Executor class.
Definition: Executor.h:42
TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:86
TMVA::DNN::TCpuMatrix::GetThreadExecutor
static Executor & GetThreadExecutor()
Definition: CpuMatrix.h:169
TMVA::DNN::TCpuMatrix::GetRawDataPointer
const AFloat * GetRawDataPointer() const
Definition: CpuMatrix.h:167
TMVA::DNN::TCpuMatrix::operator()
AFloat operator()(size_t i, size_t j) const
Return matrix element in row i and column j.
Definition: CpuMatrix.h:161
ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170
TMatrixT
Definition: TMatrixDfwd.h:22
TMatrix.h
TMVA::Config::Instance
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:105
TMVA::DNN::TCpuMatrix::operator=
TCpuMatrix< AFloat > & operator=(const TMatrixT< AFloat > &)
Copy from a TMatrixT.
TMVA::DNN::TCpuMatrix::GetNWorkItems
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:191
TMVA::DNN::TCpuMatrix::GetSize
size_t GetSize() const
Definition: CpuMatrix.h:158
TMVA::DNN::TCpuMatrix::GetDeviceBuffer
TCpuBuffer< AFloat > & GetDeviceBuffer()
Definition: CpuMatrix.h:107
TMVA::DNN::TCpuMatrix::InitializeOneVector
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:110
TMVA::Executor::Foreach
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:123
TMVA::DNN::TCpuMatrix::GetBuffer
TCpuBuffer< AFloat > & GetBuffer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
Definition: CpuMatrix.h:104
TMVA_DNN_PrintTCpuMatrix
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
Definition: CpuMatrix.h:66
Config.h
unsigned int
TMVA::DNN::TCpuMatrix::GetOnePointer
static const AFloat * GetOnePointer()
Definition: CpuMatrix.h:110
TMVA::DNN::TCpuMatrix::fBuffer
TCpuBuffer< AFloat > fBuffer
The buffer holding the matrix elements in column-major format.
Definition: CpuMatrix.h:91
TMVA::DNN::TCpuMatrix::Map
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition: CpuMatrix.h:212
R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:120
TMVA::DNN::TCpuMatrix::GetOnePointerSize
static size_t GetOnePointerSize()
Definition: CpuMatrix.h:112
TMVA::DNN::TCpuMatrix::TCpuMatrix
TCpuMatrix()
Definition: CpuMatrix.h:116
TMVA::DNN::TCpuMatrix::fNCols
size_t fNCols
Definition: CpuMatrix.h:94
ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
TMVA::DNN::TCpuMatrix::GetNoElements
size_t GetNoElements() const
Definition: CpuMatrix.h:157
TMVA::DNN::TCpuMatrix::operator()
AFloat & operator()(size_t i, size_t j)
Definition: CpuMatrix.h:162
TMVA::DNN::TCpuMatrix::GetDeviceBuffer
const TCpuBuffer< AFloat > & GetDeviceBuffer() const
Definition: CpuMatrix.h:108
TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:166
TMVA::DNN::TCpuMatrix::fNRows
size_t fNRows
Definition: CpuMatrix.h:95
TMVA::DNN::TCpuBuffer
TCpuBuffer.
Definition: CpuBuffer.h:55
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
TMVA::Config::GetNCpu
UInt_t GetNCpu()
Definition: Config.h:95
TMVA::DNN::TCpuMatrix::fOnes
static std::vector< AFloat > fOnes
Vector filled with ones used for BLAS calls.
Definition: CpuMatrix.h:88