Logo ROOT   6.14/05
Reference Guide
CpuMatrix.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 20/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////
13 // Definition of the CpuMatrix class used to represent //
14 // weight and bias matrices in neural nets. //
15 //////////////////////////////////////////////////////////
16 
17 #ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
18 #define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
19 
20 #include <cstddef>
21 #include <vector>
22 
23 #include "TMatrix.h"
24 #include "TMVA/Config.h"
25 #include "CpuBuffer.h"
26 #include <TMVA/Config.h>
27 
28 //#define DEBUG_TMVA_TCPUMATRIX
#if defined(DEBUG_TMVA_TCPUMATRIX)
// The debug macro below writes to std::cout and calls exit(); include what it
// needs here so the header does not depend on the including translation unit.
#include <cstdlib>
#include <iostream>
/*
 * Debug helper: print the matrix expression `mat` to std::cout.
 *
 * Output is a banner line containing `text`, the spelling of the `mat`
 * argument and its (rows,cols), followed by one comma-separated line per row.
 * If mat.GetRawDataPointer() is null, a message is printed and the process
 * terminates with exit(1).
 *
 * `mat` is evaluated several times, so it must be free of side effects. Both
 * arguments are parenthesized in the expansion so that expressions such as
 * `*ptr` bind as intended.
 */
#define PrintMatrix(mat, text)                                                                                \
   {                                                                                                          \
      auto _dpointer = (mat).GetRawDataPointer();                                                             \
      if (_dpointer == nullptr) {                                                                             \
         std::cout << #mat << " is null pointer" << std::endl;                                                \
         exit(1);                                                                                             \
      }                                                                                                       \
      auto _nrows = (mat).GetNrows();                                                                         \
      auto _ncols = (mat).GetNcols();                                                                         \
      std::cout << "---------------------" << (text) << " " << #mat << "(" << _nrows << "," << _ncols << ")"  \
                << "--------------------" << std::endl;                                                       \
      for (size_t _i = 0; _i < _nrows; _i++) {                                                                \
         for (size_t _j = 0; _j < _ncols; _j++) {                                                             \
            std::cout << (mat)(_i, _j);                                                                       \
            if (_j < _ncols - 1) std::cout << ",";                                                            \
         }                                                                                                    \
         std::cout << std::endl;                                                                              \
      }                                                                                                       \
   }
#else
// Without DEBUG_TMVA_TCPUMATRIX the macro expands to nothing; its arguments are
// not evaluated.
#define PrintMatrix(mat, text)
#endif
52 
53 namespace TMVA
54 {
55 namespace DNN
56 {
57 
58 /** The TCpuMatrix class.
59  *
60  * Matrix class for multi-threaded CPU architectures. Uses the TCpuBuffer
61  * class to store the matrices in column-major format for compatibility with
62  * BLAS. Provides Map and MapFrom member functions to simplify the application of
63  * activation functions and derivatives to matrices.
64  *
65  * Copying and assignment of TCpuMatrix objects only performs shallow copies, i.e.
66  * copying is fast and the resulting objects share the element data.
67  *
68  * \tparam AFloat The floating point type used to represent the matrix elements.
69  */
70 //______________________________________________________________________________
71 template<typename AFloat>
73 {
74 private:
75  static std::vector<AFloat> fOnes; ///< Vector filled with ones used for BLAS calls.
76 
77  TCpuBuffer<AFloat> fBuffer; ///< The buffer holding the matrix elements
78  ///< in column-major format.
79  size_t fNCols;
80  size_t fNRows;
81 
82 public:
83 
84  /** Returns pointer to a vector holding only ones with a guaranteed length
85  * of the number of columns of every instantiated CpuMatrix object. */
86  static const AFloat * GetOnePointer() {return fOnes.data();}
87 
88  static size_t GetOnePointerSize() { return fOnes.size(); }
89 
90  static void InitializeOneVector( size_t n);
91 
92  /** Construct matrix and allocate space for its elements. */
93  TCpuMatrix(size_t nRows, size_t nCols);
94  /** Construct a TCpuMatrix object by (deeply) copying from a
95  * TMatrixT<Double_t> matrix. */
97  /** Construct a m-times-n matrix from the given buffer. The size must of
98  * course match. */
99  TCpuMatrix(const TCpuBuffer<AFloat> &buffer, size_t m, size_t n);
100 
101  //N.B the default copy constructor does a shallow copy (NOT a deep one) !
102  TCpuMatrix(const TCpuMatrix &) = default;
103  TCpuMatrix( TCpuMatrix &&) = default;
104  TCpuMatrix & operator=(const TCpuMatrix &) = default;
105  TCpuMatrix & operator=(TCpuMatrix &&) = default;
106  ~TCpuMatrix() = default;
107 
108  /** Clear content of the matrix and initialize to zero elements
109  */
110  void Zero();
111 
112  /** Convert to a TMatrixT<Double_t> object. Performs a deep copy of the matrix
113  * elements. */
114  operator TMatrixT<Double_t>() const;
115 
116  /** Map the given function over the matrix elements. Executed in parallel
117  * using TThreadExecutor. */
118  template <typename Function_t>
119  void Map(Function_t &f);
120 
121  /** Same as maps but takes the input values from the matrix \p A and writes
122  * the results in this matrix. */
123  template <typename Function_t>
124  void MapFrom(Function_t &f, const TCpuMatrix & A);
125 
126  size_t GetNrows() const {return fNRows;}
127  size_t GetNcols() const {return fNCols;}
128  size_t GetNElements() const {return fNRows * fNCols;}
129 
130  /** Return matrix element in row \p i and column \p j. */
131  AFloat operator()(size_t i, size_t j) const {return fBuffer[j * fNRows + i];}
132  AFloat & operator()(size_t i, size_t j) {return fBuffer[j * fNRows + i];}
133 
134  /** Return raw pointer to the elements stored contiguously in column-major
135  * order. */
136  AFloat * GetRawDataPointer() {return fBuffer;}
137  const AFloat * GetRawDataPointer() const {return fBuffer;}
138 
140 
141  // static function to get the number of elements for task
142  static size_t GetNWorkItems(size_t nelements);
143 
144  // print matrix
145  void Print() const {
146  TCpuMatrix cpuMatrix = *this;
147  PrintMatrix(cpuMatrix,"CpuMatrix");
148  }
149 
150 
151 private:
152 
153  void Initialize();
154 
155 };
156 
157 template<typename AFloat>
158 std::vector<AFloat> TCpuMatrix<AFloat>::fOnes {};
159 
160 
161 // Inline Functions.
162 //______________________________________________________________________________
163 template<typename AFloat>
164 size_t TCpuMatrix<AFloat>::GetNWorkItems(size_t nElements)
165 {
166  // const size_t nWorkers = TMVA::Config::Instance().GetNCpu();
167  // return (nElements > nWorkers) ? (int) nElements/nWorkers : 1;
168  const size_t nCpu = TMVA::Config::Instance().GetNCpu();
169  if (nElements <= nCpu) return 1;
170  if (nElements < nCpu*20) return nElements/nCpu;
171  return nElements/(nCpu*10);
172 }
173 
174 
175 //______________________________________________________________________________
176 template<typename AFloat>
177 template<typename Function_t>
178 inline void TCpuMatrix<AFloat>::Map(Function_t &f)
179 {
180  AFloat *data = GetRawDataPointer();
181  size_t nelements = GetNElements();
182  size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
183 
184  auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID)
185  {
186  for (size_t j = 0; j < nsteps; ++j) {
187  size_t idx = workerID+j;
188  if (idx >= nelements) break;
189  data[idx] = f(data[idx]);
190  }
191  return 0;
192  };
193 
194 #ifdef DL_USE_MTE
195  TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0,nelements,nsteps));
196 #else
197  for (size_t i = 0; i < nelements; i+=nsteps)
198  ff(i);
199 #endif
200 }
201 
202 //______________________________________________________________________________
203 template<typename AFloat>
204 template<typename Function_t>
205 inline void TCpuMatrix<AFloat>::MapFrom(Function_t &f, const TCpuMatrix &A)
206 {
207  AFloat *dataB = GetRawDataPointer();
208  const AFloat *dataA = A.GetRawDataPointer();
209 
210  size_t nelements = GetNElements();
211  R__ASSERT(nelements == A.GetNElements() );
212  size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
213 
214  auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID)
215  {
216  for (size_t j = 0; j < nsteps; ++j) {
217  size_t idx = workerID+j;
218  if (idx >= nelements) break;
219  dataB[idx] = f(dataA[idx]);
220  }
221  return 0;
222  };
223 #ifdef DL_USE_MTE
224  TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0,nelements,nsteps));
225 #else
226  for (size_t i = 0; i < nelements; i+=nsteps)
227  ff(i);
228 #endif
229 }
230 
231 //______________________________________________________________________________
232 template<typename AFloat>
234 {
235  for (size_t j = 0; j < fNCols; j++) {
236  for (size_t i = 0; i < fNRows; i++) {
237  (*this)(i, j) = 0;
238  }
239  }
240 }
241 
242 
243 } // namespace DNN
244 } // namespace TMVA
245 
246 #endif
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
AFloat operator()(size_t i, size_t j) const
Return matrix element in row i and column j.
Definition: CpuMatrix.h:131
TCpuBuffer< AFloat > fBuffer
The buffer holding the matrix elements in column-major format.
Definition: CpuMatrix.h:77
The TCpuMatrix class.
Definition: CpuMatrix.h:72
auto * m
Definition: textangle.C:8
size_t GetNcols() const
Definition: CpuMatrix.h:127
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:109
#define R__ASSERT(e)
Definition: TError.h:96
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:87
#define f(i)
Definition: RSha256.hxx:104
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:164
#define PrintMatrix(mat, text)
Definition: CpuMatrix.h:50
static double A[]
AFloat & operator()(size_t i, size_t j)
Definition: CpuMatrix.h:132
size_t GetNElements() const
Definition: CpuMatrix.h:128
static std::vector< AFloat > fOnes
Vector filled with ones used for BLAS calls.
Definition: CpuMatrix.h:75
ROOT::TThreadExecutor & GetThreadExecutor()
Definition: Config.h:82
This class provides a simple interface to execute the same task multiple times in parallel...
void Print() const
Definition: CpuMatrix.h:145
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as Map but takes the input values from the matrix A and writes the results in this matrix...
Definition: CpuMatrix.h:205
TCpuBuffer.
Definition: CpuBuffer.h:43
unsigned int UInt_t
Definition: RtypesCore.h:42
static size_t GetOnePointerSize()
Definition: CpuMatrix.h:88
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:139
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:136
TCpuMatrix & operator=(const TCpuMatrix &)=default
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
Definition: CpuMatrix.h:86
UInt_t GetNCpu()
Definition: Config.h:76
TCpuMatrix(size_t nRows, size_t nCols)
Construct matrix and allocate space for its elements.
Definition: CpuMatrix.cxx:23
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition: CpuMatrix.h:178
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:233
const AFloat * GetRawDataPointer() const
Definition: CpuMatrix.h:137
Abstract ClassifierFactory template that handles arbitrary types.
size_t GetNrows() const
Definition: CpuMatrix.h:126
const Int_t n
Definition: legend1.C:16