Logo ROOT  
Reference Guide
CpuMatrix.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12//////////////////////////////////////////////////////////
13// Definition of the CpuMatrix class used to represent //
14// weight and bias matrices in neural nets. //
15//////////////////////////////////////////////////////////
16
17#ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
18#define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
19
20#ifdef R__USE_IMT
21#define DL_USE_MTE // use MT with tbb
22#endif
23
24#include <cstddef>
25#include <vector>
26
27#include "TMatrix.h"
28#include "TMVA/Config.h"
29#include "CpuBuffer.h"
30#include <TMVA/Config.h>
31
32// #define DEBUG_TMVA_TCPUMATRIX
33#if defined(DEBUG_TMVA_TCPUMATRIX)
34/*
35 * Debug(!) function for printing matrices.
36 *
37 * Prints the input expression `mat` using preprocessor directives (with
38 * `#mat`). E.g. `PrintMatrix(matA, "Test")` _could_ generate
39 * "matA is null pointer".
40 *
41 * Note: This is a preprocessor macro. It does _not_ respect namespaces.
42 *
43 * @param mat Matrix to print
44 * @param text Name of matrix
45 */
// Debug helper: prints a banner made of `text`, the spelled-out argument
// expression (`#mat`) and the matrix dimensions, then the elements row by row
// (comma separated) to std::cout. If the raw data pointer is null it reports
// that and terminates the process with exit(1).
//
// `mat` is parenthesized at every use so that arguments which are expressions
// (e.g. `*ptr`) expand correctly. Note that `mat` is evaluated several times,
// so it must not have side effects. The user of the macro has to provide
// <iostream> and <cstdlib>.
#define TMVA_DNN_PrintTCpuMatrix(mat, text) \
   { \
      auto _dpointer = (mat).GetRawDataPointer(); \
      if (_dpointer == nullptr) { \
         std::cout << #mat << " is null pointer" << std::endl; \
         exit(1); \
      } \
      auto _nrows = (mat).GetNrows(); \
      auto _ncols = (mat).GetNcols(); \
      std::cout << "---------------------" << text << " " << #mat << "(" << _nrows << "," << _ncols << ")" \
                << "--------------------" << std::endl; \
      for (size_t _i = 0; _i < _nrows; _i++) { \
         for (size_t _j = 0; _j < _ncols; _j++) { \
            std::cout << (mat)(_i, _j); \
            /* separator between columns, none after the last one */ \
            if (_j + 1 < _ncols) \
               std::cout << ","; \
         } \
         std::cout << std::endl; \
      } \
   }
66#else
67#define TMVA_DNN_PrintTCpuMatrix(mat, text)
68#endif
69
70namespace TMVA {
71namespace DNN {
72
73/** The TCpuMatrix class.
74 *
75 * Matrix class for multi-threaded CPU architectures. Uses the TCpuBuffer
76 * class to store the matrices in column-major format for compatibility with
77 * BLAS. Provides Map and MapFrom member functions to simplify the application of
78 * activation functions and derivatives to matrices.
79 *
80 * Copying and assignment of TCpuMatrix objects only performs shallow copies, i.e.
81 * copying is fast and the resulting objects share the element data.
82 *
83 * \tparam AFloat The floating point type used to represent the matrix elements.
84 */
85//______________________________________________________________________________
86template <typename AFloat>
88private:
89 static std::vector<AFloat> fOnes; ///< Vector filled with ones used for BLAS calls.
90
91public:
92 TCpuBuffer<AFloat> fBuffer; ///< The buffer holding the matrix elements
93 ///< in column-major format.
94private:
95 size_t fNCols;
96 size_t fNRows;
97
98public:
99 // friend class TCpuTensor<AFloat>;
100
101 /** Returns pointer to a vector holding only ones with a guaranteed length
102 * of the number of columns of every instantiated CpuMatrix object. */
103
104
106 const TCpuBuffer<AFloat>& GetBuffer() const {return fBuffer;}
107
108
109 static const AFloat *GetOnePointer() { return fOnes.data(); }
110
111 static size_t GetOnePointerSize() { return fOnes.size(); }
112
113 static void InitializeOneVector(size_t n);
114
116
117 /** Construct matrix and allocate space for its elements. */
118 TCpuMatrix(size_t nRows, size_t nCols);
119 /** Construct a TCpuMatrix object by (deeply) copying from a
120 * TMatrixT<AFloat> matrix. */
122 /** Construct a m-times-n matrix from the given buffer. The size must of
123 * course match. */
124 TCpuMatrix(const TCpuBuffer<AFloat> &buffer, size_t m, size_t n);
125
126 // N.B the default copy constructor does a shallow copy (NOT a deep one) !
127 TCpuMatrix(const TCpuMatrix &) = default;
128 TCpuMatrix(TCpuMatrix &&) = default;
129 TCpuMatrix &operator=(const TCpuMatrix &) = default;
131 ~TCpuMatrix() = default;
132
133 /** Clear content of the matrix and initialize to zero elements
134 */
135 void Zero();
136
137 /** Convert to a TMatrixT<AFloat> object. Performs a deep copy of the matrix
138 * elements. */
139 operator TMatrixT<AFloat>() const;
140
141 /** Map the given function over the matrix elements. Executed in parallel
142 * using TThreadExecutor. */
143 template <typename Function_t>
144 void Map(Function_t &f);
145
146 /** Same as Map, but takes the input values from the matrix \p A and writes
147 * the results in this matrix. */
148 template <typename Function_t>
149 void MapFrom(Function_t &f, const TCpuMatrix &A);
150
151 size_t GetNrows() const { return fNRows; }
152 size_t GetNcols() const { return fNCols; }
153 size_t GetNoElements() const { return fNRows * fNCols; }
154 size_t GetSize() const { return fNRows * fNCols; }
155
156 /** Return matrix element in row \p i and column \p j. */
157 AFloat operator()(size_t i, size_t j) const { return fBuffer[j * fNRows + i]; }
158 AFloat &operator()(size_t i, size_t j) { return fBuffer[j * fNRows + i]; }
159
160 /** Return raw pointer to the elements stored contiguously in column-major
161 * order. */
162 AFloat *GetRawDataPointer() { return fBuffer; }
163 const AFloat *GetRawDataPointer() const { return fBuffer; }
164
166
167 // static function to get the number of elements for task
168 static size_t GetNWorkItems(size_t nelements);
169
170 // print matrix
171 void Print() const
172 {
173 TCpuMatrix cpuMatrix = *this;
174 TMVA_DNN_PrintTCpuMatrix(cpuMatrix, "CpuMatrix");
175 }
176
177private:
178 void Initialize();
179};
180
// Out-of-class definition of the static member TCpuMatrix<AFloat>::fOnes (the
// vector of ones used for BLAS calls). It is brace-initialized and therefore
// starts out empty.
181template <typename AFloat>
182std::vector<AFloat> TCpuMatrix<AFloat>::fOnes{};
183
184// Inline Functions.
185//______________________________________________________________________________
186template <typename AFloat>
187size_t TCpuMatrix<AFloat>::GetNWorkItems(size_t nElements)
188{
189 // nElements should have at least 100
190 // const size_t nWorkers = TMVA::Config::Instance().GetNCpu();
191 // return (nElements > nWorkers) ? (int) nElements/nWorkers : 1;
192 const size_t minElements = 1000;
193 const size_t nCpu = TMVA::Config::Instance().GetNCpu();
194 if (nElements <= minElements)
195 return nElements;
196 if (nElements < nCpu * minElements) {
197 size_t nt = nElements / minElements;
198 return nElements / nt;
199 }
200 return nElements / nCpu;
201 // if (nElements < nCpu*20) return nElements/nCpu;
202 // return nElements/(nCpu*10);
203}
204
205//______________________________________________________________________________
206template <typename AFloat>
207template <typename Function_t>
208inline void TCpuMatrix<AFloat>::Map(Function_t &f)
209{
210 AFloat *data = GetRawDataPointer();
211 size_t nelements = GetNoElements();
212 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
213
214 auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
215 size_t jMax = std::min(workerID + nsteps, nelements);
216 for (size_t j = workerID; j < jMax; ++j) {
217 data[j] = f(data[j]);
218 }
219 return 0;
220 };
221
222 if (nsteps < nelements) {
223 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
224
225 // for (size_t i = 0; i < nelements; i+=nsteps)
226 // ff(i);
227
228 } else {
229 R__ASSERT(nelements == nsteps);
230 ff(0);
231 }
232}
233
234//______________________________________________________________________________
235template <typename AFloat>
236template <typename Function_t>
237inline void TCpuMatrix<AFloat>::MapFrom(Function_t &f, const TCpuMatrix &A)
238{
239 AFloat *dataB = GetRawDataPointer();
240 const AFloat *dataA = A.GetRawDataPointer();
241
242 size_t nelements = GetNoElements();
243 R__ASSERT(nelements == A.GetNoElements());
244 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
245
246 auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
247 size_t jMax = std::min(workerID + nsteps, nelements);
248 for (size_t j = workerID; j < jMax; ++j) {
249 dataB[j] = f(dataA[j]);
250 }
251 return 0;
252 };
253 if (nsteps < nelements) {
254 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
255 // for (size_t i = 0; i < nelements; i+=nsteps)
256 // ff(i);
257
258 } else {
259 R__ASSERT(nelements == nsteps);
260 ff(0);
261 }
262}
263//______________________________________________________________________________
264template <typename AFloat>
266{
267 for (size_t j = 0; j < fNCols; j++) {
268 for (size_t i = 0; i < fNRows; i++) {
269 (*this)(i, j) = 0;
270 }
271 }
272}
273
274} // namespace DNN
275} // namespace TMVA
276
277#endif
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
Definition: CpuMatrix.h:67
#define f(i)
Definition: RSha256.hxx:104
unsigned int UInt_t
Definition: RtypesCore.h:42
#define R__ASSERT(e)
Definition: TError.h:96
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition: Config.h:83
UInt_t GetNCpu()
Definition: Config.h:72
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:107
The TCpuMatrix class.
Definition: CpuMatrix.h:87
TCpuMatrix(TCpuMatrix &&)=default
static std::vector< AFloat > fOnes
Vector filled with ones used for BLAS calls.
Definition: CpuMatrix.h:89
TCpuMatrix & operator=(const TCpuMatrix &)=default
TCpuMatrix(const TCpuBuffer< AFloat > &buffer, size_t m, size_t n)
Construct a m-times-n matrix from the given buffer.
size_t GetNcols() const
Definition: CpuMatrix.h:152
void MapFrom(Function_t &f, const TCpuMatrix &A)
Same as Map, but takes the input values from the matrix A and writes the results in this matrix.
Definition: CpuMatrix.h:237
static size_t GetOnePointerSize()
Definition: CpuMatrix.h:111
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:265
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:162
AFloat & operator()(size_t i, size_t j)
Definition: CpuMatrix.h:158
void Print() const
Definition: CpuMatrix.h:171
static const AFloat * GetOnePointer()
Definition: CpuMatrix.h:109
TCpuMatrix(const TMatrixT< AFloat > &)
Construct a TCpuMatrix object by (deeply) copying from a TMatrixT<AFloat> matrix.
AFloat operator()(size_t i, size_t j) const
Return matrix element in row i and column j.
Definition: CpuMatrix.h:157
const AFloat * GetRawDataPointer() const
Definition: CpuMatrix.h:163
const TCpuBuffer< AFloat > & GetBuffer() const
Definition: CpuMatrix.h:106
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:187
size_t GetSize() const
Definition: CpuMatrix.h:154
size_t GetNrows() const
Definition: CpuMatrix.h:151
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:87
TCpuMatrix & operator=(TCpuMatrix &&)=default
TCpuBuffer< AFloat > fBuffer
The buffer holding the matrix elements in column-major format.
Definition: CpuMatrix.h:92
TCpuBuffer< AFloat > & GetBuffer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
Definition: CpuMatrix.h:105
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition: CpuMatrix.h:208
static Executor & GetThreadExecutor()
Definition: CpuMatrix.h:165
size_t GetNoElements() const
Definition: CpuMatrix.h:153
TCpuMatrix(const TCpuMatrix &)=default
Base Executor class.
Definition: Executor.h:35
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:110
TMatrixT.
Definition: TMatrixT.h:39
const Int_t n
Definition: legend1.C:16
static double A[]
create variable transformations
auto * m
Definition: textangle.C:8