Logo ROOT  
Reference Guide
CpuTensor.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Authors: Sitong An, Lorenzo Moneta 10/2019
3
4/*************************************************************************
5 * Copyright (C) 2019, ROOT *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12//////////////////////////////////////////////////////////
13// Definition of the CpuTensor class used to represent //
14// tensor data in deep neural nets (CNN, RNN, etc..) //
15//////////////////////////////////////////////////////////
16
17#ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
18#define TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
19
20#include <cstddef>
21
22
23#include "TMatrix.h"
24#include "TMVA/Config.h"
25#include "CpuBuffer.h"
26#include "CpuMatrix.h"
27#include <TMVA/RTensor.hxx>
28
29namespace TMVA {
30namespace DNN {
31
32// CPU Tensor Class
33// It is a simple wrapper for TMVA RTensor based on
34// memory owned by CPU Buffer
35// We need to keep a pointer for CPUBuffer for fast conversion
36// without copying to TCpuMatrix
37// also provides compatibility with old interface
38
39template <typename AFloat>
40class TCpuTensor : public TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>> {
41
42private:
43 // TCpuTensor adds no private members of its own on top of RTensor
44public:
45 friend class TCpuMatrix<AFloat>;
46
50 using Scalar_t = AFloat;
51
52 // default constructor
53 TCpuTensor(): TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(0), {0})
54 {}
55
56 /** constructors from n m */
57 TCpuTensor(size_t n, size_t m, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
58 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(n * m), {n, m}, memlayout)
59 {}
60
61 /** constructors from batch size, depth, height*width */
62 TCpuTensor(size_t bsize, size_t depth, size_t hw, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
63 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * hw), {depth, hw, bsize}, memlayout)
64 {
65 if (memlayout == MemoryLayout::RowMajor)
66 this->ReshapeInplace({bsize, depth, hw});
67 }
68
69 /** constructors from batch size, depth, height, width */
70 TCpuTensor(size_t bsize, size_t depth, size_t height, size_t width,
71 MemoryLayout memlayout = MemoryLayout::ColumnMajor)
72 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * height * width),
73 {depth, height, width, bsize}, memlayout)
74 {
75 if (memlayout == MemoryLayout::RowMajor)
76 this->ReshapeInplace({bsize, depth, height, width});
77 }
78
79 /** constructors from a shape.*/
80 TCpuTensor(Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
81 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)),
82 shape, memlayout)
83 {}
84
85 /* constructors from a AFloat pointer and a shape. This is a copy */
86
87 TCpuTensor(AFloat *data, const Shape_t &shape,
88 MemoryLayout memlayout = MemoryLayout::ColumnMajor)
89 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)), shape, memlayout)
90 {
91 auto& container = *(this->GetContainer());
92 for (size_t i = 0; i < this->GetSize(); ++i) container[i] = data[i];
93 }
94
95
96
97 /** constructors from a TCpuBuffer and a shape */
98 //unsafe method for backwards compatibility, const not promised. A view.
99 TCpuTensor(const TCpuBuffer<AFloat>& buffer, Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
100 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(buffer), shape, memlayout) {
101 R__ASSERT(this->GetSize() <= this->GetContainer()->GetSize());
102 }
103
104
105
106 /** constructors from a TCpuMatrix. Memory layout is forced to be same as matrix (i.e. columnlayout) */
107 //unsafe method for backwards compatibility, const not promised. A view of underlying data.
108 TCpuTensor(const TCpuMatrix<AFloat> &matrix, size_t dim = 3, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
109 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(matrix.GetBuffer()),{matrix.GetNrows(), matrix.GetNcols()}, memlayout)
110 {
111
112 if (dim > 2) {
113 Shape_t shape = this->GetShape();
114
115 if (this->GetLayout() == MemoryLayout::ColumnMajor) {
116 shape.insert(shape.end(),dim-2, 1);
117 } else {
118 shape.insert(shape.begin(), dim - 2, 1);
119 }
120 this->ReshapeInplace(shape);
121 }
122 }
123
124
125 /** Convert to a TMatrixT<AFloat_t> object. Performs a deep copy of the matrix
126 * elements. */
127
128 operator TMatrixT<AFloat>() const {
129 // this should work only for size 2 tensors, or size 3 tensors whose first size is 1
130 if (this->GetShape().size() == 2 || (this->GetShape().size() == 3 && GetFirstSize() == 1)) {
132 return temp;
133 }
134 // convert as a flat vector
135 return TMatrixT<AFloat>(1, this->GetSize(), this->GetData());
136 }
137
138
139 /** Return raw pointer to the elements stored contiguously in column-major
140 * order. */
141 AFloat *GetRawDataPointer() { return *(this->GetContainer()); }
142 const AFloat *GetRawDataPointer() const { return *(this->GetContainer()); }
143
144 // for same API as CudaTensor (device buffer is the CpuBuffer)
145 const TCpuBuffer<AFloat> & GetDeviceBuffer() const {return *(this->GetContainer());}
147
148
149 size_t GetNoElements() const { return this->GetSize(); }
150
151 // return the size of the first dimension (if in row order) or last dimension if in column order
152 // Tensor is F x H x W x...for row order layout FHWC
153 // or H x W x ... x F for column order layout CHWF
154 // logic copied from TCudaTensor
155 size_t GetFirstSize() const
156 {
157 auto& shape = this->GetShape();
158 return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.back() : shape.front();
159 }
160
161 size_t GetCSize() const
162 {
163 auto& shape = this->GetShape();
164 if (shape.size() == 2) return 1;
165 return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.front() : shape[1]; // assume NHWC
166 }
167 //
168 size_t GetHSize() const
169 {
170 auto& shape = this->GetShape();
171 if (shape.size() == 2) return shape[0];
172 if (shape.size() == 3) return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[0] : shape[1] ;
173 if (shape.size() >= 4) return shape[2] ;
174 return 0;
175
176 }
177 size_t GetWSize() const
178 {
179 auto& shape = this->GetShape();
180 if (shape.size() == 2) return shape[1];
181 if (shape.size() == 3) return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[1] : shape[2] ;
182 if (shape.size() >= 4) return shape[3] ;
183 return 0;
184
185 }
186
187 // for backward compatibility (assume column-major
188 // for backward compatibility : for CM tensor (n1,n2,n3,n4) -> ( n1*n2*n3, n4)
189 // for RM tensor (n1,n2,n3,n4) -> ( n2*n3*n4, n1 ) ???
190 size_t GetNrows() const { return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetStrides().back() : this->GetShape().front();}
191 size_t GetNcols() const { return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetShape().back() : this->GetStrides().front(); }
192
193
194 MemoryLayout GetLayout() const { return this->GetMemoryLayout(); }
195
196 //this will be an unsafe view. Method exists for backwards compatibility only
198 {
199 size_t ndims = 0;
200 auto& shape = this->GetShape();
201 //check if squeezable but do not actually squeeze
202 for (auto& shape_i : shape){
203 if (shape_i != 1) {
204 ndims++;
205 }
206 }
207 assert(ndims <= 2 && shape.size() > 1); // to support shape cases {n,1}
208 return TCpuMatrix<AFloat>(*(this->GetContainer()), GetHSize(), GetWSize());
209 }
210
211 // Create copy, replace and return
213 {
214 TCpuTensor<AFloat> x(*this);
215 x.ReshapeInplace(shape);
216 return x;
217 }
218
219 // return a view of a slice along the first dimension (if row-wise) or the last dimension (if column-wise),
220 // i.e. a single-event slice
222 {
223 auto &shape = this->GetShape();
224 auto layout = this->GetMemoryLayout();
225 Shape_t sliced_shape = (layout == MemoryLayout::RowMajor) ? Shape_t(shape.begin() + 1, shape.end())
226 : Shape_t(shape.begin(), shape.end() - 1);
227
228 size_t buffsize = (layout == MemoryLayout::RowMajor) ? this->GetStrides().front() : this->GetStrides().back();
229 size_t offset = i * buffsize;
230
231 return TCpuTensor<AFloat>(this->GetContainer()->GetSubBuffer(offset, buffsize), sliced_shape, layout);
232 }
233
234 TCpuTensor<AFloat> At(size_t i) const { return (const_cast<TCpuTensor<AFloat> &>(*this)).At(i); }
235
236 // for compatibility with old tensor (std::vector<matrix>)
238 assert(this->GetMemoryLayout() == MemoryLayout::ColumnMajor );
239 return At(i).GetMatrix();
240 }
241
242 // set all the tensor contents to zero
243 void Zero()
244 {
245 AFloat *data = *(this->GetContainer());
246 for (size_t i = 0; i < this->GetSize(); ++i)
247 data[i] = 0;
248 }
249
250 // access single element - assume tensor dim is 2
251 AFloat &operator()(size_t i, size_t j)
252 {
253 auto &shape = this->GetShape();
254 assert(shape.size() == 2);
255 return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (*(this->GetContainer()))[i * shape[1] + j]
256 : (*(this->GetContainer()))[j * shape[0] + i];
257 }
258
259 // access single element - assume tensor dim is 3. First index i is always the major indipendent of row-major or
260 // column major row- major I - J - K . Column- major is J - K - I
261 AFloat &operator()(size_t i, size_t j, size_t k)
262 {
263 auto &shape = this->GetShape();
264 assert(shape.size() == 3);
265
266 return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
267 ? (*(this->GetContainer()))[i * shape[1] * shape[2] + j * shape[2] + k]
268 : (*(this->GetContainer()))[i * shape[0] * shape[1] + k * shape[0] + j]; // note that is J-K-I
269 }
270
271 // access single element - assume tensor dim is 2
272 AFloat operator()(size_t i, size_t j) const
273 {
274 auto &shape = this->GetShape();
275 assert(shape.size() == 2);
276 return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (this->GetData())[i * shape[1] + j]
277 : (this->GetData())[j * shape[0] + i];
278 }
279
280 AFloat operator()(size_t i, size_t j, size_t k) const
281 {
282 auto &shape = this->GetShape();
283 assert(shape.size() == 3);
284
285 return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
286 ? (this->GetData())[i * shape[1] * shape[2] + j * shape[2] + k]
287 : (this->GetData())[i * shape[0] * shape[1] + k * shape[0] + j]; // note that is J-K-I
288 }
289
290 /** Map the given function over the matrix elements. Executed in parallel
291 * using TThreadExecutor. */
292 template <typename Function_t>
293 void Map(Function_t & f);
294
295 /** Same as Map, but takes the input values from the tensor \p A and writes
296 * the results into this tensor. */
297 template <typename Function_t>
298 void MapFrom(Function_t & f, const TCpuTensor<AFloat> &A);
299
300 size_t GetBufferUseCount() const { return this->GetContainer()->GetUseCount(); }
301
302 void Print(const char *name = "Tensor") const
303 {
305
306 for (size_t i = 0; i < this->GetSize(); i++)
307 std::cout << (this->GetData())[i] << " ";
308 std::cout << std::endl;
309 }
310 void PrintShape(const char *name = "Tensor") const
311 {
312 std::string memlayout = (GetLayout() == MemoryLayout::RowMajor) ? "RowMajor" : "ColMajor";
313 std::cout << name << " shape : { ";
314 auto &shape = this->GetShape();
315 for (size_t i = 0; i < shape.size() - 1; ++i)
316 std::cout << shape[i] << " , ";
317 std::cout << shape.back() << " } "
318 << " Layout : " << memlayout << std::endl;
319 }
320};
321
322//______________________________________________________________________________
323template <typename AFloat>
324template <typename Function_t>
325inline void TCpuTensor<AFloat>::Map(Function_t &f)
326{
327 AFloat *data = GetRawDataPointer();
328 size_t nelements = GetNoElements();
329 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
330
331 auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
332 size_t jMax = std::min(workerID + nsteps, nelements);
333 for (size_t j = workerID; j < jMax; ++j) {
334 data[j] = f(data[j]);
335 }
336 return 0;
337 };
338
339 if (nsteps < nelements) {
340 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
341
342 // for (size_t i = 0; i < nelements; i+=nsteps)
343 // ff(i);
344
345 } else {
346 R__ASSERT(nelements == nsteps);
347 ff(0);
348 }
349}
350
351//______________________________________________________________________________
352template <typename AFloat>
353template <typename Function_t>
354inline void TCpuTensor<AFloat>::MapFrom(Function_t &f, const TCpuTensor<AFloat> &A)
355{
356 AFloat *dataB = GetRawDataPointer();
357 const AFloat *dataA = A.GetRawDataPointer();
358
359 size_t nelements = GetNoElements();
360 R__ASSERT(nelements == A.GetNoElements());
361 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
362
363 auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
364 size_t jMax = std::min(workerID + nsteps, nelements);
365 for (size_t j = workerID; j < jMax; ++j) {
366 dataB[j] = f(dataA[j]);
367 }
368 return 0;
369 };
370 if (nsteps < nelements) {
371 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
372 // for (size_t i = 0; i < nelements; i+=nsteps)
373 // ff(i);
374
375 } else {
376 R__ASSERT(nelements == nsteps);
377 ff(0);
378 }
379}
380
381
382} // namespace DNN
383} // namespace TMVA
384
385#endif
#define f(i)
Definition: RSha256.hxx:104
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:96
char name[80]
Definition: TGX11.cxx:109
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition: Config.h:83
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:107
The TCpuMatrix class.
Definition: CpuMatrix.h:86
size_t GetNcols() const
Definition: CpuMatrix.h:156
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:191
size_t GetNrows() const
Definition: CpuMatrix.h:155
size_t GetBufferUseCount() const
Definition: CpuTensor.h:300
AFloat operator()(size_t i, size_t j, size_t k) const
Definition: CpuTensor.h:280
TCpuTensor(size_t n, size_t m, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from n m
Definition: CpuTensor.h:57
TCpuTensor(size_t bsize, size_t depth, size_t height, size_t width, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from batch size, depth, height, width
Definition: CpuTensor.h:70
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuTensor.h:141
size_t GetNoElements() const
Definition: CpuTensor.h:149
size_t GetWSize() const
Definition: CpuTensor.h:177
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition: CpuTensor.h:325
const TCpuBuffer< AFloat > & GetDeviceBuffer() const
Definition: CpuTensor.h:145
TCpuTensor(size_t bsize, size_t depth, size_t hw, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from batch size, depth, height*width
Definition: CpuTensor.h:62
TCpuMatrix< AFloat > operator[](size_t i) const
Definition: CpuTensor.h:237
const AFloat * GetRawDataPointer() const
Definition: CpuTensor.h:142
TCpuBuffer< AFloat > & GetDeviceBuffer()
Definition: CpuTensor.h:146
size_t GetCSize() const
Definition: CpuTensor.h:161
AFloat & operator()(size_t i, size_t j, size_t k)
Definition: CpuTensor.h:261
TCpuTensor(const TCpuBuffer< AFloat > &buffer, Shape_t shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from a TCpuBuffer and a shape
Definition: CpuTensor.h:99
void MapFrom(Function_t &f, const TCpuTensor< AFloat > &A)
Same as maps but takes the input values from the tensor A and writes the results in this tensor.
Definition: CpuTensor.h:354
AFloat operator()(size_t i, size_t j) const
Definition: CpuTensor.h:272
TCpuTensor(const TCpuMatrix< AFloat > &matrix, size_t dim=3, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from a TCpuMatrix.
Definition: CpuTensor.h:108
TCpuMatrix< AFloat > GetMatrix() const
Definition: CpuTensor.h:197
TCpuTensor< AFloat > At(size_t i) const
Definition: CpuTensor.h:234
size_t GetNcols() const
Definition: CpuTensor.h:191
TCpuTensor(Shape_t shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from a shape.
Definition: CpuTensor.h:80
size_t GetFirstSize() const
Definition: CpuTensor.h:155
size_t GetNrows() const
Definition: CpuTensor.h:190
void PrintShape(const char *name="Tensor") const
Definition: CpuTensor.h:310
AFloat & operator()(size_t i, size_t j)
Definition: CpuTensor.h:251
TCpuTensor< AFloat > At(size_t i)
Definition: CpuTensor.h:221
friend class TCpuMatrix< AFloat >
Definition: CpuTensor.h:45
MemoryLayout GetLayout() const
Definition: CpuTensor.h:194
void Print(const char *name="Tensor") const
Definition: CpuTensor.h:302
TCpuTensor< AFloat > Reshape(Shape_t shape) const
Definition: CpuTensor.h:212
typename TMVA::Experimental::RTensor< AFloat >::Shape_t Shape_t
Definition: CpuTensor.h:47
TCpuTensor(AFloat *data, const Shape_t &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
Definition: CpuTensor.h:87
size_t GetHSize() const
Definition: CpuTensor.h:168
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:110
RTensor is a container with contiguous memory and shape information.
Definition: RTensor.hxx:162
void ReshapeInplace(const Shape_t &shape)
Reshape tensor in place.
Definition: RTensor.hxx:312
RTensor(Value_t *data, Shape_t shape, MemoryLayout layout=MemoryLayout::RowMajor)
Construct a tensor as view on data.
Definition: RTensor.hxx:189
std::vector< std::size_t > Shape_t
Definition: RTensor.hxx:166
TMatrixT.
Definition: TMatrixT.h:39
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
Py_ssize_t GetBuffer(PyObject *pyobject, char tc, int size, void *&buf, bool check=true)
Definition: Utility.cxx:614
static double A[]
std::size_t GetSizeFromShape(const T &shape)
Get size of tensor from shape vector.
Definition: RTensor.hxx:28
MemoryLayout
Memory layout type (copy from RTensor.hxx)
Definition: CudaTensor.h:47
create variable transformations
auto * m
Definition: textangle.C:8