Logo ROOT  
Reference Guide
CpuTensor.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Authors: Sitong An, Lorenzo Moneta 10/2019
3
4/*************************************************************************
5 * Copyright (C) 2019, ROOT *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12//////////////////////////////////////////////////////////
13// Definition of the CpuTensor class used to represent //
14// tensor data in deep neural nets (CNN, RNN, etc..) //
15//////////////////////////////////////////////////////////
16
17#ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
18#define TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
19
20#include <cstddef>
21#include <vector>
22
23#include "TMatrix.h"
24#include "TMVA/Config.h"
25#include "CpuBuffer.h"
26#include "CpuMatrix.h"
27#include <TMVA/Config.h>
28#include <TMVA/RTensor.hxx>
29
30namespace TMVA {
31namespace DNN {
32
33// CPU Tensor Class
34// It is a simple wrapper for TMVA RTensor based on
35// memory owned by CPU Buffer
36// We need to keep a pointer for CPUBuffer for fast conversion
37// without copying to TCpuMatrix
38// also provides compatibility with old interface
39
40template <typename AFloat>
41class TCpuTensor : public TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>> {
42
43private:
44 // TCpuTensor adds no private data members beyond those of RTensor
45public:
46 friend class TCpuMatrix<AFloat>;
47
51 using Scalar_t = AFloat;
52
53 // default constructor
54 TCpuTensor(): TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(0), {0})
55 {}
56
57 /** constructors from n m */
58 TCpuTensor(size_t n, size_t m, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
59 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(n * m), {n, m}, memlayout)
60 {}
61
62 /** constructors from batch size, depth, height*width */
63 TCpuTensor(size_t bsize, size_t depth, size_t hw, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
64 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * hw), {depth, hw, bsize}, memlayout)
65 {
66 if (memlayout == MemoryLayout::RowMajor)
67 this->ReshapeInplace({bsize, depth, hw});
68 }
69
70 /** constructors from batch size, depth, height, width */
71 TCpuTensor(size_t bsize, size_t depth, size_t height, size_t width,
72 MemoryLayout memlayout = MemoryLayout::ColumnMajor)
73 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * height * width),
74 {depth, height, width, bsize}, memlayout)
75 {
76 if (memlayout == MemoryLayout::RowMajor)
77 this->ReshapeInplace({bsize, depth, height, width});
78 }
79
80 /** constructors from a shape.*/
81 TCpuTensor(Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
82 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)),
83 shape, memlayout)
84 {}
85
86 /* constructors from a AFloat pointer and a shape. This is a copy */
87
88 TCpuTensor(AFloat *data, const Shape_t &shape,
89 MemoryLayout memlayout = MemoryLayout::ColumnMajor)
90 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)), shape, memlayout)
91 {
92 auto& container = *(this->GetContainer());
93 for (size_t i = 0; i < this->GetSize(); ++i) container[i] = data[i];
94 }
95
96
97
98 /** constructors from a TCpuBuffer and a shape */
99 //unsafe method for backwards compatibility, const not promised. A view.
100 TCpuTensor(const TCpuBuffer<AFloat>& buffer, Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
101 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(buffer), shape, memlayout) {
102 R__ASSERT(this->GetSize() <= this->GetContainer()->GetSize());
103 }
104
105
106
107 /** constructors from a TCpuMatrix. Memory layout is forced to be same as matrix (i.e. columnlayout) */
108 //unsafe method for backwards compatibility, const not promised. A view of underlying data.
109 TCpuTensor(const TCpuMatrix<AFloat> &matrix, size_t dim = 3, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
110 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(matrix.GetBuffer()),{matrix.GetNrows(), matrix.GetNcols()}, memlayout)
111 {
112
113 if (dim > 2) {
114 Shape_t shape = this->GetShape();
115
116 if (this->GetLayout() == MemoryLayout::ColumnMajor) {
117 shape.insert(shape.end(),dim-2, 1);
118 } else {
119 shape.insert(shape.begin(), dim - 2, 1);
120 }
121 this->ReshapeInplace(shape);
122 }
123 }
124
125
126 /** Convert to a TMatrixT<AFloat_t> object. Performs a deep copy of the matrix
127 * elements. */
128
129 operator TMatrixT<AFloat>() const {
130 // a matrix conversion is used only for rank-2 tensors, or rank-3 tensors whose first (batch) size is 1
131 if (this->GetShape().size() == 2 || (this->GetShape().size() == 3 && GetFirstSize() == 1)) {
133 return temp;
134 }
135 // convert as a flat vector
136 return TMatrixT<AFloat>(1, this->GetSize(), this->GetData());
137 }
138
139
140 /** Return raw pointer to the elements stored contiguously in column-major
141 * order. */
142 AFloat *GetRawDataPointer() { return *(this->GetContainer()); }
143 const AFloat *GetRawDataPointer() const { return *(this->GetContainer()); }
144
145 // for same API as CudaTensor (device buffer is the CpuBuffer)
146 const TCpuBuffer<AFloat> & GetDeviceBuffer() const {return *(this->GetContainer());}
148
149
150 size_t GetNoElements() const { return this->GetSize(); }
151
152 // return the size of the first dimension (if in row order) or last dimension if in column order
153 // Tensor is F x H x W x...for row order layout FHWC
154 // or H x W x ... x F for column order layout CHWF
155 // logic copied from TCudaTensor
156 size_t GetFirstSize() const
157 {
158 auto& shape = this->GetShape();
159 return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.back() : shape.front();
160 }
161
162 size_t GetCSize() const
163 {
164 auto& shape = this->GetShape();
165 if (shape.size() == 2) return 1;
166 return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.front() : shape[1]; // assume NHWC
167 }
168 //
169 size_t GetHSize() const
170 {
171 auto& shape = this->GetShape();
172 if (shape.size() == 2) return shape[0];
173 if (shape.size() == 3) return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[0] : shape[1] ;
174 if (shape.size() >= 4) return shape[2] ;
175 return 0;
176
177 }
178 size_t GetWSize() const
179 {
180 auto& shape = this->GetShape();
181 if (shape.size() == 2) return shape[1];
182 if (shape.size() == 3) return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[1] : shape[2] ;
183 if (shape.size() >= 4) return shape[3] ;
184 return 0;
185
186 }
187
188 // for backward compatibility (assume column-major
189 // for backward compatibility : for CM tensor (n1,n2,n3,n4) -> ( n1*n2*n3, n4)
190 // for RM tensor (n1,n2,n3,n4) -> ( n2*n3*n4, n1 ) ???
191 size_t GetNrows() const { return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetStrides().back() : this->GetShape().front();}
192 size_t GetNcols() const { return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetShape().back() : this->GetStrides().front(); }
193
194
195 MemoryLayout GetLayout() const { return this->GetMemoryLayout(); }
196
197 //this will be an unsafe view. Method exists for backwards compatibility only
199 {
200 size_t ndims = 0;
201 auto& shape = this->GetShape();
202 //check if squeezable but do not actually squeeze
203 for (auto& shape_i : shape){
204 if (shape_i != 1) {
205 ndims++;
206 }
207 }
208 assert(ndims <= 2 && shape.size() > 1); // to support shape cases {n,1}
209 return TCpuMatrix<AFloat>(*(this->GetContainer()), GetHSize(), GetWSize());
210 }
211
212 // Create copy, replace and return
214 {
215 TCpuTensor<AFloat> x(*this);
216 x.ReshapeInplace(shape);
217 return x;
218 }
219
220 // return a view of the i-th slice along the first dimension (row-major) or the last dimension (column-major),
221 // i.e. a single-event slice
223 {
224 auto &shape = this->GetShape();
225 auto layout = this->GetMemoryLayout();
226 Shape_t sliced_shape = (layout == MemoryLayout::RowMajor) ? Shape_t(shape.begin() + 1, shape.end())
227 : Shape_t(shape.begin(), shape.end() - 1);
228
229 size_t buffsize = (layout == MemoryLayout::RowMajor) ? this->GetStrides().front() : this->GetStrides().back();
230 size_t offset = i * buffsize;
231
232 return TCpuTensor<AFloat>(this->GetContainer()->GetSubBuffer(offset, buffsize), sliced_shape, layout);
233 }
234
235 TCpuTensor<AFloat> At(size_t i) const { return (const_cast<TCpuTensor<AFloat> &>(*this)).At(i); }
236
237 // set all the tensor contents to zero
238 void Zero()
239 {
240 AFloat *data = *(this->GetContainer());
241 for (size_t i = 0; i < this->GetSize(); ++i)
242 data[i] = 0;
243 }
244
245 // access single element - assume tensor dim is 2
246 AFloat &operator()(size_t i, size_t j)
247 {
248 auto &shape = this->GetShape();
249 assert(shape.size() == 2);
250 return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (*(this->GetContainer()))[i * shape[1] + j]
251 : (*(this->GetContainer()))[j * shape[0] + i];
252 }
253
254 // access single element - assume tensor dim is 3. First index i is always the major indipendent of row-major or
255 // column major row- major I - J - K . Column- major is J - K - I
256 AFloat &operator()(size_t i, size_t j, size_t k)
257 {
258 auto &shape = this->GetShape();
259 assert(shape.size() == 3);
260
261 return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
262 ? (*(this->GetContainer()))[i * shape[1] * shape[2] + j * shape[2] + k]
263 : (*(this->GetContainer()))[i * shape[0] * shape[1] + k * shape[0] + j]; // note that is J-K-I
264 }
265
266 // access single element - assume tensor dim is 2
267 AFloat operator()(size_t i, size_t j) const
268 {
269 auto &shape = this->GetShape();
270 assert(shape.size() == 2);
271 return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (this->GetData())[i * shape[1] + j]
272 : (this->GetData())[j * shape[0] + i];
273 }
274
275 AFloat operator()(size_t i, size_t j, size_t k) const
276 {
277 auto &shape = this->GetShape();
278 assert(shape.size() == 3);
279
280 return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
281 ? (this->GetData())[i * shape[1] * shape[2] + j * shape[2] + k]
282 : (this->GetData())[i * shape[0] * shape[1] + k * shape[0] + j]; // note that is J-K-I
283 }
284
285 /** Map the given function over the matrix elements. Executed in parallel
286 * using TThreadExecutor. */
287 template <typename Function_t>
288 void Map(Function_t & f);
289
290 /** Same as maps but takes the input values from the tensor \p A and writes
291 * the results in this tensor. */
292 template <typename Function_t>
293 void MapFrom(Function_t & f, const TCpuTensor<AFloat> &A);
294
295 size_t GetBufferUseCount() const { return this->GetContainer()->GetUseCount(); }
296
297 void Print(const char *name = "Tensor") const
298 {
300
301 for (size_t i = 0; i < this->GetSize(); i++)
302 std::cout << (this->GetData())[i] << " ";
303 std::cout << std::endl;
304 }
305 void PrintShape(const char *name = "Tensor") const
306 {
307 std::string memlayout = (GetLayout() == MemoryLayout::RowMajor) ? "RowMajor" : "ColMajor";
308 std::cout << name << " shape : { ";
309 auto &shape = this->GetShape();
310 for (size_t i = 0; i < shape.size() - 1; ++i)
311 std::cout << shape[i] << " , ";
312 std::cout << shape.back() << " } "
313 << " Layout : " << memlayout << std::endl;
314 }
315};
316
317//______________________________________________________________________________
318template <typename AFloat>
319template <typename Function_t>
320inline void TCpuTensor<AFloat>::Map(Function_t &f)
321{
322 AFloat *data = GetRawDataPointer();
323 size_t nelements = GetNoElements();
324 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
325
326 auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
327 size_t jMax = std::min(workerID + nsteps, nelements);
328 for (size_t j = workerID; j < jMax; ++j) {
329 data[j] = f(data[j]);
330 }
331 return 0;
332 };
333
334 if (nsteps < nelements) {
335 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
336
337 // for (size_t i = 0; i < nelements; i+=nsteps)
338 // ff(i);
339
340 } else {
341 R__ASSERT(nelements == nsteps);
342 ff(0);
343 }
344}
345
346//______________________________________________________________________________
347template <typename AFloat>
348template <typename Function_t>
349inline void TCpuTensor<AFloat>::MapFrom(Function_t &f, const TCpuTensor<AFloat> &A)
350{
351 AFloat *dataB = GetRawDataPointer();
352 const AFloat *dataA = A.GetRawDataPointer();
353
354 size_t nelements = GetNoElements();
355 R__ASSERT(nelements == A.GetNoElements());
356 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
357
358 auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
359 size_t jMax = std::min(workerID + nsteps, nelements);
360 for (size_t j = workerID; j < jMax; ++j) {
361 dataB[j] = f(dataA[j]);
362 }
363 return 0;
364 };
365 if (nsteps < nelements) {
366 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
367 // for (size_t i = 0; i < nelements; i+=nsteps)
368 // ff(i);
369
370 } else {
371 R__ASSERT(nelements == nsteps);
372 ff(0);
373 }
374}
375
376
377} // namespace DNN
378} // namespace TMVA
379
380#endif
#define f(i)
Definition: RSha256.hxx:104
unsigned int UInt_t
Definition: RtypesCore.h:42
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:96
char name[80]
Definition: TGX11.cxx:109
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition: Config.h:83
static Config & Instance()
static function: returns TMVA instance
Definition: Config.cxx:107
The TCpuMatrix class.
Definition: CpuMatrix.h:87
size_t GetNcols() const
Definition: CpuMatrix.h:152
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:187
size_t GetNrows() const
Definition: CpuMatrix.h:151
size_t GetBufferUseCount() const
Definition: CpuTensor.h:295
AFloat operator()(size_t i, size_t j, size_t k) const
Definition: CpuTensor.h:275
TCpuTensor(size_t n, size_t m, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from n m
Definition: CpuTensor.h:58
TCpuTensor(size_t bsize, size_t depth, size_t height, size_t width, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from batch size, depth, height, width
Definition: CpuTensor.h:71
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuTensor.h:142
size_t GetNoElements() const
Definition: CpuTensor.h:150
size_t GetWSize() const
Definition: CpuTensor.h:178
void Map(Function_t &f)
Map the given function over the matrix elements.
Definition: CpuTensor.h:320
const TCpuBuffer< AFloat > & GetDeviceBuffer() const
Definition: CpuTensor.h:146
TCpuTensor(size_t bsize, size_t depth, size_t hw, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from batch size, depth, height*width
Definition: CpuTensor.h:63
const AFloat * GetRawDataPointer() const
Definition: CpuTensor.h:143
TCpuBuffer< AFloat > & GetDeviceBuffer()
Definition: CpuTensor.h:147
size_t GetCSize() const
Definition: CpuTensor.h:162
AFloat & operator()(size_t i, size_t j, size_t k)
Definition: CpuTensor.h:256
TCpuTensor(const TCpuBuffer< AFloat > &buffer, Shape_t shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from a TCpuBuffer and a shape
Definition: CpuTensor.h:100
void MapFrom(Function_t &f, const TCpuTensor< AFloat > &A)
Same as maps but takes the input values from the tensor A and writes the results in this tensor.
Definition: CpuTensor.h:349
AFloat operator()(size_t i, size_t j) const
Definition: CpuTensor.h:267
TCpuTensor(const TCpuMatrix< AFloat > &matrix, size_t dim=3, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from a TCpuMatrix.
Definition: CpuTensor.h:109
TCpuMatrix< AFloat > GetMatrix() const
Definition: CpuTensor.h:198
TCpuTensor< AFloat > At(size_t i) const
Definition: CpuTensor.h:235
size_t GetNcols() const
Definition: CpuTensor.h:192
TCpuTensor(Shape_t shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
constructors from a shape.
Definition: CpuTensor.h:81
size_t GetFirstSize() const
Definition: CpuTensor.h:156
size_t GetNrows() const
Definition: CpuTensor.h:191
void PrintShape(const char *name="Tensor") const
Definition: CpuTensor.h:305
AFloat & operator()(size_t i, size_t j)
Definition: CpuTensor.h:246
TCpuTensor< AFloat > At(size_t i)
Definition: CpuTensor.h:222
friend class TCpuMatrix< AFloat >
Definition: CpuTensor.h:46
MemoryLayout GetLayout() const
Definition: CpuTensor.h:195
void Print(const char *name="Tensor") const
Definition: CpuTensor.h:297
TCpuTensor< AFloat > Reshape(Shape_t shape) const
Definition: CpuTensor.h:213
typename TMVA::Experimental::RTensor< AFloat >::Shape_t Shape_t
Definition: CpuTensor.h:48
TCpuTensor(AFloat *data, const Shape_t &shape, MemoryLayout memlayout=MemoryLayout::ColumnMajor)
Definition: CpuTensor.h:88
size_t GetHSize() const
Definition: CpuTensor.h:169
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition: Executor.h:110
RTensor is a container with contiguous memory and shape information.
Definition: RTensor.hxx:162
void ReshapeInplace(const Shape_t &shape)
Reshape tensor in place.
Definition: RTensor.hxx:312
RTensor(Value_t *data, Shape_t shape, MemoryLayout layout=MemoryLayout::RowMajor)
Construct a tensor as view on data.
Definition: RTensor.hxx:189
std::vector< std::size_t > Shape_t
Definition: RTensor.hxx:166
TMatrixT.
Definition: TMatrixT.h:39
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
int GetBuffer(PyObject *pyobject, char tc, int size, void *&buf, Bool_t check=kTRUE)
Retrieve a linear buffer pointer from the given pyobject.
Definition: Utility.cxx:562
static double A[]
std::size_t GetSizeFromShape(const T &shape)
Get size of tensor from shape vector.
Definition: RTensor.hxx:28
MemoryLayout
Memory layout type (copy from RTensor.hxx)
Definition: CudaTensor.h:47
create variable transformations
auto * m
Definition: textangle.C:8