Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
SOFIE_common.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_SOFIE_COMMON
2#define TMVA_SOFIE_SOFIE_COMMON
3
4#include "TMVA/RTensor.hxx"
5
6#include "ROOT/RSpan.hxx"
7
8#include <stdexcept>
9#include <type_traits>
10#include <cstdint>
11#include <cstring>
12#include <complex>
13#include <string>
14#include <vector>
15#include <map>
16#include <memory>
17#include <regex>
18#include <sstream>
19#include <iostream>
20#include <iomanip>
21#include <cassert>
22#include <limits>
23
24namespace TMVA {
25namespace Experimental {
26namespace SOFIE {
27
/// Element type of a tensor.
/// The numeric values are order sensitive and must not be changed.
enum class ETensorType {
   UNDEFINED = 0,
   FLOAT = 1,
   UINT8 = 2,
   INT8 = 3,
   UINT16 = 4,
   INT16 = 5,
   INT32 = 6,
   INT64 = 7,
   STRING = 8,
   BOOL = 9,
   FLOAT16 = 10,
   DOUBLE = 11,
   UINT32 = 12,
   UINT64 = 13,
   COMPLEX64 = 14,
   COMPLEX28 = 15, // NOTE(review): presumably meant COMPLEX128 (the ONNX name for value 15) - confirm before renaming
   BFLOAT16 = 16
};
32
/// Identifier of an activation function.
enum class EActivationType {
   UNDEFINED = 0,
   RELU = 1,
   SOFTMAX = 2,
   SIGMOID = 3,
   LEAKYRELU = 4,
   TANH = 5,
   ELU = 6
};
36
37constexpr size_t GetTypeSize(ETensorType type) {
38 switch (type) {
39 case ETensorType::FLOAT: return sizeof(float);
40 case ETensorType::DOUBLE: return sizeof(double);
41 case ETensorType::UINT8: return sizeof(uint8_t);
42 case ETensorType::INT8: return sizeof(int8_t);
43 case ETensorType::UINT16: return sizeof(uint16_t);
44 case ETensorType::INT16: return sizeof(int16_t);
45 case ETensorType::INT32: return sizeof(int32_t);
46 case ETensorType::INT64: return sizeof(int64_t);
47 case ETensorType::UINT32: return sizeof(uint32_t);
48 case ETensorType::UINT64: return sizeof(uint64_t);
49 case ETensorType::BOOL: return sizeof(bool);
50 case ETensorType::STRING: return sizeof(std::string);
51 default: return 0;
52 }
53}
54
/// Alias for a signed 64 bit integer.
using int_t = std::int64_t;
56
59
// find if a string represents a number
bool IsInteger(const std::string & s);

/// One dimension of a tensor shape.
/// A dimension is either a fixed integer value (isParam == false, value in dim)
/// or a parametric one (isParam == true) identified by the string param.
struct Dim{
   bool isParam = false;  ///< true when the dimension is parametric
   size_t dim = 0;        ///< numeric value (for a parametric dimension: the optional default value)
   std::string param;     ///< parameter name or expression

   // default constructor (for I/O)
   Dim() {}

   // constructor for a parametric dimension with the option to pass a default dim value
   // We use -1 for dim to indicate that the param dimension is an expression (e.g. "d1+d2")
   // in case the string represents a number make Dim not parametric
   Dim(const std::string & p, size_t d = 0) : isParam(true), dim(d), param(p)
   {
      if (IsInteger(p)) {
         isParam = false;
         // Parse through a 64 bit signed integer: std::stoi returns an int and throws
         // std::out_of_range for values above INT_MAX although dim is a size_t.
         // Negative values still wrap to the same size_t value as before.
         dim = static_cast<size_t>(std::stoll(p));
      }
   }

   // constructor for a non-parametric dimension
   Dim(size_t d) : dim(d) {}

   /// String form of the dimension: the parameter string if parametric, the number otherwise.
   std::string GetVal() const {
      // cast to int64_t for negative shape values
      return (isParam) ? param : std::to_string(static_cast<int64_t>(dim));
   }

   /// Write GetVal() to the given stream (member form, invoked as dim.operator<<(os)).
   std::ostream& operator<< (std::ostream& os) const {
      os << GetVal();
      return os;
   }

   /// Two parametric dimensions compare by parameter string; in every other case
   /// (including one parametric and one fixed) the numeric dim values are compared.
   bool operator==(const Dim& rhs) const {
      return (isParam && rhs.isParam) ? param == rhs.param : dim == rhs.dim;
   }
   bool operator!=(const Dim& rhs) const {
      return !(*this == rhs);
   }
};
102
103//bool operator==(const Dim& lhs, const Dim& rhs);
104inline std::ostream & operator<< (std::ostream &os, const Dim &d) {
105 os << d.GetVal();
106 return os;
107}
108
111 std::vector<Dim> shape;
112};
113
116 std::vector<size_t> shape;
117};
118
121 std::vector<Dim> shape;
122};
123
// template traits for Tensor Shape
/// Primary template: intentionally empty, the specializations provide IsDim().
template <typename T>
struct TensorShape {
};
127template<>
129 static bool IsDim() { return true; }
130};
131template<>
132struct TensorShape<size_t> {
133 static bool IsDim() { return false; }
134};
135
// template traits for Tensor type
/// Primary template: intentionally empty, each specialization provides Name().
template <typename T>
struct TensorType {
};

/// float: Name() returns the text "float".
template <>
struct TensorType<float> {
   static const std::string Name()
   {
      return "float";
   }
};
143template<>
145 static const std::string Name() { return "double"; }
146};
147template<>
148struct TensorType<int64_t> {
149 static const std::string Name() { return "int64_t"; }
150};
151template<>
152struct TensorType<int32_t> {
153 static const std::string Name() { return "int32_t"; }
154};
155template<>
156struct TensorType<uint32_t> {
157 static const std::string Name() { return "uint32_t"; }
158};
159template<>
160struct TensorType<uint64_t> {
161 static const std::string Name() { return "uint64_t"; }
162};
163template<>
165 static const std::string Name() { return "bool"; }
166};
167template<>
168struct TensorType<int8_t> {
169 static const std::string Name() { return "int8_t"; }
170};
171template<>
172struct TensorType<uint8_t> {
173 static const std::string Name() { return "uint8_t"; }
174};
175
177 std::string_view tensor_name;
179
180 TensorMemoryInfo split(const std::string_view new_name, size_t new_size) {
181 if (new_size > tensor_size) {
182 throw std::invalid_argument("New size exceeds available tensor size.");
183 }
184 tensor_size -= new_size;
185 return TensorMemoryInfo{new_name, new_size};
186 }
187
188 // Method to merge another struct into this one
189 void merge(const TensorMemoryInfo& other) {
190 tensor_size += other.tensor_size;
191 }
192};
193
195
196 // ordered map with chunk_idx as key and TensorMemoryInfo as value
197 std::map<size_t, TensorMemoryInfo> total_stack;
198
199 // ordered map with chunk_idx as key and chunk_size as value
200 std::map<size_t, size_t> available_stack;
201};
202
203std::vector<Dim> ConvertShapeToDim(const std::vector<size_t> & shape);
204
205std::vector<size_t> ConvertShapeToInt(const std::vector<Dim> & shape);
206
207std::size_t ConvertShapeToLength(const std::vector<size_t> & shape);
208
209std::string ConvertShapeToString(const std::vector<size_t> & shape);
210std::string ConvertDimShapeToString(const std::vector<Dim> & shape);
211
212std::string ConvertDimShapeToLength(const std::vector<Dim> & shape);
213
214
/// Convert a value to its textual representation.
/// Generic version: the text produced by std::to_string.
template<class T>
std::string ConvertValToString(T value) {
   return std::to_string(value);
}

/// float specialization.
/// Infinities and NaN are written as std::numeric_limits<float> expressions;
/// any other value is printed with max_digits10 significant digits.
template<>
inline std::string ConvertValToString<float>(float value) {
   if (std::isnan(value))
      return "std::numeric_limits<float>::quiet_NaN()";
   if (std::isinf(value)) {
      return (value > 0) ? "std::numeric_limits<float>::infinity()"
                         : "-std::numeric_limits<float>::infinity()";
   }
   std::ostringstream text;
   text << std::setprecision(std::numeric_limits<float>::max_digits10) << value;
   return text.str();
}

/// double specialization.
/// Infinities and NaN are written as std::numeric_limits<double> expressions;
/// any other value is printed with max_digits10 significant digits.
template<>
inline std::string ConvertValToString<double>(double value) {
   if (std::isnan(value))
      return "std::numeric_limits<double>::quiet_NaN()";
   if (std::isinf(value)) {
      return (value > 0) ? "std::numeric_limits<double>::infinity()"
                         : "-std::numeric_limits<double>::infinity()";
   }
   std::ostringstream text;
   text << std::setprecision(std::numeric_limits<double>::max_digits10) << value;
   return text.str();
}

/// int64_t specialization: INT64_MIN is written symbolically as "INT64_MIN",
/// every other value through std::to_string.
template<>
inline std::string ConvertValToString<int64_t>(int64_t value) {
   return (value == INT64_MIN) ? std::string("INT64_MIN") : std::to_string(value);
}
263
264
// convert list of values in a string taking into account the precision
/// \param n number of values available in data
/// \param data pointer to the values
/// \param maxprint maximum number of values to print (the default -1 wraps to the largest size_t)
/// \return text of the form "{ v0, v1, ... }"; when the output is cut at maxprint
///         the marker "..... " is appended after the last printed value
template<class T>
std::string ConvertValuesToString(size_t n, const T * data, size_t maxprint = -1) {
   const size_t nPrinted = std::min(n, maxprint);
   std::stringstream out;
   out << "{ ";
   for (size_t pos = 0; pos < nPrinted; ++pos) {
      out << ConvertValToString(data[pos]);
      // a separator is needed only when this is not the last available value
      const bool moreFollow = (pos + 1 < n);
      if (moreFollow) {
         out << ", ";
         if (pos + 1 == maxprint)
            out << "..... ";
      }
   }
   out << "}";
   return out.str();
}
/// Overload for a std::vector: prints at most maxprint (default 5) values
/// by forwarding to the pointer based version.
template<class T>
std::string ConvertValuesToString(const std::vector<T> & data, size_t maxprint = 5) {
   const size_t nValues = data.size();
   const T * firstValue = data.data();
   return ConvertValuesToString(nValues, firstValue, maxprint);
}
282
284public:
285 InitializedTensor() = default;
286 InitializedTensor(ETensorType type, std::span<std::size_t> shape, std::shared_ptr<void> data, bool typeConstant = false)
287 : fConstant(typeConstant), fType{type}, fShape{shape.begin(), shape.end()}, fData{data}
288 {
289 }
290
291 ETensorType const &type() const { return fType; }
292 std::vector<std::size_t> const &shape() const { return fShape; }
293 std::shared_ptr<void> const &sharedptr() const { return fData; }
294 // query if tensor comes from a Constant operator
295 bool IsConstantTensor() const { return fConstant;}
296 // query if tensor needs to be written in a weight file. Constant tensors are not written in a separate file
297 bool IsWeightTensor() const { return !fConstant && !fIsNotWritable;}
298 // check if a Tensor is Writable (need to be written in the file or in the generated code (e.g. as a constant tensor)
299 // if an initialized tensors is used in a constant operator at compile time does not need to be written and can be omitted in
300 // the generated code
301 bool IsNotWritable() const { return fIsNotWritable; }
302 // set not writable initialized tensors - i.e. tensor that must not be written in a file
304 // set writable initialized tensors - i.e. tensor that must be written in a file
305 void SetWritable() { fIsNotWritable = false;}
306 // set as constant (needed for non-float initialized tensors)
307 void SetConstant() { fConstant = true;}
308
309 template <class T = void>
310 T const *data() const
311 {
312 return static_cast<T const *>(fData.get());
313 }
314
316 {
317 // We only calculate fSize here, because it is only used for IO to know
318 // the size of the persistent data.
319 fSize = 1;
320 for (std::size_t item : fShape) {
321 fSize *= static_cast<int>(item);
322 }
323 // get size in bytes
325 fPersistentData = static_cast<char *>(fData.get());
326 }
328 {
329 // If there is no persistent data, do nothing
330 if (fSize == 0 || fPersistentData == nullptr) {
331 return;
332 }
333
334 // Nothing to be done if the pointed-to data is the same
335 if (fPersistentData == static_cast<char *>(fData.get())) {
336 return;
337 }
338
339 // Initialize the shared_ptr
340 fData = std::shared_ptr<void>{malloc(fSize), free};
341 std::memcpy(fData.get(), fPersistentData, fSize);
342
343 // Make sure the data read from disk doesn't leak and delete the
344 // persistent data
345 delete[] fPersistentData;
346 fPersistentData = nullptr;
347 fSize = 0;
348 }
349
350private:
351 bool fConstant = false; ///< Flag specifying if tensor is a Constant one (coming from a Constant operator)
352 bool fIsNotWritable = false; ///< Flag to indicate that tensor values do not need to be written as weight or generated code
353 ETensorType fType; ///< Encodes the type of the data
354 std::vector<std::size_t> fShape; ///< The shape of the data in terms of elements in each dimension
355 std::shared_ptr<void> fData; ///<! Transient shared data
356 int fSize = 0; ///< The size of the persistent data in bytes (not number of elements!)
357 char *fPersistentData = nullptr; ///<[fSize] Persistent version of the data
358};
359
360template <typename T>
362 if (std::is_same<T, float>::value) return ETensorType::FLOAT;
363 if (std::is_same<T, uint8_t>::value) return ETensorType::UINT8;
364 if (std::is_same<T, int8_t>::value) return ETensorType::INT8;
365 if (std::is_same<T, uint16_t>::value) return ETensorType::UINT16;
366 if (std::is_same<T, int16_t>::value) return ETensorType::INT16;
367 if (std::is_same<T, int32_t>::value) return ETensorType::INT32;
368 if (std::is_same<T, int64_t>::value) return ETensorType::INT64;
369 if (std::is_same<T, std::string>::value) return ETensorType::STRING;
370 if (std::is_same<T, bool>::value) return ETensorType::BOOL;
371 //float16 unimplemented
372 if (std::is_same<T, double>::value) return ETensorType::DOUBLE;
373 if (std::is_same<T, uint32_t>::value) return ETensorType::UINT32;
374 if (std::is_same<T, uint64_t>::value) return ETensorType::UINT64;
375 //complex 64, 28, bfloat 16 unimplemented
376}
377
378namespace UTILITY{
379
380
381
382// clean operator and tensor names
383std::string Clean_name(std::string input_tensor_name);
384
385// Check if two shapes are equal
386bool AreSameShape(const std::vector<size_t>&, const std::vector<size_t>&);
387bool AreSameShape(const std::vector<size_t>&, const std::vector<Dim>&);
388bool AreSameShape(const std::vector<Dim>&, const std::vector<Dim>&);
389
390
391// Multidirectional broadcast a list of tensors to the same shape
392std::vector<size_t> MultidirectionalBroadcastShape(std::vector<std::vector<size_t>>);
393
394// Multidirectional broadcast two shapes to the same shape
395
396std::pair<int, std::vector<size_t>> MultidirectionalBroadcastShape(std::vector<size_t> &, std::vector<size_t> &);
397std::vector<size_t> UnidirectionalBroadcastShape(std::vector<size_t> &, std::vector<size_t> &);
398
399std::pair<int, std::vector<Dim>> MultidirectionalBroadcastShape(std::vector<Dim> &, std::vector<Dim> &);
400
401
402
403template<typename T>
404T* BroadcastConvBias(const T* data, const size_t channel, const std::vector<size_t>& targetShape) {
405 size_t size = targetShape.size();
406 if (targetShape[1] != channel) {
407 std::stringstream ss;
408 ss << "TMVA::SOFIE - Error broadcasting Conv Bias of shape {";
409 ss << std::to_string(channel);
410 ss << "} to ";
411 ss << ConvertShapeToString(targetShape);
412 throw
413 std::runtime_error(ss.str());
414 }
415
416 size_t targetLength = ConvertShapeToLength(targetShape);
417 T* newData = new T[targetLength];
418
419 if (targetLength == channel) {
420 std::copy(data, data + channel, newData);
421 return newData;
422 }
423
424 // cStride = OutDepth * outHeight * outWidth
425 size_t cStride = 1;
426 for (size_t i = 2; i < size; i++)
427 cStride *= targetShape[i];
428 // Broadcast each element of the bias to a vector of size cStride and concatenate them
429 // into a vector of size channel * cStride
430 for (size_t i = 0; i < channel; i++) {
431 std::fill(newData + i * cStride, newData + (i + 1) * cStride, data[i]);
432 }
433 // Broadcast newData[0...channel * cStride) to newData[0...batch * channel * cStride)
434 size_t batch = targetShape[0];
435 size_t bStride = channel * cStride;
436 for (size_t i = 1; i < batch; i++) {
437 std::copy(newData, newData + bStride, newData + i * bStride);
438 }
439 return newData;
440}
441
442// Broadcast a tensor from shape to targetShape according to numpy broadcasting rules
443// See more at https://numpy.org/doc/stable/user/basics.broadcasting.html
444// and https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md .
445template<typename T, class ConstContT = std::span<const T>>
446void BroadcastTensor(ConstContT data, const std::vector<size_t>& shape, const std::vector<size_t>& targetShape, T *broadcastedData) {
447 // Size of the shapes (tensor input here have shapes with same sizes, we have already added the needed ones )
448 size_t size = shape.size();
449 // Current length of the broadcasted tensor
450 size_t curLength = data.size();
451 // special case when broadcasting last dimensions (initial shapes must be the same)
452 if (size > 1 && shape.front() == targetShape.front() && shape.back() == 1) {
453 size_t bsize = targetShape.back();
454 // compute the size of the data to broadcast
455 for (int k = int(size)-2; k >=0; k--) {
456 if (shape[k] != 1) break;
457 bsize *= targetShape[k];
458 }
459 for (size_t i = 0; i < curLength; i++) {
460 std::fill(broadcastedData + i*bsize, broadcastedData + (i+1)*bsize , data[i]);
461 }
462 return;
463 }
464
465 std::copy(data.begin(), data.end(), broadcastedData);
466 // Product of the previous dimensions of targetShape
467 size_t arrayNum = 1;
468 // New broadcasted data: is this needed?
469 std::vector<T> newData(ConvertShapeToLength(targetShape));
470
471 for (size_t idx = 0; idx < size; idx++) {
472 size_t dim = shape[idx];
473 size_t targetDim = targetShape[idx];
474 if (dim == 1 && targetDim > 1) {
475 // Set the new length of the data
476 size_t newLength = curLength * targetDim;
477 // View the data as a list of arrayNum arrays of size arrayLength
478 size_t arrayLength = curLength / arrayNum;
479 // Broadcast each array dim times
480 if (arrayLength > 1) {
481 // If each array has at least two elements
482 for (size_t arrayIdx = 0; arrayIdx < arrayNum; arrayIdx++) {
483 for (size_t targetIdx = 0; targetIdx < targetDim; targetIdx++) {
484 size_t offset = arrayIdx * arrayLength * targetDim + targetIdx * arrayLength;
485 std::copy(broadcastedData + arrayIdx * arrayLength,
486 broadcastedData + (arrayIdx + 1) * arrayLength,
487 newData.begin() + offset);
488 }
489 }
490 } else {
491 // If each array has one element
492 for (size_t arrayIdx = 0; arrayIdx < arrayNum; arrayIdx++) {
493 std::fill(newData.begin() + arrayIdx * targetDim,
494 newData.begin() + (arrayIdx + 1) * targetDim, broadcastedData[arrayIdx]);
495 }
496 }
497 // Update current length
498 curLength = newLength;
499 // Update broadcasted data
500 std::copy(newData.begin(), newData.begin() + newLength, broadcastedData);
501 }
502 // Update the number of arrays
503 arrayNum *= targetDim;
504 }
505}
506
507// interface where we allocate a new array for broadcasted data
508template<typename T>
509T* CreateBroadcastTensor(const T* data, const std::vector<size_t>& shape, const std::vector<size_t>& targetShape, size_t targetLength) {
510 // newShape is an array of size equal to dimension along which we are broadcasting the tensor
511 T* broadcastedData = new T[targetLength];
512 size_t curLength = ConvertShapeToLength(shape);
513 BroadcastTensor<T>({data, curLength}, shape, targetShape, broadcastedData);
514 return broadcastedData;
515}
516// Unidirectional broadcasting shape to targetShape// In unidirectional broadcast - only tensor B can have the shape changed not
517// tensor A - otherwise is a multidirectional broadcast
518template<typename T>
519T* UnidirectionalBroadcast(const T* data, const std::vector<size_t>& shape, const std::vector<size_t>& targetShape) {
520 // Prepend shape with ones
521 if (shape.size() < targetShape.size()) {
522 size_t targetSize = targetShape.size();
523 std::vector<size_t> newShape(targetSize, 1);
524 size_t offset = targetSize - shape.size();
525 std::copy(shape.begin(), shape.end(), newShape.begin() + offset);
526 return CreateBroadcastTensor(data, newShape, targetShape, ConvertShapeToLength(targetShape));
527 }
528 return CreateBroadcastTensor(data, shape, targetShape, ConvertShapeToLength(targetShape));
529}
530
531// Unidirectional broadcasting shape to targetShape using a passed vector to avoid allocations
532template<typename T>
533void UnidirectionalBroadcast(const T* data, const std::vector<size_t>& shape, const std::vector<size_t>& targetShape, T *broadcastedData) {
534 size_t curLength = ConvertShapeToLength(shape);
535 std::span<T> inData(const_cast<T*>(data), curLength);
536 // Prepend shape with ones
537 if (shape.size() < targetShape.size()) {
538 size_t targetSize = targetShape.size();
539 std::vector<size_t> newShape(targetSize, 1);
540 size_t offset = targetSize - shape.size();
541 std::copy(shape.begin(), shape.end(), newShape.begin() + offset);
542 BroadcastTensor(inData, newShape, targetShape, broadcastedData);
543 }
544 BroadcastTensor(inData, shape, targetShape, broadcastedData);
545}
546
547/// compute stride of a tensor given its shape (assume layout is row-major)
548std::vector<size_t> ComputeStrideFromShape(const std::vector<size_t> & shape);
549std::vector<Dim> ComputeStrideFromShape(const std::vector<Dim> & shape);
550
/// function to check if a >= 0 and a < b using a single comparison
/// use trick casting to unsigned values so it becomes a single comparison
/// (a negative a becomes a large unsigned value and the comparison fails)
inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
   const unsigned ua = static_cast<unsigned>(a);
   const unsigned ub = static_cast<unsigned>(b);
   return ua < ub;
}
556
557
/// im2col : efficient function to re-arrange input data of convolution to a matrix
/// that can be used by BLAS
/// Use trick to loop on each element of filtered region first and follow input data layout
/// By doing this reads and writes are of consecutive data in memory and one gains in efficiency
/// The resulting matrix will be already transposed and can be used directly in BLAS
/// since output will be a matrix : (channels*kernel_h*kernel_w , output_h*output_w)
/// Example: with an input matrix
///    a1 a2 a3
///    b1 b2 b3    and a 2x2 kernel (k1,k2,k3,k4) and padding 1 :
///    c1 c2 c3
/// output will be a matrix (4 x 16)
/// the routine will follow output order :
/// first all elements which will be operated by k1 then k2 then k3
///  -> ( 0 0 0 0 0 a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 ) all elements for k1
///     ( 0 0 0 0 a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 0 ) for k2
///     ( 0 a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 0 0 0 0 ) for k3
///     ( a1 a2 a3 0 b1 b2 b3 0 c1 c2 c3 0 0 0 0 0 ) for k4
///
template <typename T>
void Im2col(const T *data_im, const int channels, const int height, const int width, const int kernel_h,
            const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w,
            const int dilation_h, const int dilation_w, T *data_col)
{
   // true when 0 <= pos < extent, done with a single unsigned comparison
   const auto inside = [](int pos, int extent) {
      return static_cast<unsigned>(pos) < static_cast<unsigned>(extent);
   };

   const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
   const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
   const int channel_size = height * width;

   T *out = data_col;       // next element of the output matrix to write
   const T *plane = data_im; // start of the image of the current channel
   for (int c = 0; c < channels; ++c, plane += channel_size) {
      for (int kr = 0; kr < kernel_h; ++kr) {
         for (int kc = 0; kc < kernel_w; ++kc) {
            int row = kr * dilation_h - pad_h;
            for (int oy = 0; oy < output_h; ++oy, row += stride_h) {
               if (!inside(row, height)) {
                  // the full output row falls in the padding region
                  for (int ox = 0; ox < output_w; ++ox) {
                     *(out++) = 0;
                  }
                  continue;
               }
               int col = kc * dilation_w - pad_w;
               for (int ox = 0; ox < output_w; ++ox, col += stride_w) {
                  if (inside(col, width)) {
                     *(out++) = plane[row * width + col];
                  } else {
                     *(out++) = 0;
                  }
               }
            }
         }
      }
   }
}
611
/// 3d implementation
/// Same re-arrangement as the 2d version with an additional depth dimension.
/// Elements falling in the padding region are written as 0.
template <typename T>
void Im2col_3d(const T *data_im, const int channels,
               const int depth, const int height, const int width,
               const int kernel_d, const int kernel_h, const int kernel_w,
               const int pad_d, const int pad_h, const int pad_w,
               const int stride_d, const int stride_h, const int stride_w,
               const int dilation_d, const int dilation_h, const int dilation_w, T *data_col)
{
   // true when 0 <= pos < extent, done with a single unsigned comparison
   const auto inside = [](int pos, int extent) {
      return static_cast<unsigned>(pos) < static_cast<unsigned>(extent);
   };

   const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
   const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
   const int output_d = (depth + 2 * pad_d - (dilation_d * (kernel_d - 1) + 1)) / stride_d + 1;
   const int channel_size = height * width * depth;

   T *out = data_col; // next element of the output matrix to write
   // write one full output row of zeros
   const auto zeroRow = [&out, output_w]() {
      for (int ox = 0; ox < output_w; ++ox) {
         *(out++) = 0;
      }
   };

   // assume data are c x d x h x w
   const T *volume = data_im; // start of the data of the current channel
   for (int c = 0; c < channels; ++c, volume += channel_size) {
      for (int kd = 0; kd < kernel_d; ++kd) {
         for (int kr = 0; kr < kernel_h; ++kr) {
            for (int kc = 0; kc < kernel_w; ++kc) {
               int dep = kd * dilation_d - pad_d;
               for (int od = 0; od < output_d; ++od, dep += stride_d) {
                  if (!inside(dep, depth)) {
                     // the full output plane falls in the padding region
                     for (int oy = 0; oy < output_h; ++oy) {
                        zeroRow();
                     }
                     continue;
                  }
                  int row = kr * dilation_h - pad_h;
                  for (int oy = 0; oy < output_h; ++oy, row += stride_h) {
                     if (!inside(row, height)) {
                        zeroRow();
                        continue;
                     }
                     int col = kc * dilation_w - pad_w;
                     for (int ox = 0; ox < output_w; ++ox, col += stride_w) {
                        if (inside(col, width)) {
                           *(out++) = volume[dep * width * height + row * width + col];
                        } else {
                           *(out++) = 0;
                        }
                     }
                  }
               }
            }
         }
      }
   }
}
666
/// col2im : accumulate the columns of a matrix produced in im2col layout back into an image.
/// The output image data_im (channels x height x width) is first set to zero, then
/// each element of data_col mapping inside the image is added to its image element;
/// elements mapping to the padding region are skipped.
template <typename Dtype>
void col2im(const Dtype* data_col, const int channels,
            const int height, const int width, const int kernel_h, const int kernel_w,
            const int pad_h, const int pad_w,
            const int stride_h, const int stride_w,
            const int dilation_h, const int dilation_w,
            Dtype* data_im) {
   // true when 0 <= pos < extent, done with a single unsigned comparison
   const auto inside = [](int pos, int extent) {
      return static_cast<unsigned>(pos) < static_cast<unsigned>(extent);
   };

   // the output is accumulated with += : it must start from zero
   std::fill(data_im, data_im + height * width * channels, 0.);

   const int output_h = (height + 2 * pad_h -
                         (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
   const int output_w = (width + 2 * pad_w -
                         (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
   const int channel_size = height * width;

   const Dtype *src = data_col; // next element of the column matrix to read
   Dtype *plane = data_im;      // start of the image of the current channel
   for (int c = 0; c < channels; ++c, plane += channel_size) {
      for (int kr = 0; kr < kernel_h; ++kr) {
         for (int kc = 0; kc < kernel_w; ++kc) {
            int row = kr * dilation_h - pad_h;
            for (int oy = 0; oy < output_h; ++oy, row += stride_h) {
               if (!inside(row, height)) {
                  // the full row is in the padding region: skip its elements
                  src += output_w;
                  continue;
               }
               int col = kc * dilation_w - pad_w;
               for (int ox = 0; ox < output_w; ++ox, ++src, col += stride_w) {
                  if (inside(col, width)) {
                     plane[row * width + col] += *src;
                  }
               }
            }
         }
      }
   }
}
713
714} // end namespace UTILITY
715
716namespace BLAS{
717extern "C" void sgemm_(const char * transa, const char * transb, const int * m, const int * n, const int * k,
718 const float * alpha, const float * A, const int * lda, const float * B, const int * ldb,
719 const float * beta, float * C, const int * ldc);
720}//BLAS
721
722
723struct GNN_Data {
724 RTensor<float> node_data; // the node feature data, tensor with shape (num_nodes, num_node_features)
725 RTensor<float> edge_data; // the edge feature data, tensor with shape (num_edges, num_edge_features)
726 RTensor<float> global_data; // the global features, tensor with shape (1, num_global_features)
727 RTensor<int> edge_index; // the edge index (receivers and senders for each edge), tensor with shape (2, num_edges)
728 // edge_index[0,:] are the receivers and edge_index[1,:] are the senders
729
730
731 // need to have default constructor since RTensor has not one
733
734};
735
736template<typename T>
738{
739 // concatenate tensor along axis. Shape must be the same except in the dimension of the concatenated axis
740 if (t1.GetMemoryLayout() != t2.GetMemoryLayout())
741 throw std::runtime_error("TMVA RTensor Concatenate - tensors have different memory layout");
742 auto & shape1 = t1.GetShape();
743 auto & shape2 = t2.GetShape();
744 if (t1.GetSize()/shape1[axis] != t2.GetSize()/shape2[axis]) {
745 std::cout << "axis " << axis << " sizes " << t1.GetSize() << " " << t2.GetSize() << " ";
746 std::cout << "shape 1 : " << ConvertShapeToString(t1.GetShape());
747 std::cout << " shape 2 : " << ConvertShapeToString(t2.GetShape()) << std::endl;
748 throw std::runtime_error("TMVA RTensor Concatenate - tensors have incompatible shapes");
749 }
750 std::vector<size_t> outShape = shape1;
751 outShape[axis] = shape1[axis] + shape2[axis];
752 TMVA::Experimental::RTensor<T> tout(outShape, t1.GetMemoryLayout());
753 if (t1.GetMemoryLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
754 throw std::runtime_error("TMVA RTensor Concatenate is not yet supported for column major tensors");
755 }
756
757 auto & stride1 = t1.GetStrides();
758 auto & stride2 = t2.GetStrides();
759 auto & outStride = tout.GetStrides();
760
761 size_t s1 = (axis > 0) ? stride1[axis-1] : t1.GetSize(); // block size to copy from first tensor
762 size_t s2 = (axis > 0) ? stride2[axis-1] : t2.GetSize(); // block size to copy from second tensor
763 size_t sout = (axis > 0) ? outStride[axis-1] : tout.GetSize();
764 size_t nb = t1.GetSize()/s1;
765 for (size_t i = 0; i < nb; i++) {
766 std::copy(t1.GetData() + i*s1, t1.GetData() + (i+1)*s1, tout.GetData() + i * sout );
767 std::copy(t2.GetData() + i*s2, t2.GetData() + (i+1)*s2, tout.GetData() + i * sout + s1 );
768 }
769
770 return tout;
771}
772
773
774inline GNN_Data Concatenate(GNN_Data & data1, GNN_Data & data2, int axis = 0) {
775 GNN_Data out;
776 out.node_data = Concatenate(data1.node_data,data2.node_data, axis);
777 out.edge_data = Concatenate(data1.edge_data,data2.edge_data, axis);
778 out.global_data = Concatenate<float>(data1.global_data,data2.global_data, axis-1);
779 // assume sender/receivers of data1 and data2 are the same
780 out.edge_index = data1.edge_index.Copy();
781 return out;
782}
783
784inline GNN_Data Copy(const GNN_Data & data) {
785 GNN_Data out;
786 out.node_data = RTensor<float>(data.node_data.GetShape());
787 out.edge_data = RTensor<float>(data.edge_data.GetShape());
788 out.global_data = RTensor<float>(data.global_data.GetShape());
789 out.edge_index = RTensor<int>(data.edge_index.GetShape());
790 std::copy(data.node_data.GetData(), data.node_data.GetData()+ data.node_data.GetSize(), out.node_data.GetData());
791 std::copy(data.edge_data.GetData(), data.edge_data.GetData()+ data.edge_data.GetSize(), out.edge_data.GetData());
792 std::copy(data.global_data.GetData(), data.global_data.GetData()+ data.global_data.GetSize(), out.global_data.GetData());
793 std::copy(data.edge_index.GetData(), data.edge_index.GetData()+ data.edge_index.GetSize(), out.edge_index.GetData());
794 return out;
795}
796
797inline void Gemm_Call(float *output, bool transa, bool transb, int m, int n, int k, float alpha, const float *A,
798 const float *B, float beta, const float *C)
799{
800 char ct = 't';
801 char cn = 'n';
802 const int *lda = transa ? &k : &m;
803 const int *ldb = transb ? &n : &k;
804 const int *ldc = &m;
805 if (C != nullptr) {
806 std::copy(C, C + m * n, output);
807 }
808 TMVA::Experimental::SOFIE::BLAS::sgemm_(transa ? &ct : &cn, transb ? &ct : &cn, &m, &n, &k, &alpha, A, lda, B, ldb,
809 &beta, output, ldc);
810}
811
/// Set the first size elements of output to value.
inline void Fill(float *output, float value, int size)
{
   for (int idx = 0; idx < size; ++idx) {
      output[idx] = value;
   }
}
816
/// Copy the first size elements of input into output.
template <class T>
inline void Copy(T *output, T const *input, int size)
{
   T const *const last = input + size;
   std::copy(input, last, output);
}
822
/// Element-wise ReLU: output[i] is input[i] if it is greater than zero, 0 otherwise.
inline void Relu(float *output, float const *input, int size)
{
   for (int idx = 0; idx < size; ++idx) {
      const float x = input[idx];
      if (x > 0.0f) {
         output[idx] = x;
      } else {
         output[idx] = 0.0f;
      }
   }
}
// function to read float from the file dealing with inf and nan values
/// The tokens "inf", "-inf" and "nan" are mapped to the corresponding special values,
/// any other token is converted with std::stof.
inline float ParseFloatToken (const std::string & s) {
   using FloatLimits = std::numeric_limits<float>;
   if (s == "nan") {
      return FloatLimits::quiet_NaN();
   }
   if (s == "inf" || s == "-inf") {
      const float inf = FloatLimits::infinity();
      return (s.front() == '-') ? -inf : inf;
   }
   return std::stof(s);
}
836
837template <class T>
838void ReadTensorFromStream(std::istream &is, T &target, std::string const &expectedName, std::size_t expectedLength)
839{
840 std::string name;
841 std::size_t length;
842 is >> name >> length;
843 if (name != expectedName) {
844 std::string err_msg =
845 "TMVA-SOFIE failed to read the correct tensor name; expected name is " + expectedName + " , read " + name;
846 throw std::runtime_error(err_msg);
847 }
848 if (length != expectedLength) {
849 std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is " +
850 std::to_string(expectedLength) + " , read " + std::to_string(length);
851 throw std::runtime_error(err_msg);
852 }
853 std::string token;
854 for (size_t i = 0; i < length; ++i) {
855 is >> token;
856 target[i] = ParseFloatToken(token);
857 }
858 if (is.fail()) {
859 throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor " + expectedName);
860 }
861}
862
863//Utility functions to generate code
864void EmitNestedLoops(std::stringstream &out, size_t loopRank, const std::vector<Dim> shape);
865void CloseNestedLoops(std::stringstream &out, size_t loopRank);
866
867
// code for the greedy memory allocation
870 int begin; // start time (op index) lifetime
871 int end; // end time lifetime
872 size_t size; // size of tensors in bytes
873};
874
876 std::size_t total_bytes = 0; // total memory needed
877 std::vector<size_t> offsets; // resulted offsets for each tensor
878};
879
880/// Greedy best-fit planner with coalescing free list.
881MemoryResult OrganizeMemory(const std::vector<TensorLifeInfo> & tensorsInfo );
882
// Simple Dimension classes and helpers to add constexpr meta info on input
884// tensors to the emitted code.
885struct SingleDim {
886 enum class Kind {
889 };
890
892 std::size_t dim;
893 std::string_view name;
894
895 constexpr SingleDim(std::size_t v) : kind(Kind::Static), dim(v), name() {}
896 constexpr SingleDim(const char *v) : kind(Kind::Symbolic), dim(0), name(v) {}
897};
898
901 std::size_t size;
902
903 constexpr std::size_t total_size() const
904 {
905 std::size_t result = 1;
906 for (std::size_t i = 0; i < size; ++i) {
907 result *= data[i].dim;
908 }
909 return result;
910 }
911};
912
913template<class Arr>
914constexpr TensorDims makeDims(Arr const &arr)
915{
916 return TensorDims{arr.data(), arr.size()};
917}
918
919} // namespace SOFIE
920} // namespace Experimental
921} // namespace TMVA
922
923#endif //TMVA_SOFIE_COMMON
true
Register systematic variations for multiple existing columns using auto-generated tags.
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define a(i)
Definition RSha256.hxx:99
#define s1(x)
Definition RSha256.hxx:91
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t target
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t width
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
char name[80]
Definition TGX11.cxx:148
#define free
Definition civetweb.c:1578
#define malloc
Definition civetweb.c:1575
RTensor is a container with contiguous memory and shape information.
Definition RTensor.hxx:163
RTensor< Value_t, Container_t > Copy(MemoryLayout layout=MemoryLayout::RowMajor) const
Copy RTensor to new object.
Definition RTensor.hxx:564
Value_t * GetData()
Definition RTensor.hxx:245
RTensor is a container with contiguous memory and shape information.
Definition RTensor.hxx:163
MemoryLayout GetMemoryLayout() const
Definition RTensor.hxx:249
const Shape_t & GetStrides() const
Definition RTensor.hxx:244
std::size_t GetSize() const
Definition RTensor.hxx:242
const Shape_t & GetShape() const
Definition RTensor.hxx:243
std::shared_ptr< void > const & sharedptr() const
std::shared_ptr< void > fData
! Transient shared data
ETensorType fType
Encodes the type of the data.
std::vector< std::size_t > const & shape() const
char * fPersistentData
[fSize] Persistent version of the data
std::vector< std::size_t > fShape
The shape of the data in terms of elements in each dimension.
bool fIsNotWritable
Flag to indicate that tensor values do not need to be written as weight or generated code.
bool fConstant
Flag specifying if tensor is a Constant one (coming from a Constant operator)
InitializedTensor(ETensorType type, std::span< std::size_t > shape, std::shared_ptr< void > data, bool typeConstant=false)
int fSize
The size of the persistent data in bytes (not number of elements!)
const Int_t n
Definition legend1.C:16
void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
bool AreSameShape(const std::vector< size_t > &, const std::vector< size_t > &)
void Im2col_3d(const T *data_im, const int channels, const int depth, const int height, const int width, const int kernel_d, const int kernel_h, const int kernel_w, const int pad_d, const int pad_h, const int pad_w, const int stride_d, const int stride_h, const int stride_w, const int dilation_d, const int dilation_h, const int dilation_w, T *data_col)
3d implementation
T * BroadcastConvBias(const T *data, const size_t channel, const std::vector< size_t > &targetShape)
std::vector< size_t > UnidirectionalBroadcastShape(std::vector< size_t > &, std::vector< size_t > &)
void col2im(const Dtype *data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype *data_im)
void BroadcastTensor(ConstContT data, const std::vector< size_t > &shape, const std::vector< size_t > &targetShape, T *broadcastedData)
std::string Clean_name(std::string input_tensor_name)
bool is_a_ge_zero_and_a_lt_b(int a, int b)
function to check if a >= 0 and a < MAX using a single comparison / use trick casting to unsigned val...
std::vector< size_t > MultidirectionalBroadcastShape(std::vector< std::vector< size_t > >)
T * UnidirectionalBroadcast(const T *data, const std::vector< size_t > &shape, const std::vector< size_t > &targetShape)
void Im2col(const T *data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, T *data_col)
im2col : efficient function to re-arrange input data of convolution to a matrix that can be used by B...
T * CreateBroadcastTensor(const T *data, const std::vector< size_t > &shape, const std::vector< size_t > &targetShape, size_t targetLength)
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
MemoryResult OrganizeMemory(const std::vector< TensorLifeInfo > &tensorsInfo)
Greedy best-fit planner with coalescing free list.
constexpr TensorDims makeDims(Arr const &arr)
std::string ConvertDimShapeToString(const std::vector< Dim > &shape)
std::size_t ConvertShapeToLength(const std::vector< size_t > &shape)
std::string ConvertValToString< double >(double value)
void ReadTensorFromStream(std::istream &is, T &target, std::string const &expectedName, std::size_t expectedLength)
std::string ConvertValuesToString(size_t n, const T *data, size_t maxprint=-1)
std::vector< Dim > ConvertShapeToDim(const std::vector< size_t > &shape)
Convert shape from integer format to dynamic one (based on Dim)
constexpr size_t GetTypeSize(ETensorType type)
ETensorType GetTemplatedType(T)
void Gemm_Call(float *output, bool transa, bool transb, int m, int n, int k, float alpha, const float *A, const float *B, float beta, const float *C)
std::string ConvertValToString< float >(float value)
void Fill(float *output, float value, int size)
std::vector< size_t > ConvertShapeToInt(const std::vector< Dim > &shape)
Convert shape based on Dim to integer format.
std::string ConvertTypeToString(ETensorType type)
void Relu(float *output, float const *input, int size)
ETensorType ConvertStringToType(std::string type)
TMVA::Experimental::RTensor< T > Concatenate(TMVA::Experimental::RTensor< T > &t1, TMVA::Experimental::RTensor< T > &t2, int axis=0)
float ParseFloatToken(const std::string &s)
std::ostream & operator<<(std::ostream &os, const Dim &d)
std::string ConvertDimShapeToLength(const std::vector< Dim > &shape)
void EmitNestedLoops(std::stringstream &out, size_t loopRank, const std::vector< Dim > shape)
std::string ConvertShapeToString(const std::vector< size_t > &shape)
void CloseNestedLoops(std::stringstream &out, size_t loopRank)
std::string ConvertValToString(T value)
std::string ConvertValToString< int64_t >(int64_t value)
bool IsInteger(const std::string &s)
GNN_Data Copy(const GNN_Data &data)
create variable transformations
bool operator!=(const Dim &rhs) const
bool operator==(const Dim &rhs) const
Dim(const std::string &p, size_t d=0)
std::ostream & operator<<(std::ostream &os) const
std::map< size_t, TensorMemoryInfo > total_stack
std::map< size_t, size_t > available_stack
constexpr SingleDim(std::size_t v)
constexpr SingleDim(const char *v)
constexpr std::size_t total_size() const
void merge(const TensorMemoryInfo &other)
TensorMemoryInfo split(const std::string_view new_name, size_t new_size)
TMarker m
Definition textangle.C:8
auto * t1
Definition textangle.C:20