Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_Conv.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_CONV
2#define TMVA_SOFIE_ROPERATOR_CONV
3
5#include "TMVA/ROperator.hxx"
6#include "TMVA/RModel.hxx"
7
8#include <memory>
9#include <sstream>
10#include <algorithm>
11#include <stdexcept>
12#include <vector>
13#include <cassert>
14
15namespace TMVA {
16namespace Experimental {
17namespace SOFIE {
18
19template<typename T>
// NOTE(review): doc line 20 -- the class declaration itself (presumably
// "class ROperator_Conv : public ROperator") -- is missing from this
// extraction; confirm against the original header.
21{
22private:
// ONNX Conv attributes, as parsed from the model.
23 std::string fAttrAutopad; // auto_pad attribute ("NOTSET", "SAME_UPPER", "SAME_LOWER" or "VALID", see ShapeInference)
24 std::vector<size_t> fAttrDilations; // kernel dilation per spatial axis
25 size_t fAttrGroup; // number of convolution groups (0 means: infer from shapes)
26 std::vector<size_t> fAttrKernelShape; // kernel size per spatial axis
27 std::vector<size_t> fAttrPads; // pads, begin values first then end values (indexed via i1/i2/i3 in ShapeInference)
28 std::vector<size_t> fAttrStrides; // stride per spatial axis
29
// Tensor names (cleaned with UTILITY::Clean_name in the constructors).
30 std::string fNX; // input data tensor name
31 std::string fNW; // weight tensor name
32 std::string fNB; // bias tensor name (empty string when the Conv has no bias)
33 std::string fNB2; // bias tensor name after broadcasting
34 std::string fNY; // output tensor name
35
// Names of the intermediate work tensors registered in Initialize().
36 std::string convK; // unrolled kernel matrix, "<X>_f"
37 std::string imcol; // im2col output matrix, "<X>_xcol"
38
// Tensor shapes, filled in Initialize().
39 std::vector<size_t> fShapeX; // input shape: N x C x spatial dims
40 std::vector<size_t> fShapeW; // weight shape: M x C/group x kernel dims
41 std::vector<size_t> fShapeB; // bias shape
42 std::vector<size_t> fShapeY; // output shape
43
44 std::string fType; // element type as C++ type name (only "float" is supported)
45
46 size_t fDim; // dimension of the convolution
47
48
49public:
50
52
// Constructor for a Conv operator with a bias tensor.
// Stores the ONNX attributes and the cleaned tensor names; only T = float
// is supported, any other type throws.
// NOTE(review): doc line 57 (the first part of the member-initializer list,
// presumably initializing fAttrAutopad/fAttrDilations/fAttrGroup/
// fAttrKernelShape) and doc lines 68-69 are missing from this extraction.
53 ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
54 size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
55 std::vector<size_t> strides, std::string nameX, std::string nameW,
56 std::string nameB, std::string nameY):
58 fAttrPads(pads), fAttrStrides(strides),
59 fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)),
60 fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY))
61 {
// only single-precision float convolutions are implemented
62 if(std::is_same<T, float>::value) {
63 fType = "float";
64 } else {
65 throw
66 std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
67 }
70 }
71
// Constructor for a Conv operator without a bias tensor (fNB stays empty,
// which disables the bias-handling paths in Initialize() and Generate()).
// NOTE(review): doc line 76 (the first part of the member-initializer list)
// and doc lines 86-87 are missing from this extraction.
72 ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
73 size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
74 std::vector<size_t> strides, std::string nameX, std::string nameW,
75 std::string nameY):
77 fAttrPads(pads), fAttrStrides(strides),
78 fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNY(UTILITY::Clean_name(nameY))
79 {
// only single-precision float convolutions are implemented
80 if(std::is_same<T, float>::value) {
81 fType = "float";
82 } else {
83 throw
84 std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
85 }
88 }
89
90 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) {
91 ETensorType out = input[0];
92 return {out};
93 }
94
95 // function returning output shape given input
// Computes the Conv output shape following ONNX semantics: input[0] is the
// data shape (N x C x spatial dims), input[1] the weight shape and, when
// given, input[2] the bias shape. As a side effect the attribute vectors
// (dilations, kernel shape, pads, strides) are normalized to fixed lengths
// so that later code can index them uniformly.
96 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) {
97 // shape of convolution input has to be (according to ONNX): N x C x H x W
98 // Where N : batch size, C : input channels, H : input height, W : input width
99
100 if (input.size() > 3 ) {
101 throw
102 std::runtime_error("TMVA SOFIE Conv Op Shape inference need 2 or 3 input tensors");
103 }
// every input tensor must have fDim spatial dimensions (rank = fDim + 2)
104 for(size_t i = 0; i < input.size(); i++) {
105 if (input[i].size() -2 != fDim) {
106 throw
107 std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid inputs ");
108 }
109 }
110
// group == 0 signals "deduce": total channels / channels-per-group
111 if (fAttrGroup == 0) {
112 fAttrGroup = input[0][1] / input[1][1];
113 }
114
115 // kernel shape
// take the kernel size from the attribute when given, otherwise from the
// weight tensor shape; unused spatial dims default to 1
116 size_t k1 = ((fAttrKernelShape.empty())? input[1][2] : fAttrKernelShape[0]);
117 size_t k2 = (fDim > 1) ? ((fAttrKernelShape.empty()) ? input[1][3] : fAttrKernelShape[1]) : 1;
118 size_t k3 = (fDim > 2) ? ((fAttrKernelShape.empty()) ? input[1][4] : fAttrKernelShape[2]) : 1;
119
120
// i1/i2/i3 index the "end" pad values inside fAttrPads (begin values come
// first, then end values, per the ONNX pads layout)
121 size_t i1 = (fDim > 1) ? ((fDim > 2) ? 3 : 2) : 1;
122 size_t i2 = (fDim > 2) ? 4 : 3;
123 size_t i3 = 5;
124
125 if (fAttrDilations.empty()) {
126 fAttrDilations = {1, 1, 1};
127 }
// NOTE(review): resize(3) pads with value-initialized zeros, so the
// resize(3, 1) below is a no-op when the vector already has size 3 --
// trailing dilations provided short may end up 0; harmless for the unused
// (k == 1) dimensions but confirm intended.
128 fAttrDilations.resize(3);
129 if (fDim < 3) {
130 fAttrDilations.resize(3, 1);
131 }
132 // Shape of the kernel
// effective (dilated) kernel extent: k + (d - 1) * (k - 1)
133 fAttrKernelShape = {k1 + (fAttrDilations[0] - 1) * (k1 - 1),
134 k2 + (fAttrDilations[1] - 1) * (k2 - 1),
135 k3 + (fAttrDilations[2] - 1) * (k3 - 1)};
136
137 if (fAttrAutopad == "NOTSET") {
138 if (fAttrPads.empty()) {
139 fAttrPads = {1, 1, 1, 1, 1, 1};
140 }
141 } else if (fAttrAutopad == "SAME_UPPER" || fAttrAutopad == "SAME_LOWER") {
// NOTE(review): doc lines 143, 145 and 147 -- the fAttrPads assignments for
// the 1d, 2d and 3d SAME_* cases (each presumably of the form
// fAttrPads = {fAttrKernelShape[...] / 2, ...}) -- are missing from this
// extraction; only the tail of the 3d initializer (line 148) survives.
142 if (fDim == 1)
144 else if (fDim == 2)
146 else if (fDim == 3)
148 fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[2] / 2};
149 // add extra padding at beginning or end (depending if SAME_UPPER or SAME_LOWER)
150 // need to check this!
151 if (fAttrKernelShape[0] % 2 == 1) {
152 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[0]++ : fAttrPads[i1]++;
153 }
154 if (fDim > 1 && fAttrKernelShape[1] % 2 == 1) {
155 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[1]++ : fAttrPads[i2]++;
156 }
157 if (fDim > 2 && fAttrKernelShape[2] % 2 == 1) {
158 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[2]++ : fAttrPads[i3]++;
159 }
160 } else if (fAttrAutopad != "VALID") {
161 throw
162 std::runtime_error("TMVA SOFIE Conv Op invalid fAutopad");
163 }
164 // to be sure pad is vector of size 6
165 if (fDim < 3) fAttrPads.resize(6, 0);
166
167 if (fAttrStrides.empty()) {
168 fAttrStrides = {1, 1, 1};
169 }
170 if (fDim < 3)
171 fAttrStrides.resize(3, 1);
172
173
// spatial sizes of the data tensor (unused dims default to 1)
174 size_t input1 = input[0][2];
175 size_t input2 = (fDim > 1) ? input[0][3] : 1;
176 size_t input3 = (fDim > 2) ? input[0][4] : 1;
177
// standard conv output size: (in + pad_begin + pad_end - kernel) / stride + 1
178 size_t pad1 = fAttrPads[0] + fAttrPads[i1];
179 size_t output1 = (input1 + pad1 - fAttrKernelShape[0]) / fAttrStrides[0] + 1;
180
181 size_t batch_size = input[0][0]; // first element in input tensor
182 size_t output_channels = input[1][0]; // first element in weight tensor
183
184 std::vector<std::vector<size_t>> ret({{ batch_size, output_channels, output1 }});
185
186 if (fDim == 1)
187 return ret;
188
189 size_t pad2 = fAttrPads[1] + fAttrPads[i2];
190 size_t output2 = (input2 + pad2 - fAttrKernelShape[1]) / fAttrStrides[1] + 1;
191 // output is N x M x OH x OW
192 ret[0].push_back(output2);
193 if (fDim == 2)
194 return ret;
195
196 size_t pad3 = fAttrPads[2] + fAttrPads[i3];
197 size_t output3 = (input3 + pad3 - fAttrKernelShape[2] ) / fAttrStrides[2] + 1;
198
199 // output is N x M x OH x OW x OD
200 ret[0].push_back(output3);
201 return ret;
202 }
203
// Validate the input/weight/bias tensors against the RModel, cache their
// shapes, prepare bias broadcasting and register the intermediate work
// tensors (kernel matrix "<X>_f" and im2col buffer "<X>_xcol").
204 void Initialize(RModel& model) override {
205 fUseSession = model.UseSession();
206 if (!model.CheckIfTensorAlreadyExist(fNX)) {
207 throw
208 std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model");
209 }
210 fShapeX = model.GetTensorShape(fNX);
211 if (fShapeX.size() < 3 || fShapeX.size() > 5) {
212 std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl;
213 throw
214 std::runtime_error("TMVA SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions");
215 }
// number of spatial dimensions: 1d, 2d or 3d convolution
216 fDim = fShapeX.size() - 2;
217 if (!model.CheckIfTensorAlreadyExist(fNW)) {
218 throw
219 std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model");
220 }
221 fShapeW = model.GetTensorShape(fNW);
222 if (fShapeW.size() < 3 || fShapeW.size() > 5) {
223 std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;
224 throw std::runtime_error("TMVA SOFIE Conv Op input weight tensor" + fNW + " is not of 3,4 or 5 dimensions");
225 }
// NOTE(review): doc lines 226-227 are missing from this extraction --
// presumably the computation of fShapeY (via ShapeInference) and the
// registration of the output tensor; fShapeY is read below.
228 if (fNB != "") {
229 if (!model.CheckIfTensorAlreadyExist(fNB)) {
230 throw
231 std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model");
232 }
233 fShapeB = model.GetTensorShape(fNB);
// per-sample output shape: fShapeY without the leading batch dimension
234 std::vector<size_t> targetShape(fShapeY.begin() + 1, fShapeY.end());
// NOTE(review): doc line 235 defining 'broadcast_needed' is missing from
// this extraction.
236 if (broadcast_needed) {
238 // make bias shape equal to Y shape by adding 1
239 if (fShapeB.size() < 1)
240 throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape");
241 // we assume bias tensor dimension is equal to number of filters that is the second dimension in
242 // the output tensor
243 if (fShapeB[0] != fShapeY[1])
// NOTE(review): doc line 245 (rest of this error message) is missing from
// this extraction.
244 throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " +
246 if (fType != "float")
247 throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported");
248 // here is the actual broadcasting
249 if (!fUseSession) {
// reshape the 1d bias of size C into (C, 1, ..., 1) so it broadcasts
// channel-wise over the per-sample output shape
250 std::vector<size_t> shape(fDim + 1, 1);
251 shape[0] = fShapeB[0];
252 std::shared_ptr<void> new_data_ptr(
253 UTILITY::UnidirectionalBroadcast<float>(static_cast<float *>(original_data.get()), shape, targetShape),
254 std::default_delete<float[]>());
// NOTE(review): doc line 255 is missing -- presumably the
// model.UpdateInitializedTensor(...) call whose effect is read back on the
// next line; 'original_data' is also defined on a missing line.
256 fShapeB = model.GetTensorShape(fNB);
257 fNB2 = fNB; // use same name
258 }
259 else {
260 // In case of session add broadcasting code in Session constructor and in GenerateInitCode
261 // we need to add a new intermediate tensor for broadcasted bias tensor
262 fNB2 = fNB + "bcast";
// NOTE(review): doc line 263 is missing -- presumably the
// model.AddIntermediateTensor(...) registration of fNB2.
264 }
265 }
266 }
267
268 size_t outputChannelSize = fShapeY[2]; // size/channel = D * H * W
269 size_t kernelSize = fAttrKernelShape[0];
270 for (size_t i = 1; i < fDim; i++) {
271 outputChannelSize *= fShapeY[2 + i];
// NOTE(review): doc line 272 is missing -- presumably
// "kernelSize *= fAttrKernelShape[i];" (shape1/shape2 below use kernelSize).
273 }
274
275 std::vector<size_t> shape1 = {fShapeW[0], fShapeW[1], kernelSize};
276 std::vector<size_t> shape2 = {fShapeW[1], kernelSize, outputChannelSize};
// NOTE(review): doc lines 277-278 are missing -- presumably the
// registrations of the work tensors with shapes shape1 and shape2.
279 convK = fNX +"_f";
280 imcol = fNX +"_xcol";
281 fOutputTensorNames.emplace_back(convK);
282 fOutputTensorNames.emplace_back(imcol);
283 }
284
285 std::string GenerateInitCode() {
286 std::stringstream out;
287 // Generate initialization code for broadcasting of bias tensor
288 if (!fNB2.empty()) {
289 // include a separate scope to avoid defining unique operator temp variables
290 std::vector<size_t> shape(fDim + 1, 1);
291 shape[0] = fShapeB[0];
292 std::vector<size_t> targetShape(fShapeY.begin() + 1, fShapeY.end());
293 out << SP << "{\n";
294 out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_"
295 << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n";
296 out << SP << SP << "std::copy(data, data + " << ConvertShapeToLength(targetShape) << ", tensor_" << fNB2 << ");\n";
297 out << SP << SP << "delete[] data;\n";
298 out << SP << "}\n";
299 }
300 return out.str();
301 }
302
303 // Generate code for Session data members (e.g. internal vectors)
// Currently returns an empty string: the declarations of the per-operator
// work vectors ("_f" kernel matrix and "_xcol" im2col buffer) are commented
// out below, so the sizes computed here are presently unused.
304 virtual std::string GenerateSessionMembersCode(std::string opName) {
305
306 size_t outputChannelSize = fShapeY[2]; // size/channel = D * H * W
307 size_t kernelSize = fAttrKernelShape[0];
308 for (size_t i = 1; i < fDim; i++) {
309 outputChannelSize *= fShapeY[2 + i];
// NOTE(review): doc line 310 is missing from this extraction -- presumably
// "kernelSize *= fAttrKernelShape[i];" accumulating the full kernel size.
311 }
312
313 opName = "op_" + opName;
314 std::stringstream out;
315 // matrix with convolution kernels
316 // out << "std::vector<" << fType << "> fVec_" << opName << "_f = std::vector<" << fType << ">("
317 // << fShapeW[0] * fShapeW[1] * kernelSize << ");\n";
318 // // output matrix of im2col
319 // out << "std::vector<" << fType << "> fVec_" << opName << "_xcol = std::vector<" << fType << ">("
320 // << fShapeW[1] * kernelSize * outputChannelSize << ");\n";
321 // out << "\n";
322
323 return out.str();
324 }
325
// Emit the C++ code implementing this Conv node:
//  1. unroll the (dilated) convolution kernels into the "<X>_f" matrix,
//  2. per batch element, run im2col on the input and a BLAS sgemm against
//     the kernel matrix (looping over groups when group > 1),
//  3. add the broadcast bias with a BLAS saxpy when a bias is present.
// Throws if called before Initialize() has filled in the shapes.
326 std::string Generate(std::string OpName) {
327 OpName = "op_" + OpName;
328
329 if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {
330 throw
331 std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first");
332 }
333
334 std::stringstream out;
335 size_t bsize = fShapeX[0];
336 size_t kDepth = (fDim > 2) ? fShapeW[2] : 1; // kernel depth
337 size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; // kernel height
338 size_t kWidth = fShapeW[fDim+1]; // kernel width
339 size_t iDepth = (fDim > 2) ? fShapeX[2] : 1; // input depth
340 size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1; // input height
341 size_t iWidth = fShapeX[fDim+1]; // input width
342 size_t oDepth = (fDim > 2) ? fShapeY[2] : 1; // output depth
343 size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1; // output height
344 size_t oWidth = fShapeY[fDim+1]; // output width
345
346 out << "\n//---- operator Conv " << OpName << "\n";
347
348 // create first matrix with convolution kernels
349 if (!fUseSession)
350 out << SP << fType << " tensor_" << fNX << "_f[" << fShapeW[0] * fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] << "] = {0};\n";
351
352 // vectorize the (dilated)convolution kernels into a matrix
353 // no need to transpose the matrix
354 // to fix for 1d and 3d
355
// index of each spatial axis inside the attribute vectors
356 size_t id = (fDim > 2) ? fDim-3 : 2;
357 size_t ih = (fDim > 1) ? fDim-2 : 1;
358 size_t iw = fDim-1;
359
// strides for addressing the plain kernel (source) and the dilated kernel
// matrix (destination) in the copy loop below
360 size_t wstrideDil = fAttrDilations[iw];
361 size_t hstride = kWidth;
362 size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw]; // stride dilated in the height
363 size_t dstride = kHeight * kWidth;
// NOTE(review): doc lines 364 and 366 -- the definitions of dstrideDil and
// icstrideDil, both used below -- are missing from this extraction.
365 size_t icstride = kHeight * kWidth * kDepth;
367 size_t ocstride = fShapeW[1] * icstride;
368 size_t ocstrideDil = fShapeW[1] * icstrideDil;
369
// emit the nested loops copying each kernel weight into its dilated slot
370 out << SP << "for (std::size_t oc = 0; oc < " << fShapeW[0] << "; oc++) {\n";
371 out << SP << SP << "for (std::size_t ic = 0; ic < " << fShapeW[1] << "; ic++) {\n";
372 if (fDim > 2)
373 out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";
374 if (fDim > 1)
375 out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";
376 out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";
377
378 out << SP << SP << SP << SP << SP << "tensor_" <<fNX << "_f[oc * "
379 << ocstrideDil << " + ic * " << icstrideDil;
380 if (fDim > 2) out << " + kd * " << dstrideDil;
381 if (fDim > 1) out << " + kh * " << hstrideDil;
382 out << " + kw * " << wstrideDil << " ] = tensor_" << fNW << "[oc * " << ocstride << " + ic * " << icstride;
383 if (fDim > 2) out << " + kd * " << dstride;
384 if (fDim > 1) out << " + kh * " << hstride;
385 out << " + kw ];\n";
386
387 out << SP << SP << SP << SP << "}\n";
388 if (fDim > 1) out << SP << SP << SP << "}\n";
389 if (fDim > 2) out << SP << SP << SP << "}\n";
390 out << SP << SP << "}\n";
391 out << SP << "}\n";
392
// GEMM setup: C(m x n) = xcol(m x k) * f(k x n), column-major BLAS
393 //out << SP << "char " << OpName << "_transA = 'T';\n";
394 out << SP << "char " << OpName << "_transA = 'N';\n";
395 out << SP << "char " << OpName << "_transB = 'N';\n";
396 out << SP << "int " << OpName << "_m = " << oHeight * oWidth * oDepth << ";\n"; // output h*w
397 assert(fShapeY[1] == fShapeW[0]);
398 assert(fShapeW[1] == fShapeX[1] / fAttrGroup);
399 out << SP << "int " << OpName << "_n = " << fShapeW[0] << ";\n"; // output channels
400 out << SP << "int " << OpName << "_k = " << fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] << ";\n";
401 out << SP << "float " << OpName << "_alpha = 1.0;\n";
402 out << SP << "float " << OpName << "_beta = 0.0;\n";
403
// NOTE(review): doc line 406 -- the size expression of the xcol buffer
// emitted between lines 405 and 407 -- is missing from this extraction.
404 if (!fUseSession) {
405 out << SP << fType << " tensor_" << fNX << "_xcol["
407 << "] = {0};\n";
408 }
409
410 // Loop on batch size
411 out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";
412
413 // IM2COL: Unroll the input tensor
414 // order input data as (e.g. kernel 2x2) and (xa,ya) is channel 1 and (xb,yb) is channel 2
415 // (xa1,..,xak,ya1,..yak)(xb1,...,xbk,yb1,..,ybk)
416 // (xa2,...xak+1,ya1,...yak)(......)
417 // trick for speed is using caffe im2col and output a matrix which contains filtered values as rows.
418 // By doing this one has consecutive memory reads and writes
419 // Resulting matrix op_xcol is (input channels * filter_h * filter_w , output_h * output_w)
// collapse asymmetric begin/end pads to their average (with a warning) and
// neutralize the unused second pad/stride entries for the 1d case
420 if (fDim ==1) {
421 if (fAttrPads[0] != fAttrPads[1] ) {
422 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding "
423 << std::endl;
424 fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;
425 }
426 fAttrPads[1] = 0;
427 fAttrStrides[1] = 1;
428 }
429 if (fDim == 2) {
430 if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {
431 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
432 fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;
433 fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;
434 }
435 }
436 if (fDim == 3) {
437 if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {
438 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
439 fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;
440 fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;
441 fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;
442 }
443 }
444 out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
445
446 if (fAttrGroup == 1) {
447 out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iHeight * iWidth << ";\n";
448 // when using im2col - resulting matrix is transposed, the dimension is (input_c * filter_h * filter_y, output_h *
449 // output_w)
450 if (fDim < 3) {
451 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
452 << " + x_offset,"
453 // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
454 // dilation_w,
455 //
456 << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
457 if (fDim == 1)
458 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
459 << fAttrDilations[0];
460 else // dim ==2
461 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
462 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
463 << fAttrDilations[1];
464 out << "," << "tensor_" <<fNX << "_xcol);\n\n ";
465 } else {
466 // 3d im2col
467 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
468 << " + x_offset,"
469 // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
470 // dilation_d, dilation_h, dilation_w,
471 //
472 << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << ","
473 << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << ","
474 << fAttrPads[0] << "," << fAttrPads[1] << "," << fAttrPads[2] << ","
475 << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
476 << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ","
477 << "tensor_" << fNX << "_xcol);\n\n ";
478 }
479 // BLAS
480 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
481 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, &" << OpName
482 << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
483 out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
484 << " + out_offset, &" << OpName << "_m);\n";
485 } else {
486 // case of group convolution
487 // Unroll (IM2COL) the input tensor- make loop on groups and repeat operations (IM2COL + GEMM for each
488 // group)
489 // out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
490 out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";
491 out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iDepth * iHeight * iWidth << " + g * "
492 << fShapeW[1] * iDepth * iHeight * iWidth << ";\n ";
493 out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << " + g * "
494 << fShapeW[0] * oDepth * oHeight * oWidth / fAttrGroup << ";\n ";
495
496 if (fDim < 3) {
497 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
498 << " + x_offset,"
499 // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
500 // dilation_w,
501 //
502 << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
503 if (fDim == 1)
504 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
505 << fAttrDilations[0];
506 else // dim ==2
507 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
508 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
509 << fAttrDilations[1];
510 out << ", tensor_" << fNX << "_xcol);\n\n ";
511 } else {
512 // 3d im2col
513 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
514 << " + x_offset,"
515 // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
516 // dilation_d, dilation_h, dilation_w,
517 //
518 << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << "," << fAttrKernelShape[0] << ","
519 << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1]
520 << "," << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2]
521 << "," << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ",tensor_" << fNX
522 << "_xcol);\n\n ";
523 }
524
525 // BLAS
526 // n must be divided by the number of groups
527 out << SP << SP << SP << OpName << "_n = " << fShapeW[0] / fAttrGroup << ";\n";
528 // offset g must be g * k * n
// NOTE(review): doc line 530 -- the offset expression emitted between lines
// 529 and 531 (the per-group stride into the kernel matrix) -- is missing
// from this extraction.
529 out << SP << SP << SP << "size_t offset_f = g * "
531 << ";\n";
532 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
533 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName
534 << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
535 out << SP << SP << SP << "tensor_" << fNX << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
536 << " + out_offset"
537 << ", &" << OpName << "_m);\n";
538
539 out << SP << SP << "}\n"; // end of group loop
540 }
541
// add the (broadcast) bias with saxpy, still inside the batch loop
542 if (fNB2 != "") {
543 out << SP << "int " << OpName << "_size = " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
544 out << SP << "float " << OpName << "_gamma = 1.0;\n";
545 out << SP << "int " << OpName << "_incx = 1;\n";
546 out << SP << "int " << OpName << "_incy = 1;\n";
547
548 out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &"
549 << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n";
550
551 }
552 out << SP << "}\n"; // end of batch size loop
553
554 return out.str();
555 }
556
/*! \brief Returns the names of the BLAS routines needed to compile the
 * generated code
 */
std::vector<std::string> GetBlasRoutines()
{
   std::vector<std::string> routines;
   routines.emplace_back("Gemm"); // matrix multiply of the im2col output with the kernel matrix
   routines.emplace_back("Axpy"); // addition of the broadcast bias
   return routines;
}
560};
561
562} // namespace SOFIE
563} // namespace Experimental
564} // namespace TMVA
565
566#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
const ETensorType & GetTensorType(std::string name)
Definition RModel.cxx:94
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< Dim > dim_shape)
Definition RModel.cxx:227
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:122
const std::vector< size_t > & GetTensorShape(std::string name)
Definition RModel.cxx:56
std::shared_ptr< void > GetInitializedTensorData(std::string tensor_name)
Definition RModel.cxx:288
void UpdateInitializedTensor(std::string tensor_name, ETensorType type, std::vector< std::size_t > shape, std::shared_ptr< void > data)
Definition RModel.cxx:279
virtual std::string GenerateSessionMembersCode(std::string opName)
std::string Generate(std::string OpName)
ROperator_Conv(std::string autopad, std::vector< size_t > dilations, size_t group, std::vector< size_t > kernelShape, std::vector< size_t > pads, std::vector< size_t > strides, std::string nameX, std::string nameW, std::string nameB, std::string nameY)
void Initialize(RModel &model) override
ROperator_Conv(std::string autopad, std::vector< size_t > dilations, size_t group, std::vector< size_t > kernelShape, std::vector< size_t > pads, std::vector< size_t > strides, std::string nameX, std::string nameW, std::string nameY)
std::vector< std::string > GetBlasRoutines()
Returns the blas routines needed to compile the generated code.
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input)
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input)
std::vector< std::string_view > fInputTensorNames
Definition ROperator.hxx:46
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:42
bool fUseSession
flag to identify if using the session class
Definition ROperator.hxx:43
std::vector< std::string_view > fOutputTensorNames
Definition ROperator.hxx:47
bool AreSameShape(const std::vector< size_t > &, const std::vector< size_t > &)
std::string ConvertShapeToString(std::vector< size_t > shape)
ETensorType ConvertStringToType(std::string type)
std::size_t ConvertShapeToLength(std::vector< size_t > shape)
create variable transformations