Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_LayerNormalization.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
2#define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
3
4#include "TMVA/RModel.hxx"
6
7#include <sstream>
8#include <string>
9
10namespace TMVA {
11namespace Experimental {
12namespace SOFIE {
13
14template <typename T>
16private:
 // NOTE(review): the doxygen capture dropped original line 15 (the class
 // declaration, presumably `class ROperator_LayerNormalization final : public
 // ROperator {`) and lines 18-19 (presumably the fAttrEpsilon and
 // fAttrStashType attribute members used by the constructor and Initialize)
 // -- confirm against the repository.
 // ONNX LayerNormalization `axis` attribute as given on the node; the
 // resolved non-negative axis is kept separately in fAxis below.
17 int64_t fAttrAxis;
20
 // Names of the operator's input/output tensors: input X, Scale, optional
 // bias B, output Y, and the optional Mean / InvStdDev outputs.
21 std::string fNX;
22 std::string fNScale;
23 std::string fNB;
24 std::string fNY;
25 std::string fNMean;
26 std::string fNInvStdDev;
27
 // Names of helper tensors created in Initialize(): fNCastedX/fNNormalizedX
 // when X must be cast to float (stash_type == 1 and X is not FLOAT),
 // fNBroadcastedB when the bias is shorter than X and must be broadcast.
28 std::string fNCastedX;
29 std::string fNNormalizedX;
30 std::string fNBroadcastedB;
31
 // Shapes of the corresponding tensors, filled from the RModel in
 // Initialize().
32 std::vector<size_t> fShapeX;
33 std::vector<size_t> fShapeScale;
34 std::vector<size_t> fShapeB;
35 std::vector<size_t> fShapeY;
36 std::vector<size_t> fShapeMean;
37 std::vector<size_t> fShapeInvStdDev;
38
39 size_t fAxis; // axis in [0, size)
40 size_t fSize; // Size of the input
41 // size_t fAxisDim;
42 size_t fLength; // Length of the input X
43
 // Split of fShapeX at fAxis: fAxesShape = fShapeX[0, fAxis),
 // fNormalizedShape = fShapeX[fAxis, fSize).
44 std::vector<size_t> fNormalizedShape;
45 std::vector<size_t> fAxesShape;
 // NOTE(review): original lines 46-47 are missing from this capture;
 // Generate() uses a member `fNormalizedLength` that is presumably declared
 // there -- confirm against the repository.
48
 // C++ type name (as a string) used for X in the generated code.
49 std::string fType;
50
51public:
53
54 ROperator_LayerNormalization(int64_t axis, float epsilon, size_t stashType, const std::string &nameX,
55 const std::string &nameScale, const std::string &nameB, const std::string &nameY,
56 const std::string &nameMean, const std::string &nameInvStdDev)
57 : fAttrAxis(axis), fAttrEpsilon(epsilon), fAttrStashType(stashType), fNX(nameX), fNScale(nameScale), fNB(nameB),
58 fNY(nameY), fNMean(nameMean), fNInvStdDev(nameInvStdDev)
59 {
60 }
61
62 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override { return input; }
63
64 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override { return input; }
65
 /// Validate the input tensor, derive the shape bookkeeping members and
 /// register the helper tensors on the model.
 /// NOTE(review): several original lines (72-73, 75, 79, 83, 87-92, 95, 100,
 /// 105, 107, 115) were dropped by the doxygen capture; judging by the
 /// remaining comments they assigned fType, fAxis, fLength,
 /// fNormalizedLength and registered the intermediate tensors via
 /// model.AddIntermediateTensor(...) -- confirm against the repository.
66 void Initialize(RModel &model) override
67 {
 // The input X must already be known to the model.
68 if (!model.CheckIfTensorAlreadyExist(fNX)) {
69 throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found.");
70 }
71 fShapeX = model.GetTensorShape(fNX);
74 // Type of the output
76 // Size of the input
77 fSize = fShapeX.size();
78 // Axis in [0, size)
 // NOTE(review): the assignment of fAxis (resolving a possibly negative
 // fAttrAxis) is on a missing line; fAxis is used below without a visible
 // definition here.
80 // Shape of fShapeX[0, ..., fAxis)
81 fAxesShape = std::vector<size_t>(fShapeX.begin(), fShapeX.begin() + fAxis);
82 // Length of the axes
84 // Shape of fShapeX[fAxis, ..., fSize)
85 fNormalizedShape = std::vector<size_t>(fShapeX.begin() + fAxis, fShapeX.end());
86 // Length of the normalized axis
88 // length of the input
90 // Type of mean and std
92 // Mean
 // Generate default names for the optional Mean / InvStdDev outputs when
 // the ONNX node does not provide them.
93 if (fNMean.empty()) {
94 fNMean = "Mean" + fNX;
96 }
97 // Inverse Standard Deviation
98 if (fNInvStdDev.empty()) {
99 fNInvStdDev = "InvStdDev" + fNX;
101 }
102 // Cast X to float
 // stash_type == 1 means the mean/variance are stashed as float; a casted
 // copy of X (and a normalized buffer) is needed when X is not FLOAT.
103 if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) {
104 fNCastedX = "Casted" + fNX;
106 fNNormalizedX = "Normalized" + fNX;
108 }
109 // Broadcast the bias
 // A broadcast buffer is only needed when B has fewer elements than X;
 // otherwise B can be added element-wise as is.
110 if (!fNB.empty()) {
111 fShapeB = model.GetTensorShape(fNB);
112 size_t lengthB = ConvertShapeToLength(fShapeB);
113 if (lengthB < fLength) {
114 fNBroadcastedB = "Broadcasted" + fNB;
116 }
117 }
118 }
119
120 std::string GenerateInitCode() override
121 {
122 std::stringstream out;
123 if (!fNBroadcastedB.empty()) {
124 out << SP << "// Broadcasting the bias of LayerNormlization op\n";
125 out << SP << "{\n";
126 out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_";
127 out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeX) << ");\n";
128 out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n";
129 out << SP << "delete[] data;\n";
130 out << SP << "}\n";
131 }
132 return out.str();
133 }
134
 /// Generate the inference code for this LayerNormalization node: nested
 /// loops that compute the per-slice mean and inverse standard deviation,
 /// then Y = Scale * InvStdDev * (X - Mean), with the bias added via BLAS
 /// saxpy at the end.
 /// NOTE(review): the doxygen capture dropped original lines 162 and 168,
 /// which presumably declared `strides` and `axesStrides` via
 /// UTILITY::ComputeStrideFromShape(fShapeX) / (fAxesShape) by analogy with
 /// line 174 -- confirm against the repository.
135 std::string Generate(std::string OpName) override
136 {
137 OpName = "op_" + OpName;
 // Generate() requires Initialize() to have run first (fShapeX filled).
138 if (fShapeX.empty()) {
 // NOTE(review): "beging" is a typo for "being" in this user-visible
 // error message; fixing it requires a code change, not done here.
139 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + OpName +
140 " called to generate without beging initialized first.");
141 }
142 if (fShapeX.size() > 5) {
143 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator not "
144 "implemented for input tensor of size > 5.");
145 }
146
147 std::stringstream out;
148
149 out << SP << "// Operator " << OpName << "\n";
150
151 // Loop over all the normalized axes i.e. [axis, ..., size)
 // Emit the input shape as a vector literal so the generated loops can
 // read their bounds from it.
152 out << SP << "std::vector<size_t> " << OpName << "_InputShape ({";
153 for (size_t i = 0; i < fSize; i++) {
154 out << fShapeX[i];
155 if (i + 1 < fSize) {
156 out << ",";
157 }
158 }
159 out << "});\n";
160 std::string inputShape = OpName + "_InputShape";
161
 // Flattened index expression into X: sum of axis_i * stride_i over all
 // fSize dimensions (the declaration of `strides` is on a missing line).
163 std::string InputIndex = "axis_0 * " + std::to_string(strides[0]);
164 for (size_t i = 1; i < fSize; i++) {
165 InputIndex += " + axis_" + std::to_string(i) + " * " + std::to_string(strides[i]);
166 }
167
 // Flattened index into the Mean/InvStdDev tensors: only the leading
 // [0, fAxis) dimensions (the declaration of `axesStrides` is on a
 // missing line).
169 std::string axesIndex = "axis_" + std::to_string(0) + " * " + std::to_string(axesStrides[0]);
170 for (size_t i = 1; i < fAxis; i++) {
171 axesIndex += " + axis_" + std::to_string(i) + " * " + std::to_string(axesStrides[i]);
172 }
173
 // Flattened index into the Scale tensor: the trailing [fAxis, fSize)
 // dimensions with strides computed from the normalized shape.
174 auto normalizedStrides = UTILITY::ComputeStrideFromShape(fNormalizedShape);
175 std::string normalizedIndex = "axis_" + std::to_string(fAxis) + " * " + std::to_string(normalizedStrides[0]);
176 for (size_t i = fAxis + 1; i < fSize; i++) {
177 normalizedIndex += " + axis_" + std::to_string(i) + " * " + std::to_string(normalizedStrides[i - fAxis]);
178 }
179
 // Optional element-wise cast of X to float (stash_type handling).
180 if (!fNCastedX.empty()) {
181 // Cast X to float
182 out << SP << "for (size_t i = 0; i < " << fLength << "; i++) {\n";
183 out << SP << SP << "tensor_" << fNCastedX << "[i] = " << "static_cast<float>(tensor_" << fNX;
184 out << "[i]);\n";
185 out << SP << "}\n";
186 }
187
 // Mean: for each slice indexed by the leading axes, average X over the
 // normalized (trailing) axes.
188 out << SP << "// Compute the mean\n";
189 // Loop over the normalized dimensions
190 for (size_t i = 0; i < fAxis; i++) {
191 std::string iIdx = "axis_" + std::to_string(i);
192 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
193 out << "[" << i << "]; " << iIdx << "++) {\n";
194 }
195 out << SP << SP << fType << " sum = 0.;\n";
196 // loop over all the dims in [0, fAxis)
197 for (size_t j = fAxis; j < fSize; j++) {
198 std::string jIdx = "axis_" + std::to_string(j);
199 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
200 out << "[" << j << "]; " << jIdx << "++) {\n";
201 }
202 out << SP << SP << SP << "sum += tensor_" << fNX << "[" << InputIndex << "];\n";
203 for (size_t j = fAxis; j < fSize; j++) {
204 out << SP << SP << "}\n";
205 }
206 out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = sum / " << fType << "(";
207 out << fNormalizedLength << ");\n";
 // NOTE(review): this loop emits fSize - fAxis closing braces, but the
 // matching opening loops above (lines 190-194) emitted fAxis of them;
 // the counts differ whenever fAxis != fSize - fAxis (cf. lines 234-236,
 // which close the analogous section with `i = 0; i < fAxis`). Verify
 // the generated code balances its braces.
208 for (size_t i = fAxis; i < fSize; i++) {
209 out << SP << "}\n";
210 }
211
 // InvStdDev: 1 / sqrt(variance + epsilon) per slice, with the variance
 // accumulated via std::pow over the normalized axes.
212 out << SP << "// Compute the inverse Standard Deviation\n";
213 // Loop over the normalized dimensions
214 for (size_t i = 0; i < fAxis; i++) {
215 std::string iIdx = "axis_" + std::to_string(i);
216 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
217 out << "[" << i << "]; " << iIdx << "++){\n";
218 }
219 // Set sum = 0
220 out << SP << SP << fType << " sum = 0.;\n";
221 // loop over all the dims in [0, fAxis)
222 for (size_t j = fAxis; j < fSize; j++) {
223 std::string jIdx = "axis_" + std::to_string(j);
224 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
225 out << "[" << j << "]; " << jIdx << "++){\n";
226 }
227 out << SP << SP << SP << "sum += std::pow(tensor_" << fNX << "[" << InputIndex << "] - tensor_";
228 out << fNMean << "[" << axesIndex << "], 2);\n";
229 for (size_t j = fAxis; j < fSize; j++) {
230 out << SP << SP << "}\n";
231 }
232 out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = 1 / std::sqrt(";
233 out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n";
234 for (size_t i = 0; i < fAxis; i++) {
235 out << SP << "}\n";
236 }
237
 // Two code paths for the output: with an intermediate casted/normalized
 // buffer (stash_type cast active) or directly from X.
238 if (!fNCastedX.empty()) {
239 out << "// NormalizedX = InvStdDev * (CastedX - Mean)\n";
240 for (size_t i = 0; i < fAxis; i++) {
241 std::string iIdx = "axis_" + std::to_string(i);
242 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
243 out << "[" << i << "]; " << iIdx << "++){\n";
244 }
245 for (size_t j = fAxis; j < fSize; j++) {
246 std::string jIdx = "axis_" + std::to_string(j);
247 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
248 out << "[" << j << "]; " << jIdx << "++){\n";
249 }
250 out << SP << SP << SP << "tensor_" << fNNormalizedX << "[" << InputIndex << "] = tensor_";
251 out << fNInvStdDev << "[" << axesIndex << "] * (tensor_" << fNCastedX << "[" << InputIndex;
 // NOTE(review): the emitted statement ends with "])\n" -- no trailing
 // ';' -- so the generated code would not compile; verify against the
 // repository ("]);\n" expected).
252 out << "] - tensor_" << fNMean << "[" << axesIndex << "])\n";
253 for (size_t j = fAxis; j < fSize; j++) {
254 out << SP << SP << "}\n";
255 }
 // NOTE(review): same closing-brace count concern as lines 208-210.
256 for (size_t i = fAxis; i < fSize; i++) {
257 out << SP << "}\n";
258 }
 // NOTE(review): this emitted comment string lacks a trailing "\n",
 // unlike every other comment emitted in this function; verify.
259 out << "// Y = Scale o NormalizedX";
260 for (size_t i = 0; i < fAxis; i++) {
261 std::string iIdx = "axis_" + std::to_string(i);
262 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
263 out << "[" << i << "]; " << iIdx << "++){\n";
264 }
265 for (size_t j = fAxis; j < fSize; j++) {
266 std::string jIdx = "axis_" + std::to_string(j);
267 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
268 out << "[" << j << "]; " << jIdx << "++){\n";
269 }
 // NOTE(review): Scale is indexed with axesIndex here, but with
 // normalizedIndex in the non-cast branch (line 292), and the operand
 // is tensor_<CastedX> rather than tensor_<NormalizedX> computed just
 // above -- both look inconsistent with the else-branch; verify.
270 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
271 out << "[" << axesIndex << "] * static_cast<" << fType << ">(tensor_" << fNCastedX << "[" << InputIndex;
272 out << "]);\n";
273 for (size_t j = fAxis; j < fSize; j++) {
274 out << SP << SP << "}\n";
275 }
 // NOTE(review): same closing-brace count concern as lines 208-210.
276 for (size_t i = fAxis; i < fSize; i++) {
277 out << SP << "}\n";
278 }
279 } else {
 // Direct path: Y = Scale[normalizedIndex] * InvStdDev * (X - Mean).
280 out << SP << "// Y = Scale o InvStdDev (X - Mean)\n";
281 for (size_t i = 0; i < fAxis; i++) {
282 std::string iIdx = "axis_" + std::to_string(i);
283 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
284 out << "[" << i << "]; " << iIdx << "++){\n";
285 }
286 for (size_t j = fAxis; j < fSize; j++) {
287 std::string jIdx = "axis_" + std::to_string(j);
288 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
289 out << "[" << j << "]; " << jIdx << "++){\n";
290 }
291 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
292 out << "[" << normalizedIndex << "] * tensor_" << fNInvStdDev << "[" << axesIndex;
293 out << "] * (tensor_" << fNX << "[" << InputIndex << "] - tensor_" << fNMean << "[";
294 out << axesIndex << "]);\n";
295 for (size_t j = fAxis; j < fSize; j++) {
296 out << SP << SP << "}\n";
297 }
 // NOTE(review): same closing-brace count concern as lines 208-210.
298 for (size_t i = fAxis; i < fSize; i++) {
299 out << SP << "}\n";
300 }
301 }
302
 // Add the (possibly broadcast) bias in one shot with BLAS saxpy:
 // tensor_Y += 1.0 * Bias.
303 if (!fNB.empty()) {
304 std::string Bias = "tensor_" + (fNBroadcastedB.empty() ? fNB : fNBroadcastedB);
305 out << SP << "// Add the bias to Y\n";
306 out << SP << "int " << OpName << "_n = " << fLength << ";\n";
307 out << SP << "float " << OpName << "_alpha = 1.;\n";
308 out << SP << "int " << OpName << "_inc = 1;\n";
309 out << SP << "BLAS::saxpy_(&" << OpName << "_n, &" << OpName << "_alpha, " << Bias << ", &";
310 out << OpName << "_inc, " << "tensor_" << fNY << ", &" << OpName << "_inc);\n";
311 }
312
313 return out.str();
314 }
315
316 std::vector<std::string> GetBlasRoutines() override { return { std::string("Axpy") }; }
317
318 std::vector<std::string> GetStdLibs() override { return { std::string("cmath") }; }
319};
320
321} // namespace SOFIE
322} // namespace Experimental
323} // namespace TMVA
324
325#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
const ETensorType & GetTensorType(std::string name)
Definition RModel.cxx:80
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< std::size_t > shape)
Definition RModel.cxx:160
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:101
const std::vector< size_t > & GetTensorShape(std::string name)
Definition RModel.cxx:59
ROperator_LayerNormalization(int64_t axis, float epsilon, size_t stashType, const std::string &nameX, const std::string &nameScale, const std::string &nameB, const std::string &nameY, const std::string &nameMean, const std::string &nameInvStdDev)
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:41
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
std::string ConvertShapeToString(std::vector< size_t > shape)
std::string ConvertTypeToString(ETensorType type)
ETensorType ConvertStringToType(std::string type)
std::size_t ConvertShapeToLength(std::vector< size_t > shape)
create variable transformations
double epsilon
Definition triangle.c:618