Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_LayerNormalization.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
2#define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
3
4#include "TMVA/RModel.hxx"
6
7#include <sstream>
8#include <string>
9
10namespace TMVA {
11namespace Experimental {
12namespace SOFIE {
13
template <typename T>
// NOTE(review): the class declaration line (presumably
// `class ROperator_LayerNormalization final : public ROperator {`) and the
// attribute members referenced by the constructor initializer list
// (fAttrAxis, fAttrEpsilon, fAttrStashType) fall on lines dropped by the
// extraction — confirm against the full header.
private:

   // Cleaned names of the operator's input/output tensors.
   std::string fNX;          // input tensor X
   std::string fNScale;      // scale tensor (ONNX "Scale" / gamma)
   std::string fNB;          // optional bias tensor (ONNX "B" / beta); may be empty
   std::string fNY;          // output tensor Y
   std::string fNMean;       // optional output: saved mean; generated name if empty
   std::string fNInvStdDev;  // optional output: saved 1/stddev; generated name if empty

   // Names of intermediate tensors created during Initialize().
   std::string fNCastedX;      // X cast to float (stash_type path)
   std::string fNNormalizedX;  // normalized X (stash_type path)
   std::string fNBroadcastedB; // bias broadcast to the shape of X

   std::vector<Dim> fShapeX;
   std::vector<Dim> fShapeScale;
   std::vector<size_t> fShapeB; // shape of input Bias (B) is assumed to be fully defined
   std::vector<Dim> fShapeY;
   std::vector<Dim> fShapeMean;
   std::vector<Dim> fShapeInvStdDev;

   size_t fAxis; // axis in [0, size)
   size_t fSize; // Size of the input
   // size_t fAxisDim;

   // Shape of the normalized axes [fAxis, fSize) and of the leading
   // (non-normalized) axes [0, fAxis).
   std::vector<Dim> fNormalizedShape;
   std::vector<Dim> fAxesShape;
   // lengths in string format
   std::string fLength; // Length of the input
   std::string fNormalizedLength;
   std::string fAxesLength;

   std::string fType; // C++ element type of X/Y as a string (e.g. "float")

public:
   // NOTE(review): a default constructor likely sits on a dropped line here —
   // confirm against the full header.

   /// Constructor: stores the ONNX attributes (axis, epsilon, stash_type) and the
   /// cleaned tensor names. nameB, nameMean and nameInvStdDev correspond to the
   /// optional input / optional outputs of ONNX LayerNormalization and may be empty.
   ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX,
                                const std::string &nameScale, const std::string &nameB, const std::string &nameY,
                                const std::string &nameMean, const std::string &nameInvStdDev)
      : fAttrAxis(axis), fAttrEpsilon(epsilon), fAttrStashType(stashType), fNX(UTILITY::Clean_name(nameX)),
        fNScale(UTILITY::Clean_name(nameScale)), fNB(UTILITY::Clean_name(nameB)),
        fNY(UTILITY::Clean_name(nameY)), fNMean(UTILITY::Clean_name(nameMean)), fNInvStdDev(UTILITY::Clean_name(nameInvStdDev))
   {
      // NOTE(review): the initial fInputTensorNames / fOutputTensorNames
      // assignments (presumably registering fNX, fNScale and fNY) fall on lines
      // dropped by the extraction — confirm against the full header.
      // Bias is an optional input: register it only when present.
      if (!fNB.empty()){
         fInputTensorNames.emplace_back(fNB);
      }

      // Mean and InvStdDev are optional outputs: register only when requested.
      if (!fNMean.empty()){
         fOutputTensorNames.emplace_back(fNMean);
      }
      if (!fNInvStdDev.empty()){
         fOutputTensorNames.emplace_back(fNInvStdDev);
      }
   }
75
76 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override { return input; }
77
78 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override { return input; }
79
   /// Resolve shapes/types from the model, split the input shape at fAxis into
   /// non-normalized and normalized parts, and register the intermediate tensors
   /// (Mean, InvStdDev, casted/normalized X, broadcast bias) that code generation needs.
   void Initialize(RModel& model) override {
      // The input tensor must already be known to the model.
      if (!model.CheckIfTensorAlreadyExist(fNX)) {
         throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found.");
      }
      bool isDynamic = model.IsDynamicTensor(fNX);
      // NOTE(review): several statements of this method fall on lines dropped by
      // the extraction — the assignments of fShapeX/fShapeScale, fType, fAxis
      // (normalization of a possibly negative fAttrAxis, presumably), the
      // fAxesLength/fNormalizedLength/fLength strings, the local `type` used
      // below, the AddIntermediateTensor calls for fNCastedX/fNNormalizedX/
      // fNBroadcastedB, and the local `lengthB`. Confirm against the full header.
      // Type of the output
      // Size of the input
      fSize = fShapeX.size();
      // Axis in [0, size)
      // Shape of fShapeX[0, ..., fAxis)
      fAxesShape = std::vector<Dim>(fShapeX.begin(), fShapeX.begin() + fAxis);
      // Length of the axes
      // Shape of fShapeX[fAxis, ..., fSize)
      fNormalizedShape = std::vector<Dim>(fShapeX.begin() + fAxis, fShapeX.end());
      // Length of the normalized axis
      // length of the input
      // Type of mean and std
      // Mean: optional output — create an intermediate tensor when not requested.
      if (fNMean.empty()) {
         fNMean = "Mean" + fNX;
         // cannot use initializer list with one element since it is ambiguous
         if (isDynamic)
            // add size_t(-1) to indicate that shape is an expression
            model.AddIntermediateTensor(fNMean, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
         else
            model.AddIntermediateTensor(fNMean, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
      }
      // Inverse Standard Deviation: same treatment as Mean.
      if (fNInvStdDev.empty()) {
         fNInvStdDev = "InvStdDev" + fNX;
         if (isDynamic)
            model.AddIntermediateTensor(fNInvStdDev, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
         else
            model.AddIntermediateTensor(fNInvStdDev, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
      }
      // Cast X to float when stash_type == 1 but X is not already float.
      if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) {
         fNCastedX = "Casted" + fNX;
         fNNormalizedX = "Normalized" + fNX;
      }
      // Broadcast the bias to the input shape when needed (dynamic shape or
      // bias shorter than the input).
      if (!fNB.empty()) {
         fShapeB = model.GetTensorShape(fNB);
         if (isDynamic || lengthB < static_cast<size_t>(std::stoi(fLength))) {
            fNBroadcastedB = "Broadcasted" + fNB;
         }
      }
      model.AddNeededStdLib("cmath");
   }
142
143 std::string GenerateInitCode() override
144 {
145 std::stringstream out;
146 if (!fNBroadcastedB.empty()) {
147 out << SP << "// Broadcasting the bias of LayerNormalization op\n";
148 out << SP << "{\n";
149 out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_";
150 out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertDynamicShapeToString(fShapeX) << ");\n";
151 out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n";
152 out << SP << "delete[] data;\n";
153 out << SP << "}\n";
154 }
155 return out.str();
156 }
157
158 std::string Generate(std::string OpName) override
159 {
160 OpName = "op_" + OpName;
161 if (fShapeX.empty()) {
162 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + OpName +
163 " called to generate without being initialized first.");
164 }
165 if (fShapeX.size() > 5) {
166 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator not "
167 "implemented for input tensor of size > 5.");
168 }
169
170 std::stringstream out;
171
172 out << "//---- Layer Normalization operator " << OpName << "\n";
173
174 // Loop over all the normalized axes i.e. [axis, ..., size)
175 out << SP << "std::vector<size_t> " << OpName << "_InputShape ({";
176 for (size_t i = 0; i < fSize; i++) {
177 out << fShapeX[i].GetVal();
178 if (i + 1 < fSize) {
179 out << ",";
180 }
181 }
182 out << "});\n";
183 std::string inputShape = OpName + "_InputShape";
184
186 std::string InputIndex = "axis_0 * " + strides[0].GetVal();
187 for (size_t i = 1; i < fSize; i++) {
188 InputIndex += " + axis_" + std::to_string(i) + " * " + strides[i].GetVal();
189 }
190
192 std::string axesIndex = "axis_" + std::to_string(0) + " * " + axesStrides[0].GetVal();
193 for (size_t i = 1; i < fAxis; i++) {
194 axesIndex += " + axis_" + std::to_string(i) + " * " + axesStrides[i].GetVal();
195 }
196
198 std::string normalizedIndex = "axis_" + std::to_string(fAxis) + " * " + normalizedStrides[0].GetVal();
199 for (size_t i = fAxis + 1; i < fSize; i++) {
200 normalizedIndex += " + axis_" + std::to_string(i) + " * " + normalizedStrides[i - fAxis].GetVal();
201 }
202
203 if (!fNCastedX.empty()) {
204 // Cast X to float
205 out << SP << "for (size_t i = 0; i < " << fLength << "; i++) {\n";
206 out << SP << SP << "tensor_" << fNCastedX << "[i] = " << "static_cast<float>(tensor_" << fNX;
207 out << "[i]);\n";
208 out << SP << "}\n";
209 }
210
211 out << SP << "// Compute the mean\n";
212 // Loop over the normalized dimensions
213 for (size_t i = 0; i < fAxis; i++) {
214 std::string iIdx = "axis_" + std::to_string(i);
215 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
216 out << "[" << i << "]; " << iIdx << "++) {\n";
217 }
218 out << SP << SP << fType << " sum = 0.;\n";
219 // loop over all the dims in [0, fAxis)
220 for (size_t j = fAxis; j < fSize; j++) {
221 std::string jIdx = "axis_" + std::to_string(j);
222 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
223 out << "[" << j << "]; " << jIdx << "++) {\n";
224 }
225 out << SP << SP << SP << "sum += tensor_" << fNX << "[" << InputIndex << "];\n";
226 for (size_t j = fAxis; j < fSize; j++) {
227 out << SP << SP << "}\n";
228 }
229 out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = sum / " << fType << "(";
230 out << fNormalizedLength << ");\n";
231 for (size_t i = fAxis; i < fSize; i++) {
232 out << SP << "}\n";
233 }
234
235 out << SP << "// Compute the inverse Standard Deviation\n";
236 // Loop over the normalized dimensions
237 for (size_t i = 0; i < fAxis; i++) {
238 std::string iIdx = "axis_" + std::to_string(i);
239 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
240 out << "[" << i << "]; " << iIdx << "++){\n";
241 }
242 // Set sum = 0
243 out << SP << SP << fType << " sum = 0.;\n";
244 // loop over all the dims in [0, fAxis)
245 for (size_t j = fAxis; j < fSize; j++) {
246 std::string jIdx = "axis_" + std::to_string(j);
247 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
248 out << "[" << j << "]; " << jIdx << "++){\n";
249 }
250 out << SP << SP << SP << "sum += std::pow(tensor_" << fNX << "[" << InputIndex << "] - tensor_";
251 out << fNMean << "[" << axesIndex << "], 2);\n";
252 for (size_t j = fAxis; j < fSize; j++) {
253 out << SP << SP << "}\n";
254 }
255 out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = 1 / std::sqrt(";
256 out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n";
257 for (size_t i = 0; i < fAxis; i++) {
258 out << SP << "}\n";
259 }
260
261 if (!fNCastedX.empty()) {
262 out << "// NormalizedX = InvStdDev * (CastedX - Mean)\n";
263 for (size_t i = 0; i < fAxis; i++) {
264 std::string iIdx = "axis_" + std::to_string(i);
265 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
266 out << "[" << i << "]; " << iIdx << "++){\n";
267 }
268 for (size_t j = fAxis; j < fSize; j++) {
269 std::string jIdx = "axis_" + std::to_string(j);
270 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
271 out << "[" << j << "]; " << jIdx << "++){\n";
272 }
273 out << SP << SP << SP << "tensor_" << fNNormalizedX << "[" << InputIndex << "] = tensor_";
274 out << fNInvStdDev << "[" << axesIndex << "] * (tensor_" << fNCastedX << "[" << InputIndex;
275 out << "] - tensor_" << fNMean << "[" << axesIndex << "])\n";
276 for (size_t j = fAxis; j < fSize; j++) {
277 out << SP << SP << "}\n";
278 }
279 for (size_t i = fAxis; i < fSize; i++) {
280 out << SP << "}\n";
281 }
282 out << "// Y = Scale o NormalizedX";
283 for (size_t i = 0; i < fAxis; i++) {
284 std::string iIdx = "axis_" + std::to_string(i);
285 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
286 out << "[" << i << "]; " << iIdx << "++){\n";
287 }
288 for (size_t j = fAxis; j < fSize; j++) {
289 std::string jIdx = "axis_" + std::to_string(j);
290 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
291 out << "[" << j << "]; " << jIdx << "++){\n";
292 }
293 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
294 out << "[" << axesIndex << "] * static_cast<" << fType << ">(tensor_" << fNCastedX << "[" << InputIndex;
295 out << "]);\n";
296 for (size_t j = fAxis; j < fSize; j++) {
297 out << SP << SP << "}\n";
298 }
299 for (size_t i = fAxis; i < fSize; i++) {
300 out << SP << "}\n";
301 }
302 } else {
303 out << SP << "// Y = Scale o InvStdDev (X - Mean)\n";
304 for (size_t i = 0; i < fAxis; i++) {
305 std::string iIdx = "axis_" + std::to_string(i);
306 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
307 out << "[" << i << "]; " << iIdx << "++){\n";
308 }
309 for (size_t j = fAxis; j < fSize; j++) {
310 std::string jIdx = "axis_" + std::to_string(j);
311 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
312 out << "[" << j << "]; " << jIdx << "++){\n";
313 }
314 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
315 out << "[" << normalizedIndex << "] * tensor_" << fNInvStdDev << "[" << axesIndex;
316 out << "] * (tensor_" << fNX << "[" << InputIndex << "] - tensor_" << fNMean << "[";
317 out << axesIndex << "]);\n";
318 for (size_t j = fAxis; j < fSize; j++) {
319 out << SP << SP << "}\n";
320 }
321 for (size_t i = fAxis; i < fSize; i++) {
322 out << SP << "}\n";
323 }
324 }
325
326 if (!fNB.empty()) {
327 std::string Bias = "tensor_" + (fNBroadcastedB.empty() ? fNB : fNBroadcastedB);
328 out << SP << "// Add the bias to Y\n";
329 out << SP << "int " << OpName << "_n = " << fLength << ";\n";
330 out << SP << "float " << OpName << "_alpha = 1.;\n";
331 out << SP << "int " << OpName << "_inc = 1;\n";
332 out << SP << "BLAS::saxpy_(&" << OpName << "_n, &" << OpName << "_alpha, " << Bias << ", &";
333 out << OpName << "_inc, " << "tensor_" << fNY << ", &" << OpName << "_inc);\n";
334 }
335
336 return out.str();
337 }
338
339 std::vector<std::string> GetBlasRoutines() override { return { std::string("Axpy") }; }
340
341 std::vector<std::string> GetStdLibs() override { return { std::string("cmath") }; }
342};
343
344} // namespace SOFIE
345} // namespace Experimental
346} // namespace TMVA
347
348#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
void AddNeededStdLib(std::string libname)
const ETensorType & GetTensorType(std::string name)
Definition RModel.cxx:94
bool IsDynamicTensor(const std::string &name) const
Definition RModel.cxx:213
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< Dim > dim_shape)
Definition RModel.cxx:227
std::vector< Dim > GetDynamicTensorShape(std::string name)
Definition RModel.cxx:82
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:122
const std::vector< size_t > & GetTensorShape(std::string name)
Definition RModel.cxx:56
ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX, const std::string &nameScale, const std::string &nameB, const std::string &nameY, const std::string &nameMean, const std::string &nameInvStdDev)
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
std::vector< std::string_view > fInputTensorNames
Definition ROperator.hxx:46
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:42
std::vector< std::string_view > fOutputTensorNames
Definition ROperator.hxx:47
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
std::string ConvertDynamicShapeToLength(std::vector< Dim > shape)
std::string ConvertShapeToString(std::vector< size_t > shape)
std::string ConvertTypeToString(ETensorType type)
std::string ConvertDynamicShapeToString(std::vector< Dim > shape)
ETensorType ConvertStringToType(std::string type)
std::size_t ConvertShapeToLength(std::vector< size_t > shape)
create variable transformations