Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_LayerNormalization.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
2#define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
3
4#include "TMVA/RModel.hxx"
6
7#include <sstream>
8#include <string>
9
10namespace TMVA {
11namespace Experimental {
12namespace SOFIE {
13
14template <typename T>
16private:
20
21 std::string fNX;
22 std::string fNScale;
23 std::string fNB;
24 std::string fNY;
25 std::string fNMean;
26 std::string fNInvStdDev;
27
28 std::string fNCastedX;
29 std::string fNNormalizedX;
30 std::string fNBroadcastedB;
31
32 std::vector<Dim> fShapeX;
33 std::vector<Dim> fShapeScale;
34 std::vector<size_t> fShapeB; // shape of input Bias (B) is assumed to be fully defined
35 std::vector<Dim> fShapeY;
36 std::vector<Dim> fShapeMean;
37 std::vector<Dim> fShapeInvStdDev;
38
39 size_t fAxis; // axis in [0, size)
40 size_t fSize; // Size of the input
41 // size_t fAxisDim;
42
43 std::vector<Dim> fNormalizedShape;
44 std::vector<Dim> fAxesShape;
45 // lengths in string format
46 std::string fLength; // Length of the input
47 std::string fNormalizedLength;
48 std::string fAxesLength;
49
50 std::string fType;
51
52public:
54
55 ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX,
56 const std::string &nameScale, const std::string &nameB, const std::string &nameY,
57 const std::string &nameMean, const std::string &nameInvStdDev)
58 : fAttrAxis(axis), fAttrEpsilon(epsilon), fAttrStashType(stashType), fNX(UTILITY::Clean_name(nameX)),
59 fNScale(UTILITY::Clean_name(nameScale)), fNB(UTILITY::Clean_name(nameB)),
60 fNY(UTILITY::Clean_name(nameY)), fNMean(UTILITY::Clean_name(nameMean)), fNInvStdDev(UTILITY::Clean_name(nameInvStdDev))
61 {
63 if (!fNB.empty()){
64 fInputTensorNames.emplace_back(fNB);
65 }
66
68 if (!fNMean.empty()){
69 fOutputTensorNames.emplace_back(fNMean);
70 }
71 if (!fNInvStdDev.empty()){
72 fOutputTensorNames.emplace_back(fNInvStdDev);
73 }
74 }
75
76 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override { return input; }
77
78 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override { return input; }
79
80 void Initialize(RModel& model) override {
81 if (!model.CheckIfTensorAlreadyExist(fNX)) {
82 throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found.");
83 }
84 bool isDynamic = model.IsDynamicTensor(fNX);
88 // Type of the output
90 // Size of the input
91 fSize = fShapeX.size();
92 // Axis in [0, size)
94 // Shape of fShapeX[0, ..., fAxis)
95 fAxesShape = std::vector<Dim>(fShapeX.begin(), fShapeX.begin() + fAxis);
96 // Length of the axes
98 // Shape of fShapeX[fAxis, ..., fSize)
99 fNormalizedShape = std::vector<Dim>(fShapeX.begin() + fAxis, fShapeX.end());
100 // Length of the normalized axis
102 // length of the input
104 // Type of mean and std
106 // Mean
107 if (fNMean.empty()) {
108 fNMean = "Mean" + fNX;
109 // cannot use initializer list with one element since it is ambiguous
110 if (isDynamic)
111 // add size_t(-1) to indicate that shape is an expression
112 model.AddIntermediateTensor(fNMean, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
113 else
114 model.AddIntermediateTensor(fNMean, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
115 }
116 // Inverse Standard Deviation
117 if (fNInvStdDev.empty()) {
118 fNInvStdDev = "InvStdDev" + fNX;
119 if (isDynamic)
120 model.AddIntermediateTensor(fNInvStdDev, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
121 else
122 model.AddIntermediateTensor(fNInvStdDev, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
123 }
124 // Cast X to float
125 if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) {
126 fNCastedX = "Casted" + fNX;
128 fNNormalizedX = "Normalized" + fNX;
130 }
131 // Broadcast the bias
132 if (!fNB.empty()) {
133 fShapeB = model.GetTensorShape(fNB);
135 if (isDynamic || lengthB < static_cast<size_t>(std::stoi(fLength))) {
136 fNBroadcastedB = "Broadcasted" + fNB;
138 }
139 }
140 model.AddNeededStdLib("cmath");
141 }
142
143 std::string GenerateInitCode() override
144 {
145 std::stringstream out;
146 if (!fNBroadcastedB.empty()) {
147 out << SP << "// Broadcasting the bias of LayerNormalization op\n";
148 out << SP << "{\n";
149 out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_";
150 out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertDynamicShapeToString(fShapeX) << ");\n";
151 out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n";
152 out << SP << "delete[] data;\n";
153 out << SP << "}\n";
154 }
155 return out.str();
156 }
157
158 std::string Generate(std::string opName) override
159 {
160 opName = "op_" + opName;
161 if (fShapeX.empty()) {
162 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + opName +
163 " called to generate without being initialized first.");
164 }
165 if (fShapeX.size() > 5) {
166 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator not "
167 "implemented for input tensor of size > 5.");
168 }
169
170 std::stringstream out;
171
172 out << "//---- Layer Normalization operator " << opName << "\n";
173
174 // Loop over all the normalized axes i.e. [axis, ..., size)
175 std::vector<std::string> inputShape(fSize);
176
177 for (size_t i = 0; i < fSize; i++) {
178 inputShape[i] = fShapeX[i].GetVal();
179 }
180
182 std::string InputIndex = "axis_0 * " + strides[0].GetVal();
183 for (size_t i = 1; i < fSize; i++) {
184 InputIndex += " + axis_" + std::to_string(i) + " * " + strides[i].GetVal();
185 }
186
188 std::string axesIndex = "axis_" + std::to_string(0) + " * " + axesStrides[0].GetVal();
189 for (size_t i = 1; i < fAxis; i++) {
190 axesIndex += " + axis_" + std::to_string(i) + " * " + axesStrides[i].GetVal();
191 }
192
194 std::string normalizedIndex = "axis_" + std::to_string(fAxis) + " * " + normalizedStrides[0].GetVal();
195 for (size_t i = fAxis + 1; i < fSize; i++) {
196 normalizedIndex += " + axis_" + std::to_string(i) + " * " + normalizedStrides[i - fAxis].GetVal();
197 }
198
199 if (!fNCastedX.empty()) {
200 // Cast X to float
201 out << SP << "for (size_t i = 0; i < " << fLength << "; i++) {\n";
202 out << SP << SP << "tensor_" << fNCastedX << "[i] = " << "static_cast<float>(tensor_" << fNX;
203 out << "[i]);\n";
204 out << SP << "}\n";
205 }
206
207 out << SP << "// Compute the mean\n";
208 // Loop over the normalized dimensions
209 for (size_t i = 0; i < fAxis; i++) {
210 std::string iIdx = "axis_" + std::to_string(i);
211 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i]
212 << "; " << iIdx << "++) {\n";
213 }
214 out << SP << SP << fType << " sum = 0.;\n";
215 // loop over all the dims in [0, fAxis)
216 for (size_t j = fAxis; j < fSize; j++) {
217 std::string jIdx = "axis_" + std::to_string(j);
218 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j]
219 << "; " << jIdx << "++) {\n";
220 }
221 out << SP << SP << SP << "sum += tensor_" << fNX << "[" << InputIndex << "];\n";
222 for (size_t j = fAxis; j < fSize; j++) {
223 out << SP << SP << "}\n";
224 }
225 out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = sum / " << fType << "(";
226 out << fNormalizedLength << ");\n";
227 for (size_t i = fAxis; i < fSize; i++) {
228 out << SP << "}\n";
229 }
230
231 out << SP << "// Compute the inverse Standard Deviation\n";
232 // Loop over the normalized dimensions
233 for (size_t i = 0; i < fAxis; i++) {
234 std::string iIdx = "axis_" + std::to_string(i);
235 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i]
236 << "; " << iIdx << "++){\n";
237 }
238 // Set sum = 0
239 out << SP << SP << fType << " sum = 0.;\n";
240 // loop over all the dims in [0, fAxis)
241 for (size_t j = fAxis; j < fSize; j++) {
242 std::string jIdx = "axis_" + std::to_string(j);
243 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j]
244 << "; " << jIdx << "++){\n";
245 }
246 out << SP << SP << SP << "float tmp = tensor_" << fNX << "[" << InputIndex << "] - tensor_"
247 << fNMean << "[" << axesIndex << "];\n";
248 out << SP << SP << SP << "sum += tmp*tmp;\n";
249 for (size_t j = fAxis; j < fSize; j++) {
250 out << SP << SP << "}\n";
251 }
252 out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = 1 / std::sqrt(";
253 out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n";
254 for (size_t i = 0; i < fAxis; i++) {
255 out << SP << "}\n";
256 }
257
258 if (!fNCastedX.empty()) {
259 out << "// NormalizedX = InvStdDev * (CastedX - Mean)\n";
260 for (size_t i = 0; i < fAxis; i++) {
261 std::string iIdx = "axis_" + std::to_string(i);
262 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i]
263 << "; " << iIdx << "++){\n";
264 }
265 for (size_t j = fAxis; j < fSize; j++) {
266 std::string jIdx = "axis_" + std::to_string(j);
267 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j]
268 << "; " << jIdx << "++){\n";
269 }
270 out << SP << SP << SP << "tensor_" << fNNormalizedX << "[" << InputIndex << "] = tensor_";
271 out << fNInvStdDev << "[" << axesIndex << "] * (tensor_" << fNCastedX << "[" << InputIndex;
272 out << "] - tensor_" << fNMean << "[" << axesIndex << "])\n";
273 for (size_t j = fAxis; j < fSize; j++) {
274 out << SP << SP << "}\n";
275 }
276 for (size_t i = fAxis; i < fSize; i++) {
277 out << SP << "}\n";
278 }
279 out << "// Y = Scale o NormalizedX";
280 for (size_t i = 0; i < fAxis; i++) {
281 std::string iIdx = "axis_" + std::to_string(i);
282 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i]
283 << "; " << iIdx << "++){\n";
284 }
285 for (size_t j = fAxis; j < fSize; j++) {
286 std::string jIdx = "axis_" + std::to_string(j);
287 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j]
288 << "; " << jIdx << "++){\n";
289 }
290 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
291 out << "[" << axesIndex << "] * static_cast<" << fType << ">(tensor_" << fNCastedX << "[" << InputIndex;
292 out << "]);\n";
293 for (size_t j = fAxis; j < fSize; j++) {
294 out << SP << SP << "}\n";
295 }
296 for (size_t i = fAxis; i < fSize; i++) {
297 out << SP << "}\n";
298 }
299 } else {
300 out << SP << "// Y = Scale o InvStdDev (X - Mean)\n";
301 for (size_t i = 0; i < fAxis; i++) {
302 std::string iIdx = "axis_" + std::to_string(i);
303 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i]
304 << "; " << iIdx << "++){\n";
305 }
306 for (size_t j = fAxis; j < fSize; j++) {
307 std::string jIdx = "axis_" + std::to_string(j);
308 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j]
309 << "; " << jIdx << "++){\n";
310 }
311 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
312 out << "[" << normalizedIndex << "] * tensor_" << fNInvStdDev << "[" << axesIndex;
313 out << "] * (tensor_" << fNX << "[" << InputIndex << "] - tensor_" << fNMean << "[";
314 out << axesIndex << "]);\n";
315 for (size_t j = fAxis; j < fSize; j++) {
316 out << SP << SP << "}\n";
317 }
318 for (size_t i = fAxis; i < fSize; i++) {
319 out << SP << "}\n";
320 }
321 }
322
323 if (!fNB.empty()) {
324 std::string bias = "tensor_" + (fNBroadcastedB.empty() ? fNB : fNBroadcastedB);
325 out << SP << "// Add the bias to Y\n";
326 out << SP << "int " << opName << "_n = " << fLength << ";\n";
327 out << SP << "float " << opName << "_alpha = 1.;\n";
328 out << SP << "int " << opName << "_inc = 1;\n";
329 out << SP << "BLAS::saxpy_(&" << opName << "_n, &" << opName << "_alpha, " << bias << ", &";
330 out << opName << "_inc, " << "tensor_" << fNY << ", &" << opName << "_inc);\n";
331 }
332
333 return out.str();
334 }
335
336 std::vector<std::string> GetBlasRoutines() override { return { std::string("Axpy") }; }
337
338 std::vector<std::string> GetStdLibs() override { return { std::string("cmath") }; }
339};
340
341} // namespace SOFIE
342} // namespace Experimental
343} // namespace TMVA
344
345#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
void AddNeededStdLib(std::string libname)
bool IsDynamicTensor(const std::string &name) const
Definition RModel.cxx:186
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< Dim > dim_shape)
Definition RModel.cxx:200
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:95
const ETensorType & GetTensorType(std::string name) const
Definition RModel.cxx:67
const std::vector< size_t > & GetTensorShape(std::string name) const
Definition RModel.cxx:29
std::vector< Dim > GetDynamicTensorShape(std::string name) const
Definition RModel.cxx:55
ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX, const std::string &nameScale, const std::string &nameB, const std::string &nameY, const std::string &nameMean, const std::string &nameInvStdDev)
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
std::vector< std::string_view > fInputTensorNames
Definition ROperator.hxx:46
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:42
std::vector< std::string_view > fOutputTensorNames
Definition ROperator.hxx:47
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
std::string ConvertDynamicShapeToLength(std::vector< Dim > shape)
std::string ConvertShapeToString(std::vector< size_t > shape)
std::string ConvertTypeToString(ETensorType type)
std::string ConvertDynamicShapeToString(std::vector< Dim > shape)
ETensorType ConvertStringToType(std::string type)
std::size_t ConvertShapeToLength(std::vector< size_t > shape)
create variable transformations