Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_LayerNormalization.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
2#define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
3
4#include "TMVA/RModel.hxx"
6
7#include <sstream>
8#include <string>
9
10namespace TMVA {
11namespace Experimental {
12namespace SOFIE {
13
14template <typename T>
16private:
20
// Tensor names. The constructor stores each one after passing the corresponding
// name argument through UTILITY::Clean_name.
21 std::string fNX; // input tensor X
22 std::string fNScale; // Scale input
23 std::string fNB; // bias input B; when empty, Generate() adds no bias
24 std::string fNY; // output tensor Y
25 std::string fNMean; // Mean tensor; Initialize() sets it to "Mean" + fNX when empty
26 std::string fNInvStdDev; // InvStdDev tensor; Initialize() sets it to "InvStdDev" + fNX when empty
27
// Helper tensor names chosen by Initialize(); the generators test them with empty()
// to decide whether the corresponding code is emitted.
28 std::string fNCastedX; // "Casted" + fNX, set when fAttrStashType == 1 and X is not FLOAT
29 std::string fNNormalizedX; // "Normalized" + fNX, set in the same branch as fNCastedX
30 std::string fNBroadcastedB; // "Broadcasted" + fNB, set when the bias has to be broadcast
31
// Tensor shapes.
// NOTE(review): fShapeScale, fShapeY, fShapeMean and fShapeInvStdDev are not referenced by
// the code visible in this listing -- confirm where (or whether) they are filled.
32 std::vector<Dim> fShapeX; // shape of X; fSize is taken from its size()
33 std::vector<Dim> fShapeScale;
34 std::vector<size_t> fShapeB; // shape of input Bias (B) is assumed to be fully defined
35 std::vector<Dim> fShapeY;
36 std::vector<Dim> fShapeMean;
37 std::vector<Dim> fShapeInvStdDev;
38
39 size_t fAxis; // axis in [0, size): dims [0, fAxis) are looped over, dims [fAxis, fSize) are reduced
40 size_t fSize; // number of dimensions of the input (fShapeX.size())
41 // size_t fAxisDim;
42
43 std::vector<Dim> fNormalizedShape; // fShapeX[fAxis, ..., fSize)
44 std::vector<Dim> fAxesShape; // fShapeX[0, ..., fAxis)
45 // lengths in string format
46 std::string fLength; // Length of the input
47 std::string fNormalizedLength; // divisor used for the mean and the variance in Generate()
48 std::string fAxesLength; // length given to the Mean / InvStdDev tensors registered by Initialize()
49
50 std::string fType; // type name written into the generated code (e.g. for the `sum` accumulator)
51
52public:
54
55 ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX,
56 const std::string &nameScale, const std::string &nameB, const std::string &nameY,
57 const std::string &nameMean, const std::string &nameInvStdDev)
58 : fAttrAxis(axis), fAttrEpsilon(epsilon), fAttrStashType(stashType), fNX(UTILITY::Clean_name(nameX)),
59 fNScale(UTILITY::Clean_name(nameScale)), fNB(UTILITY::Clean_name(nameB)),
60 fNY(UTILITY::Clean_name(nameY)), fNMean(UTILITY::Clean_name(nameMean)), fNInvStdDev(UTILITY::Clean_name(nameInvStdDev))
61 {
62 }
63
64 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override { return input; }
65
66 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override { return input; }
67
// Prepare the operator for code generation against `model`.
//
// Visible behaviour: verifies that the input tensor fNX is known to the model, splits
// fShapeX at fAxis into fAxesShape / fNormalizedShape, registers intermediate tensors for
// Mean and InvStdDev when no names were supplied, picks names for the casted / normalized /
// broadcast helper tensors, and requests the "cmath" standard library.
//
// Throws std::runtime_error when fNX is not registered in the model.
//
// NOTE(review): the embedded listing numbers jump in several places (73->77, 77->79, 81->83,
// 85->87, 89->91, 91->93, 93->95, 115->117, 117->119, 125->127), so some statements are not
// visible here -- in particular whatever assigns fShapeX, fAxis, fAxesLength,
// fNormalizedLength, fLength and fType, and the declaration of `type`. Confirm against the
// full source before relying on this summary.
68 void Initialize(RModel &model) override
69 {
70 if (!model.CheckIfTensorAlreadyExist(fNX)) {
71 throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found.");
72 }
// Selects between the Dim-based and the size_t-based tensor registration below.
73 bool isDynamic = model.IsDynamicTensor(fNX);
77 // Type of the output
79 // Size of the input
80 fSize = fShapeX.size();
81 // Axis in [0, size)
83 // Shape of fShapeX[0, ..., fAxis)
84 fAxesShape = std::vector<Dim>(fShapeX.begin(), fShapeX.begin() + fAxis);
85 // Length of the axes
87 // Shape of fShapeX[fAxis, ..., fSize)
88 fNormalizedShape = std::vector<Dim>(fShapeX.begin() + fAxis, fShapeX.end());
89 // Length of the normalized axis
91 // length of the input
93 // Type of mean and std
95 // Mean
// Only registered here when the caller gave no name for the Mean tensor.
96 if (fNMean.empty()) {
97 fNMean = "Mean" + fNX;
98 // cannot use initializer list with one element since it is ambiguous
99 if (isDynamic)
100 // add size_t(-1) to indicate that shape is an expression
101 model.AddIntermediateTensor(fNMean, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
102 else
// Static shape: fAxesLength is parsed as a decimal integer.
103 model.AddIntermediateTensor(fNMean, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
104 }
105 // Inverse Standard Deviation
// Same scheme as for the Mean tensor above.
106 if (fNInvStdDev.empty()) {
107 fNInvStdDev = "InvStdDev" + fNX;
108 if (isDynamic)
109 model.AddIntermediateTensor(fNInvStdDev, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
110 else
111 model.AddIntermediateTensor(fNInvStdDev, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
112 }
113 // Cast X to float
// A non-empty fNCastedX is what later switches Generate() to its cast branch.
114 if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) {
115 fNCastedX = "Casted" + fNX;
117 fNNormalizedX = "Normalized" + fNX;
119 }
120 // Broadcast the bias
121 if (!fNB.empty()) {
122 fShapeB = model.GetTensorShape(fNB);
123 size_t lengthB = ConvertShapeToLength(fShapeB);
// isDynamic is tested first, so std::stoi(fLength) only runs for static shapes.
124 if (isDynamic || lengthB < static_cast<size_t>(std::stoi(fLength))) {
125 fNBroadcastedB = "Broadcasted" + fNB;
127 }
128 }
129 model.AddNeededStdLib("cmath");
130 }
131
132 std::string GenerateInitCode() override
133 {
134 std::stringstream out;
135 if (!fNBroadcastedB.empty()) {
136 out << SP << "// Broadcasting the bias of LayerNormalization op\n";
137 out << SP << "{\n";
138 out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_";
139 out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertDynamicShapeToString(fShapeX) << ");\n";
140 out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n";
141 out << SP << "delete[] data;\n";
142 out << SP << "}\n";
143 }
144 return out.str();
145 }
146
147 std::string Generate(std::string OpName) override
148 {
149 OpName = "op_" + OpName;
150 if (fShapeX.empty()) {
151 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + OpName +
152 " called to generate without being initialized first.");
153 }
154 if (fShapeX.size() > 5) {
155 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator not "
156 "implemented for input tensor of size > 5.");
157 }
158
159 std::stringstream out;
160
161 out << "//---- Layer Normalization operator " << OpName << "\n";
162
163 // Loop over all the normalized axes i.e. [axis, ..., size)
164 out << SP << "std::vector<size_t> " << OpName << "_InputShape ({";
165 for (size_t i = 0; i < fSize; i++) {
166 out << fShapeX[i].GetVal();
167 if (i + 1 < fSize) {
168 out << ",";
169 }
170 }
171 out << "});\n";
172 std::string inputShape = OpName + "_InputShape";
173
175 std::string InputIndex = "axis_0 * " + strides[0].GetVal();
176 for (size_t i = 1; i < fSize; i++) {
177 InputIndex += " + axis_" + std::to_string(i) + " * " + strides[i].GetVal();
178 }
179
181 std::string axesIndex = "axis_" + std::to_string(0) + " * " + axesStrides[0].GetVal();
182 for (size_t i = 1; i < fAxis; i++) {
183 axesIndex += " + axis_" + std::to_string(i) + " * " + axesStrides[i].GetVal();
184 }
185
186 auto normalizedStrides = UTILITY::ComputeStrideFromShape(fNormalizedShape);
187 std::string normalizedIndex = "axis_" + std::to_string(fAxis) + " * " + normalizedStrides[0].GetVal();
188 for (size_t i = fAxis + 1; i < fSize; i++) {
189 normalizedIndex += " + axis_" + std::to_string(i) + " * " + normalizedStrides[i - fAxis].GetVal();
190 }
191
192 if (!fNCastedX.empty()) {
193 // Cast X to float
194 out << SP << "for (size_t i = 0; i < " << fLength << "; i++) {\n";
195 out << SP << SP << "tensor_" << fNCastedX << "[i] = " << "static_cast<float>(tensor_" << fNX;
196 out << "[i]);\n";
197 out << SP << "}\n";
198 }
199
200 out << SP << "// Compute the mean\n";
201 // Loop over the normalized dimensions
202 for (size_t i = 0; i < fAxis; i++) {
203 std::string iIdx = "axis_" + std::to_string(i);
204 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
205 out << "[" << i << "]; " << iIdx << "++) {\n";
206 }
207 out << SP << SP << fType << " sum = 0.;\n";
208 // loop over all the dims in [0, fAxis)
209 for (size_t j = fAxis; j < fSize; j++) {
210 std::string jIdx = "axis_" + std::to_string(j);
211 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
212 out << "[" << j << "]; " << jIdx << "++) {\n";
213 }
214 out << SP << SP << SP << "sum += tensor_" << fNX << "[" << InputIndex << "];\n";
215 for (size_t j = fAxis; j < fSize; j++) {
216 out << SP << SP << "}\n";
217 }
218 out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = sum / " << fType << "(";
219 out << fNormalizedLength << ");\n";
220 for (size_t i = fAxis; i < fSize; i++) {
221 out << SP << "}\n";
222 }
223
224 out << SP << "// Compute the inverse Standard Deviation\n";
225 // Loop over the normalized dimensions
226 for (size_t i = 0; i < fAxis; i++) {
227 std::string iIdx = "axis_" + std::to_string(i);
228 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
229 out << "[" << i << "]; " << iIdx << "++){\n";
230 }
231 // Set sum = 0
232 out << SP << SP << fType << " sum = 0.;\n";
233 // loop over all the dims in [0, fAxis)
234 for (size_t j = fAxis; j < fSize; j++) {
235 std::string jIdx = "axis_" + std::to_string(j);
236 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
237 out << "[" << j << "]; " << jIdx << "++){\n";
238 }
239 out << SP << SP << SP << "sum += std::pow(tensor_" << fNX << "[" << InputIndex << "] - tensor_";
240 out << fNMean << "[" << axesIndex << "], 2);\n";
241 for (size_t j = fAxis; j < fSize; j++) {
242 out << SP << SP << "}\n";
243 }
244 out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = 1 / std::sqrt(";
245 out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n";
246 for (size_t i = 0; i < fAxis; i++) {
247 out << SP << "}\n";
248 }
249
250 if (!fNCastedX.empty()) {
251 out << "// NormalizedX = InvStdDev * (CastedX - Mean)\n";
252 for (size_t i = 0; i < fAxis; i++) {
253 std::string iIdx = "axis_" + std::to_string(i);
254 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
255 out << "[" << i << "]; " << iIdx << "++){\n";
256 }
257 for (size_t j = fAxis; j < fSize; j++) {
258 std::string jIdx = "axis_" + std::to_string(j);
259 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
260 out << "[" << j << "]; " << jIdx << "++){\n";
261 }
262 out << SP << SP << SP << "tensor_" << fNNormalizedX << "[" << InputIndex << "] = tensor_";
263 out << fNInvStdDev << "[" << axesIndex << "] * (tensor_" << fNCastedX << "[" << InputIndex;
264 out << "] - tensor_" << fNMean << "[" << axesIndex << "])\n";
265 for (size_t j = fAxis; j < fSize; j++) {
266 out << SP << SP << "}\n";
267 }
268 for (size_t i = fAxis; i < fSize; i++) {
269 out << SP << "}\n";
270 }
271 out << "// Y = Scale o NormalizedX";
272 for (size_t i = 0; i < fAxis; i++) {
273 std::string iIdx = "axis_" + std::to_string(i);
274 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
275 out << "[" << i << "]; " << iIdx << "++){\n";
276 }
277 for (size_t j = fAxis; j < fSize; j++) {
278 std::string jIdx = "axis_" + std::to_string(j);
279 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
280 out << "[" << j << "]; " << jIdx << "++){\n";
281 }
282 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
283 out << "[" << axesIndex << "] * static_cast<" << fType << ">(tensor_" << fNCastedX << "[" << InputIndex;
284 out << "]);\n";
285 for (size_t j = fAxis; j < fSize; j++) {
286 out << SP << SP << "}\n";
287 }
288 for (size_t i = fAxis; i < fSize; i++) {
289 out << SP << "}\n";
290 }
291 } else {
292 out << SP << "// Y = Scale o InvStdDev (X - Mean)\n";
293 for (size_t i = 0; i < fAxis; i++) {
294 std::string iIdx = "axis_" + std::to_string(i);
295 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
296 out << "[" << i << "]; " << iIdx << "++){\n";
297 }
298 for (size_t j = fAxis; j < fSize; j++) {
299 std::string jIdx = "axis_" + std::to_string(j);
300 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
301 out << "[" << j << "]; " << jIdx << "++){\n";
302 }
303 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
304 out << "[" << normalizedIndex << "] * tensor_" << fNInvStdDev << "[" << axesIndex;
305 out << "] * (tensor_" << fNX << "[" << InputIndex << "] - tensor_" << fNMean << "[";
306 out << axesIndex << "]);\n";
307 for (size_t j = fAxis; j < fSize; j++) {
308 out << SP << SP << "}\n";
309 }
310 for (size_t i = fAxis; i < fSize; i++) {
311 out << SP << "}\n";
312 }
313 }
314
315 if (!fNB.empty()) {
316 std::string Bias = "tensor_" + (fNBroadcastedB.empty() ? fNB : fNBroadcastedB);
317 out << SP << "// Add the bias to Y\n";
318 out << SP << "int " << OpName << "_n = " << fLength << ";\n";
319 out << SP << "float " << OpName << "_alpha = 1.;\n";
320 out << SP << "int " << OpName << "_inc = 1;\n";
321 out << SP << "BLAS::saxpy_(&" << OpName << "_n, &" << OpName << "_alpha, " << Bias << ", &";
322 out << OpName << "_inc, " << "tensor_" << fNY << ", &" << OpName << "_inc);\n";
323 }
324
325 return out.str();
326 }
327
328 std::vector<std::string> GetBlasRoutines() override { return { std::string("Axpy") }; }
329
330 std::vector<std::string> GetStdLibs() override { return { std::string("cmath") }; }
331};
332
333} // namespace SOFIE
334} // namespace Experimental
335} // namespace TMVA
336
337#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
void AddNeededStdLib(std::string libname)
const ETensorType & GetTensorType(std::string name)
Definition RModel.cxx:91
bool IsDynamicTensor(const std::string &name) const
Definition RModel.cxx:186
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< Dim > dim_shape)
Definition RModel.cxx:196
std::vector< Dim > GetDynamicTensorShape(std::string name)
Definition RModel.cxx:79
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:116
const std::vector< size_t > & GetTensorShape(std::string name)
Definition RModel.cxx:56
ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX, const std::string &nameScale, const std::string &nameB, const std::string &nameY, const std::string &nameMean, const std::string &nameInvStdDev)
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:41
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
std::string ConvertDynamicShapeToLength(std::vector< Dim > shape)
std::string ConvertShapeToString(std::vector< size_t > shape)
std::string ConvertTypeToString(ETensorType type)
std::string ConvertDynamicShapeToString(std::vector< Dim > shape)
ETensorType ConvertStringToType(std::string type)
std::size_t ConvertShapeToLength(std::vector< size_t > shape)
create variable transformations