Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_LayerNormalization.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
2#define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
3
4#include "TMVA/RModel.hxx"
6
7#include <sstream>
8#include <string>
9
10namespace TMVA {
11namespace Experimental {
12namespace SOFIE {
13
14template <typename T>
16private:
// Set by Initialize() when fAttrStashType == 1 and the input tensor type is not
// already float; the actual cast code generation is not implemented yet.
17 bool fCastToFloat = false; // flag to indicate if the computation must be done in floats (to be impl)
21
// Cleaned (Clean_name) names of the ONNX input/output tensors.
// B (bias), Mean and InvStdDev are optional and may be empty.
22 std::string fNX;
23 std::string fNScale;
24 std::string fNB;
25 std::string fNY;
26 std::string fNMean;
27 std::string fNInvStdDev;
28
// Names of helper intermediate tensors. NOTE(review): the code paths that
// would create/use these are currently commented out in Initialize()/Generate().
29 std::string fNCastedX;
30 std::string fNNormalizedX;
31 std::string fNBroadcastedB;
32
// Shapes of the tensors above; Dim supports symbolic (dynamic) dimensions.
33 std::vector<Dim> fShapeX;
34 std::vector<Dim> fShapeScale;
35 std::vector<Dim> fShapeB;
36 std::vector<Dim> fShapeY;
37 std::vector<Dim> fShapeMean;
38 std::vector<Dim> fShapeInvStdDev;
39
40 size_t fAxis; // axis in [0, size) — first normalized axis
41 size_t fSize; // Size of the input (rank of X)
42 // size_t fAxisDim;
43
44 std::vector<Dim> fNormalizedShape; // shape from X[ axis,...,N-1]
45 std::vector<Dim> fAxesShape; // shape from X[0,..,axis-1]
46 // lengths in string format (may be symbolic expressions for dynamic tensors)
47 std::string fLength; // Length of the input
48 std::string fNormalizedLength;
49 std::string fAxesLength;
50
// C++ type name used in the generated code (e.g. "float").
51 std::string fType;
52
53public:
55
// Constructor: stores the ONNX LayerNormalization attributes (axis, epsilon,
// stash_type) and the cleaned tensor names. Bias, Mean and InvStdDev are
// optional; they are registered as operator inputs/outputs only when non-empty.
// NOTE(review): the lines registering the mandatory tensors (X, Scale, Y) are
// not visible in this listing (doxygen elided lines 63/68) — confirm upstream.
56 ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX,
57 const std::string &nameScale, const std::string &nameB, const std::string &nameY,
58 const std::string &nameMean, const std::string &nameInvStdDev)
59 : fAttrAxis(axis), fAttrEpsilon(epsilon), fAttrStashType(stashType), fNX(UTILITY::Clean_name(nameX)),
60 fNScale(UTILITY::Clean_name(nameScale)), fNB(UTILITY::Clean_name(nameB)),
61 fNY(UTILITY::Clean_name(nameY)), fNMean(UTILITY::Clean_name(nameMean)), fNInvStdDev(UTILITY::Clean_name(nameInvStdDev))
62 {
// The bias input is optional in ONNX LayerNormalization.
64 if (!fNB.empty()){
65 fInputTensorNames.emplace_back(fNB);
66 }
67
// Mean and InvStdDev are optional outputs; register them only if requested.
69 if (!fNMean.empty()){
70 fOutputTensorNames.emplace_back(fNMean);
71 }
72 if (!fNInvStdDev.empty()){
73 fOutputTensorNames.emplace_back(fNInvStdDev);
74 }
75 }
76
// Identity shape inference: LayerNormalization's output Y has the same shape as X.
77 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override { return input; }
78
// Identity type inference: the output tensor type matches the input type.
79 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override { return input; }
80
// Validates the input tensor, derives all shapes/lengths used by Generate(),
// registers the optional Mean/InvStdDev intermediate tensors and pads the
// Scale/Bias shapes with leading 1s so they can be indexed like X.
// Throws std::runtime_error if X is missing or Scale/Bias shapes are incompatible.
81 void Initialize(RModel& model) override {
82 if (!model.CheckIfTensorAlreadyExist(fNX)) {
83 throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Tensor " + fNX + " not found.");
84 }
85 bool isDynamic = model.IsDynamicTensor(fNX);
89 // Type of the output
91 // Size of the input
92 fSize = fShapeX.size();
93 // Axis in [0, size)
95 // Shape of fShapeX[0, ..., fAxis)
96 fAxesShape = std::vector<Dim>(fShapeX.begin(), fShapeX.begin() + fAxis);
97 // Length of the axes
99 // Shape of fShapeX[fAxis, ..., fSize)
100 fNormalizedShape = std::vector<Dim>(fShapeX.begin() + fAxis, fShapeX.end());
101 // Length of the normalized axis
103 // length of the input
105 // Type of mean and std
107 // Mean
108 if (!fNMean.empty()) {
109 // cannot use initializer list with one element since it is ambiguous
110 if (isDynamic)
111 // add size_t(-1) to indicate that shape is an expression
112 model.AddIntermediateTensor(fNMean, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
113 else
114 model.AddIntermediateTensor(fNMean, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
115 }
116 // Inverse Standard Deviation
117 if (!fNInvStdDev.empty()) {
118 if (isDynamic)
119 model.AddIntermediateTensor(fNInvStdDev, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
120 else
121 model.AddIntermediateTensor(fNInvStdDev, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
122 }
123 // if mean and stdev are not empty they are not defined in the output list
124 // Cast X to float
125 if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) {
126 fCastToFloat = true;
127 fType = "float";
128 // fNCastedX = "Casted" + fNX;
129 // model.AddIntermediateTensor(fNCastedX, ETensorType::FLOAT, fShapeX);
130 // fNNormalizedX = "Normalized" + fNX;
131 // model.AddIntermediateTensor(fNNormalizedX, ETensorType::FLOAT, fShapeX);
132 }
133 // scale shape
135 // appends 1 to scale shapes if missing
136 size_t dimScale = fShapeScale.size();
137 if (dimScale < fSize) {
138 for (size_t i = 0; i < fSize-dimScale; i++)
139 fShapeScale.insert(fShapeScale.begin(), Dim{1});
140 }
141 // check also shape if consistent now
142 for (size_t i = 0; i < fSize; i++) {
143 if (fShapeScale[i].dim != 1 && fShapeScale[i] != fShapeX[i])
144 throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Scale Tensor has invalid shape " + ConvertDimShapeToString(fShapeScale));
145 }
146 if (!fNB.empty()) {
148 // appends 1 to bias shapes if missing
149 size_t dimB = fShapeB.size();
150 if (dimB < fShapeX.size()) {
151 for (size_t i = 0; i < fSize-dimB; i++)
152 fShapeB.insert(fShapeB.begin(), Dim{1});
153 }
154 for (size_t i = 0; i < fSize; i++) {
155 if (fShapeB[i].dim != 1 && fShapeB[i] != fShapeX[i])
// FIX(review): report the bias shape here — previously this printed
// fShapeScale (copy-paste slip), hiding the actual offending shape.
156 throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Bias Tensor has invalid shape " + ConvertDimShapeToString(fShapeB));
157 }
158 }
159
// FIX(review): removed a leftover debug std::cout that printed the bias and
// scale shapes on every initialization.
161
171 model.AddNeededStdLib("cmath");
172 }
173
// Emits one-time initialization code: broadcasts the bias tensor to the input
// shape when a broadcast buffer was allocated.
// NOTE(review): fNBroadcastedB is never assigned in the visible code (the
// broadcast block in Initialize() is commented out), so this currently emits
// nothing — confirm before relying on the broadcast path.
174 std::string GenerateInitCode() override
175 {
176 std::stringstream out;
177 if (!fNBroadcastedB.empty()) {
178 out << SP << "// Broadcasting the bias of LayerNormalization op\n";
179 out << SP << "{\n";
180 out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_";
181 out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeX) << ");\n";
// NOTE(review): this line is indented one SP less than its siblings inside the
// generated block — cosmetic only, the emitted C++ is still valid.
182 out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n";
183 out << SP << "delete[] data;\n";
184 out << SP << "}\n";
185 }
186 return out.str();
187 }
188
// Generates the C++ code computing Y = Scale * invStdDev * (X - Mean) [+ B]:
// for each index in the outer axes [0, fAxis) it loops over the normalized
// axes [fAxis, fSize) three times (mean, variance, normalization+output).
// Optionally stores Mean and InvStdDev into their output tensors.
// NOTE(review): the declarations of strides/scaleStrides/biasStrides/axesStrides
// (lines 208/214/225/236) are elided in this listing — presumably row-major
// strides from the corresponding shapes; confirm against the full header.
189 std::string Generate(std::string opName) override
190 {
191 opName = "op_" + opName;
// Guard: Initialize(model) must have populated fShapeX before code generation.
192 if (fShapeX.empty()) {
193 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + opName +
194 " called to generate without being initialized first.");
195 }
196
197 std::stringstream out;
198
199 out << "//---- Layer Normalization operator " << opName << "\n";
200
201 // Loop over all the normalized axes i.e. [axis, ..., size)
202 std::vector<std::string> inputShape(fSize);
203
// Dimension extents as strings (may be symbolic for dynamic tensors).
204 for (size_t i = 0; i < fSize; i++) {
205 inputShape[i] = fShapeX[i].GetVal();
206 }
207
// Flattened row-major index expression into X/Y; the innermost stride is 1,
// hence the multiplication is skipped for the last axis.
209 std::string inputIndex = "axis_0 * " + strides[0].GetVal();
210 for (size_t i = 1; i < fSize; i++) {
211 inputIndex += " + axis_" + std::to_string(i);
212 if (i < fSize-1) inputIndex += " * " + strides[i].GetVal();
213 }
// Index into Scale: only dimensions where scale is not broadcast (dim != 1)
// contribute; a fully-broadcast scale degenerates to index "0".
215 std::string scaleIndex;
216 for (size_t i = 0; i < fSize; i++) {
217 if (fShapeScale[i].dim != 1) {
218 if (!scaleIndex.empty()) scaleIndex += " + ";
219 scaleIndex += "axis_" + std::to_string(i);
220 if ( scaleStrides[i].dim != 1) scaleIndex += " * " + scaleStrides[i].GetVal();
221 }
222 }
223 if (scaleIndex.empty()) scaleIndex = "0";
224
// Same construction for the optional bias tensor.
226 std::string biasIndex;
227 for (size_t i = 0; i < fSize; i++) {
228 if (fShapeB[i].dim != 1) {
229 if (!biasIndex.empty()) biasIndex += " + ";
230 biasIndex += "axis_" + std::to_string(i);
231 if ( biasStrides[i].dim != 1) biasIndex += " * " + biasStrides[i].GetVal();
232 }
233 }
234 if (biasIndex.empty()) biasIndex = "0";
235
// Index into the Mean/InvStdDev output tensors (outer axes only).
237 std::string axesIndex = "axis_" + std::to_string(0) + " * " + axesStrides[0].GetVal();
238 for (size_t i = 1; i < fAxis; i++) {
239 axesIndex += " + axis_" + std::to_string(i) + " * " + axesStrides[i].GetVal();
240 }
241
242
243 // compute mean and std-dev. Save in tensors if requested
244
245 out << SP << "// Compute the mean\n";
246 // Loop over all the dims in [0, fAxis)
// Opens fAxis nested outer loops; they are closed at the end of this method.
247 for (size_t i = 0; i < fAxis; i++) {
248 std::string iIdx = "axis_" + std::to_string(i);
249 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i]
250 << "; " << iIdx << "++) {\n";
251 }
252 out << SP << SP << fType << " mean = 0.;\n";
253 // loop over the normalized dimensions (fAxis,....,N-1)
254 for (size_t j = fAxis; j < fSize; j++) {
255 std::string jIdx = "axis_" + std::to_string(j);
256 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j]
257 << "; " << jIdx << "++) {\n";
258 }
259 out << SP << SP << SP << "mean += tensor_" << fNX << "[" << inputIndex << "];\n";
260 for (size_t j = fAxis; j < fSize; j++) {
261 out << SP << SP << "}\n";
262 }
263 out << SP << SP << "mean /= " << fType << "(" << fNormalizedLength << ");\n";
264
265 // for (size_t i = fAxis; i < fSize; i++) {
266 // out << SP << "}\n";
267 // }
268 // tensor_" << fNMean << "[" << axesIndex << "]
269
270 out << SP << "// Compute the inverse Standard Deviation\n";
271 // Loop over the normalized dimensions
272 // for (size_t i = 0; i < fAxis; i++) {
273 // std::string iIdx = "axis_" + std::to_string(i);
274 // out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i]
275 // << "; " << iIdx << "++){\n";
276 // }
277
278 // Set sum = 0
279 out << SP << SP << fType << " sum = 0.;\n";
280 // loop over all the dims in [0, fAxis)
281 for (size_t j = fAxis; j < fSize; j++) {
282 std::string jIdx = "axis_" + std::to_string(j);
283 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j]
284 << "; " << jIdx << "++){\n";
285 }
286 out << SP << SP << SP << "float tmp = tensor_" << fNX << "[" << inputIndex << "] - mean;\n";
287 out << SP << SP << SP << "sum += tmp*tmp;\n";
288 for (size_t j = fAxis; j < fSize; j++) {
289 out << SP << SP << "}\n";
290 }
// invStdDev = 1 / sqrt(variance + epsilon), the ONNX LayerNormalization formula.
291 out << SP << SP << fType << " invStdDev = 1 / std::sqrt(";
292 out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n";
293
294 // for (size_t i = 0; i < fAxis; i++) {
295 // out << SP << "}\n";
296 // }
297
298 // set output mean and invStdDev if requested
299 if (!fNMean.empty())
300 out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = mean;\n";
301 if (!fNInvStdDev.empty())
302 out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = invStdDev;\n";
303
304 // scale and add bias
305
306 out << SP << "// Y = Scale o InvStdDev (X - Mean)\n";
307 // for (size_t i = 0; i < fAxis; i++) {
308 // std::string iIdx = "axis_" + std::to_string(i);
309 // out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i]
310 // << "; " << iIdx << "++){\n";
311 // }
312
313 for (size_t j = fAxis; j < fSize; j++) {
314 std::string jIdx = "axis_" + std::to_string(j);
315 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] << "; " << jIdx
316 << "++){\n";
317 }
318 out << SP << SP << SP << "tensor_" << fNY << "[" << inputIndex << "] = tensor_" << fNScale;
319 out << "[" << scaleIndex << "] * invStdDev * (tensor_" << fNX << "[" << inputIndex << "] - mean)";
320
321 // add bias if needed
322 if (!fNB.empty())
323 // assume bias has index as scale
324 out << " + tensor_" << fNB << "[" << biasIndex << "]";
325 out << ";\n";
326
327 for (size_t j = fAxis; j < fSize; j++) {
328 out << SP << SP << "}\n";
329 }
// NOTE(review): this loop emits (fSize - fAxis) closing braces, but the outer
// loops opened above (line 247) number fAxis. The counts only match when
// fAxis == fSize - fAxis; verify the generated code is brace-balanced for
// arbitrary axis values (the loop bounds here may be a transcription slip
// for "i = 0; i < fAxis").
330 for (size_t i = fAxis; i < fSize; i++) {
331 out << SP << "}\n";
332 }
333
334 // if (!fNB.empty()) {
335 // std::string bias = "tensor_" + (fNBroadcastedB.empty() ? fNB : fNBroadcastedB);
336 // out << SP << "// Add the bias to Y\n";
337 // out << SP << "int " << opName << "_n = " << fLength << ";\n";
338 // out << SP << "float " << opName << "_alpha = 1.;\n";
339 // out << SP << "int " << opName << "_inc = 1;\n";
340 // out << SP << "BLAS::saxpy_(&" << opName << "_n, &" << opName << "_alpha, " << bias << ", &";
341 // out << opName << "_inc, " << "tensor_" << fNY << ", &" << opName << "_inc);\n";
342 // }
343
344 return out.str();
345 }
346
// Declares the BLAS routines referenced by the generated code.
// NOTE(review): the saxpy-based bias addition is commented out in Generate(),
// so "Axpy" is requested but apparently unused — confirm before removing.
347 std::vector<std::string> GetBlasRoutines() override { return { std::string("Axpy") }; }
348
// The generated code calls std::sqrt, so it needs <cmath> (also registered
// via model.AddNeededStdLib in Initialize()).
349 std::vector<std::string> GetStdLibs() override { return { std::string("cmath") }; }
350};
351
352} // namespace SOFIE
353} // namespace Experimental
354} // namespace TMVA
355
356#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
void AddNeededStdLib(std::string libname)
std::vector< Dim > GetDimTensorShape(const std::string &name) const
Definition RModel.cxx:65
bool IsDynamicTensor(const std::string &name) const
Definition RModel.cxx:247
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< Dim > dim_shape)
Definition RModel.cxx:262
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:122
ETensorType GetTensorType(std::string name) const
Definition RModel.cxx:90
ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX, const std::string &nameScale, const std::string &nameB, const std::string &nameY, const std::string &nameMean, const std::string &nameInvStdDev)
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
std::vector< std::string_view > fInputTensorNames
Definition ROperator.hxx:47
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:42
std::vector< std::string_view > fOutputTensorNames
Definition ROperator.hxx:48
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
std::string ConvertDimShapeToString(const std::vector< Dim > &shape)
std::string ConvertTypeToString(ETensorType type)
std::string ConvertDimShapeToLength(const std::vector< Dim > &shape)
std::string ConvertShapeToString(const std::vector< size_t > &shape)
create variable transformations