Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_LayerNormalization.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
2#define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
3
4#include "TMVA/RModel.hxx"
6
7#include <sstream>
8#include <string>
9
10namespace TMVA {
11namespace Experimental {
12namespace SOFIE {
13
template <typename T>
private:
   // True when the computation must be carried out in float: set in Initialize()
   // when fAttrStashType == 1 and the input tensor type is not FLOAT.
   bool fCastToFloat = false;

   // Cleaned names of the ONNX input/output tensors (empty when an optional
   // tensor is absent).
   std::string fNX;         // input X
   std::string fNScale;     // scale tensor
   std::string fNB;         // optional bias tensor
   std::string fNY;         // output Y
   std::string fNMean;      // optional output holding the computed mean
   std::string fNInvStdDev; // optional output holding the computed inverse std-dev

   // Auxiliary tensor names.
   std::string fNCastedX;      // X cast to float (not referenced elsewhere in this class)
   std::string fNNormalizedX;  // normalized X (not referenced elsewhere in this class)
   std::string fNBroadcastedB; // bias broadcast to the shape of X (see GenerateInitCode)

   // (Possibly dynamic) shapes of the tensors above.
   std::vector<Dim> fShapeX;
   std::vector<Dim> fShapeScale;
   std::vector<Dim> fShapeB;
   std::vector<Dim> fShapeY;
   std::vector<Dim> fShapeMean;
   std::vector<Dim> fShapeInvStdDev;

   size_t fAxis; // axis in [0, size)
   size_t fSize; // Size (rank) of the input, i.e. fShapeX.size()
   // size_t fAxisDim;

   std::vector<Dim> fNormalizedShape; // shape from X[ axis,...,N-1]
   std::vector<Dim> fAxesShape; // shape from X[0,..,axis-1]
   // lengths in string format (string expressions, since dims may be symbolic)
   std::string fLength; // Length (number of elements) of the input
   std::string fNormalizedLength; // number of elements of fNormalizedShape
   std::string fAxesLength;       // number of elements of fAxesShape

   std::string fType; // C++ scalar type used in the generated code

public:

   /// Construct the LayerNormalization operator.
   ///
   /// Tensor names are sanitized with UTILITY::Clean_name before being stored.
   /// The bias and the mean / inverse-std-dev outputs are optional: passing an
   /// empty name means the corresponding tensor is absent.
   ///
   /// @param axis          first normalization axis (`axis` attribute)
   /// @param epsilon       value added to the variance for numerical stability
   /// @param stashType     requested precision of the internal computation
   ///                      (1 selects float, see Initialize)
   /// @param nameX         input tensor name
   /// @param nameScale     scale tensor name
   /// @param nameB         bias tensor name (may be empty)
   /// @param nameY         output tensor name
   /// @param nameMean      optional saved-mean output name (may be empty)
   /// @param nameInvStdDev optional saved inverse-std-dev output name (may be empty)
   ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX,
                                const std::string &nameScale, const std::string &nameB, const std::string &nameY,
                                const std::string &nameMean, const std::string &nameInvStdDev)
      : fAttrAxis(axis), fAttrEpsilon(epsilon), fAttrStashType(stashType), fNX(UTILITY::Clean_name(nameX)),
        fNScale(UTILITY::Clean_name(nameScale)), fNB(UTILITY::Clean_name(nameB)),
        fNY(UTILITY::Clean_name(nameY)), fNMean(UTILITY::Clean_name(nameMean)), fNInvStdDev(UTILITY::Clean_name(nameInvStdDev))
   {
      // register the optional bias among the operator inputs
      if (!fNB.empty()){
         fInputTensorNames.emplace_back(fNB);
      }
      // register the optional extra outputs
      if (!fNMean.empty()){
         fOutputTensorNames.emplace_back(fNMean);
      }
      if (!fNInvStdDev.empty()){
         fOutputTensorNames.emplace_back(fNInvStdDev);
      }
   }
76
77 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override { return input; }
78
79 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override { return input; }
80
   /// Initialize the operator: validate the input tensor, resolve the shapes
   /// and lengths used for code generation, and register the output tensors
   /// (Y, and optionally Mean / InvStdDev) in the model.
   ///
   /// @param model the RModel being built
   /// @throws std::runtime_error if the input tensor is unknown, or if the
   ///         scale/bias shapes are incompatible with the input shape
   void Initialize(RModel& model) override {
      if (!model.CheckIfTensorAlreadyExist(fNX)) {
         throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Tensor " + fNX + " not found.");
      }
      // dynamic tensors have symbolic dimensions, handled as string expressions
      bool isDynamic = model.IsDynamicTensor(fNX);
      // Type of the output
      // Size of the input
      fSize = fShapeX.size();
      // Axis in [0, size)
      // Shape of fShapeX[0, ..., fAxis)
      fAxesShape = std::vector<Dim>(fShapeX.begin(), fShapeX.begin() + fAxis);
      // Length of the axes
      // Shape of fShapeX[fAxis, ..., fSize)
      fNormalizedShape = std::vector<Dim>(fShapeX.begin() + fAxis, fShapeX.end());
      // Length of the normalized axis
      // length of the input
      // Type of mean and std
      // Mean: a 1-D tensor with one element per outer (non-normalized) index
      if (!fNMean.empty()) {
         // cannot use initializer list with one element since it is ambiguous
         if (isDynamic)
            // add size_t(-1) to indicate that shape is an expression
            model.AddIntermediateTensor(fNMean, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
         else
            model.AddIntermediateTensor(fNMean, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
      }
      // Inverse Standard Deviation: same shape as the mean tensor
      if (!fNInvStdDev.empty()) {
         if (isDynamic)
            model.AddIntermediateTensor(fNInvStdDev, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
         else
            model.AddIntermediateTensor(fNInvStdDev, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
      }
      // if mean and stdev are not empty they are not defined in the output list
      // Cast X to float when stash_type requests float precision and the
      // input is not already float
      if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) {
         fCastToFloat = true;
         fType = "float";
      }
      // scale shape
      // prepend 1s to the scale shape if its rank is smaller than the input rank
      size_t dimScale = fShapeScale.size();
      if (dimScale < fSize) {
         for (size_t i = 0; i < fSize-dimScale; i++)
            fShapeScale.insert(fShapeScale.begin(), Dim{1});
      }
      // check also shape if consistent now: each scale dim must be 1 or equal
      // to the corresponding input dim
      for (size_t i = 0; i < fSize; i++) {
         if (fShapeScale[i].dim != 1 && fShapeScale[i] != fShapeX[i])
            throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Scale Tensor has invalid shape " + ConvertDimShapeToString(fShapeScale));
      }
      if (!fNB.empty()) {
         // prepend 1s to the bias shape if its rank is smaller than the input rank
         size_t dimB = fShapeB.size();
         if (dimB < fShapeX.size()) {
            for (size_t i = 0; i < fSize-dimB; i++)
               fShapeB.insert(fShapeB.begin(), Dim{1});
         }
         for (size_t i = 0; i < fSize; i++) {
            if (fShapeB[i].dim != 1 && fShapeB[i] != fShapeX[i])
               // NOTE(review): this message prints the Scale shape; it should
               // presumably report fShapeB instead of fShapeScale — confirm and fix.
               throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Bias Tensor has invalid shape " + ConvertDimShapeToString(fShapeScale));
         }
      }

      if (model.Verbose()){
         std::cout << "LayerNormalization : " << fNX << " -> " << fNY << " shape " << ConvertDimShapeToString(fShapeY)
                   << " using bias and scale with shapes " << ConvertDimShapeToString(fShapeB) << " " << ConvertDimShapeToString(fShapeScale)
                   << std::endl;
      }

      // the generated code uses std::sqrt
      model.AddNeededStdLib("cmath");
   }
164
165 std::string GenerateInitCode() override
166 {
167 std::stringstream out;
168 if (!fNBroadcastedB.empty()) {
169 out << SP << "// Broadcasting the bias of LayerNormalization op\n";
170 out << SP << "{\n";
171 out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_";
172 out << fNB << ", " << ConvertDimShapeToString(fShapeB) << ", " << ConvertDimShapeToString(fShapeX) << ");\n";
173 out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n";
174 out << SP << "delete[] data;\n";
175 out << SP << "}\n";
176 }
177 return out.str();
178 }
179
   /// Generate the C++ inference code for this operator.
   ///
   /// Emits nested loops: the outer loops iterate over the non-normalized
   /// axes [0, fAxis); for each outer index the generated code computes the
   /// mean and the inverse standard deviation over the normalized axes
   /// [fAxis, fSize), optionally stores them into the Mean/InvStdDev output
   /// tensors, and finally writes Y = Scale * InvStdDev * (X - Mean) (+ Bias).
   ///
   /// @param opName unique operator name, used to label the generated code
   /// @return the generated code as a string
   /// @throws std::runtime_error if called before Initialize()
   std::string Generate(std::string opName) override
   {
      opName = "op_" + opName;
      if (fShapeX.empty()) {
         throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + opName +
                                  " called to generate without being initialized first.");
      }

      std::stringstream out;

      out << "//---- Layer Normalization operator " << opName << "\n";

      // cache the (possibly symbolic) dimensions of X as strings,
      // used as loop bounds in the generated code
      std::vector<std::string> inputShape(fSize);

      for (size_t i = 0; i < fSize; i++) {
         inputShape[i] = fShapeX[i].GetVal();
      }

      // flattened index expression into X: axis_0*stride_0 + ... + axis_{N-1}
      // (the innermost axis is assumed to have stride 1, so its factor is omitted)
      std::string inputIndex = "axis_0 * " + strides[0].GetVal();
      for (size_t i = 1; i < fSize; i++) {
         inputIndex += " + axis_" + std::to_string(i);
         if (i < fSize-1) inputIndex += " * " + strides[i].GetVal();
      }
      // index expression into the scale tensor: broadcast dims (size 1)
      // contribute nothing, so only non-unit dims appear
      std::string scaleIndex;
      for (size_t i = 0; i < fSize; i++) {
         if (fShapeScale[i].dim != 1) {
            if (!scaleIndex.empty()) scaleIndex += " + ";
            scaleIndex += "axis_" + std::to_string(i);
            if ( scaleStrides[i].dim != 1) scaleIndex += " * " + scaleStrides[i].GetVal();
         }
      }
      // fully-broadcast scale (all dims 1): constant index 0
      if (scaleIndex.empty()) scaleIndex = "0";

      // index expression into the bias tensor, built like scaleIndex
      std::string biasIndex;
      for (size_t i = 0; i < fSize; i++) {
         if (fShapeB[i].dim != 1) {
            if (!biasIndex.empty()) biasIndex += " + ";
            biasIndex += "axis_" + std::to_string(i);
            if ( biasStrides[i].dim != 1) biasIndex += " * " + biasStrides[i].GetVal();
         }
      }
      if (biasIndex.empty()) biasIndex = "0";

      // flattened index over the outer axes only, used for the Mean/InvStdDev outputs
      // NOTE(review): axesStrides[0] is read unconditionally — presumably
      // fAxis >= 1 whenever these outputs are requested; confirm upstream.
      std::string axesIndex = "axis_" + std::to_string(0) + " * " + axesStrides[0].GetVal();
      for (size_t i = 1; i < fAxis; i++) {
         axesIndex += " + axis_" + std::to_string(i) + " * " + axesStrides[i].GetVal();
      }


      // compute mean and std-dev. Save in tensors if requested

      out << SP << "// Compute the mean\n";

      // Loop over all the outer dims in [0, fAxis)
      for (size_t i = 0; i < fAxis; i++) {
         std::string iIdx = "axis_" + std::to_string(i);
         out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i]
             << "; " << iIdx << "++) {\n";
      }
      out << SP << SP << fType << " mean = 0.;\n";
      // loop over the normalized dimensions (fAxis,....,N-1)
      for (size_t j = fAxis; j < fSize; j++) {
         std::string jIdx = "axis_" + std::to_string(j);
         out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j]
             << "; " << jIdx << "++) {\n";
      }
      out << SP << SP << SP << "mean += tensor_" << fNX << "[" << inputIndex << "];\n";
      // close the normalized-dimension loops
      for (size_t j = fAxis; j < fSize; j++) {
         out << SP << SP << "}\n";
      }
      out << SP << SP << "mean /= " << fType << "(" << fNormalizedLength << ");\n";


      out << SP << "// Compute the inverse Standard Deviation\n";

      // Set sum = 0
      out << SP << SP << fType << " sum = 0.;\n";
      // loop again over the normalized dimensions [fAxis, ..., fSize)
      // to accumulate the sum of squared deviations
      for (size_t j = fAxis; j < fSize; j++) {
         std::string jIdx = "axis_" + std::to_string(j);
         out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j]
             << "; " << jIdx << "++){\n";
      }
      out << SP << SP << SP << "float tmp = tensor_" << fNX << "[" << inputIndex << "] - mean;\n";
      out << SP << SP << SP << "sum += tmp*tmp;\n";
      for (size_t j = fAxis; j < fSize; j++) {
         out << SP << SP << "}\n";
      }
      // invStdDev = 1 / sqrt(variance + epsilon)
      out << SP << SP << fType << " invStdDev = 1 / std::sqrt(";
      out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n";


      // set output mean and invStdDev if requested
      if (!fNMean.empty())
         out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = mean;\n";
      if (!fNInvStdDev.empty())
         out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = invStdDev;\n";

      // scale and add bias

      out << SP << "// Y = Scale o InvStdDev (X - Mean)\n";

      for (size_t j = fAxis; j < fSize; j++) {
         std::string jIdx = "axis_" + std::to_string(j);
         out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] << "; " << jIdx
             << "++){\n";
      }
      out << SP << SP << SP << "tensor_" << fNY << "[" << inputIndex << "] = tensor_" << fNScale;
      out << "[" << scaleIndex << "] * invStdDev * (tensor_" << fNX << "[" << inputIndex << "] - mean)";

      // add bias if needed
      if (!fNB.empty())
         // assume bias has index as scale
         out << " + tensor_" << fNB << "[" << biasIndex << "]";
      out << ";\n";

      // close loops on normalizing dim [..,fAxis,...fSize-1]
      for (size_t j = fAxis; j < fSize; j++) {
         out << SP << SP << "}\n";
      }
      // close loops on the other dimensions [0,...,fAxis]
      for (size_t i = 0; i < fAxis; i++) {
         out << SP << "}\n";
      }

      return out.str();
   }
312
313 std::vector<std::string> GetBlasRoutines() override { return { std::string("Axpy") }; }
314
315 std::vector<std::string> GetStdLibs() override { return { std::string("cmath") }; }
316};
317
318} // namespace SOFIE
319} // namespace Experimental
320} // namespace TMVA
321
322#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
void AddNeededStdLib(std::string libname)
std::vector< Dim > GetDimTensorShape(const std::string &name) const
Definition RModel.cxx:87
bool IsDynamicTensor(const std::string &name) const
Definition RModel.cxx:269
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< Dim > dim_shape)
Definition RModel.cxx:284
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:144
ETensorType GetTensorType(std::string name) const
Definition RModel.cxx:112
ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX, const std::string &nameScale, const std::string &nameB, const std::string &nameY, const std::string &nameMean, const std::string &nameInvStdDev)
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
std::vector< std::string_view > fInputTensorNames
Definition ROperator.hxx:50
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:45
std::vector< std::string_view > fOutputTensorNames
Definition ROperator.hxx:51
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
std::string ConvertDimShapeToString(const std::vector< Dim > &shape)
std::string ConvertTypeToString(ETensorType type)
std::string ConvertDimShapeToLength(const std::vector< Dim > &shape)
create variable transformations