Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RFieldUtils.cxx
Go to the documentation of this file.
1/// \file RFieldUtils.cxx
2/// \ingroup NTuple
3/// \author Jonas Hahnfeld <jonas.hahnfeld@cern.ch>
4/// \date 2024-11-19
5
7
#include <ROOT/RField.hxx>
#include <ROOT/RLogger.hxx>
#include <ROOT/RNTupleUtil.hxx>

#include <TClass.h>
#include <TClassEdit.h>
#include <TDictAttributeMap.h>

#include <algorithm>
#include <cctype>
#include <charconv>
#include <limits>
#include <stdexcept>
#include <string>
#include <string_view>
#include <system_error>
#include <unordered_map>
#include <utility>
#include <vector>
25
26namespace {
27
/// Lookup table from alternative type spellings to the spelling used for RNTuple type names.
/// The keys are ROOT typedef names (Bool_t, Int_t, ...) as well as standard names written without
/// their `std::` qualification (string, byte, intN_t, uintN_t); `unsigned` is spelled out as `unsigned int`.
/// The table is only consulted for exact matches of the complete type name.
const std::unordered_map<std::string_view, std::string_view> typeTranslationMap = {
   // Boolean, floating point and string types
   {"Bool_t", "bool"},
   {"Float_t", "float"},
   {"Double_t", "double"},
   {"string", "std::string"},

   // Byte and character types
   {"byte", "std::byte"},
   {"Char_t", "char"},
   {"int8_t", "std::int8_t"},
   {"UChar_t", "unsigned char"},
   {"uint8_t", "std::uint8_t"},

   // short and the 16 bit fixed-width integers
   {"Short_t", "short"},
   {"int16_t", "std::int16_t"},
   {"UShort_t", "unsigned short"},
   {"uint16_t", "std::uint16_t"},

   // int and the 32 bit fixed-width integers
   {"Int_t", "int"},
   {"int32_t", "std::int32_t"},
   {"UInt_t", "unsigned int"},
   {"unsigned", "unsigned int"},
   {"uint32_t", "std::uint32_t"},

   // Long_t and ULong_t follow the platform's size of long and unsigned long: They are 64 bit on 64-bit Linux and
   // macOS, but 32 bit on 32-bit platforms and Windows (regardless of pointer size).
   {"Long_t", "long"},
   {"ULong_t", "unsigned long"},

   // long long and the 64 bit fixed-width integers
   {"Long64_t", "long long"},
   {"int64_t", "std::int64_t"},
   {"ULong64_t", "unsigned long long"},
   {"uint64_t", "std::uint64_t"},
};
60
61// Recursively normalizes a template argument using the regular type name normalizer F as a helper.
62template <typename F>
63std::string GetNormalizedTemplateArg(const std::string &arg, F fnTypeNormalizer)
64{
65 R__ASSERT(!arg.empty());
66
67 if (std::isdigit(arg[0]) || arg[0] == '-') {
68 // Integer template argument
70 }
71
72 std::string qualifier;
73 // Type name template argument; template arguments must keep their CV qualifier
74 if (arg.substr(0, 6) == "const " || (arg.length() > 14 && arg.substr(9, 6) == "const "))
75 qualifier += "const ";
76 if (arg.substr(0, 9) == "volatile " || (arg.length() > 14 && arg.substr(6, 9) == "volatile "))
77 qualifier += "volatile ";
78 return qualifier + fnTypeNormalizer(arg);
79}
80
81using AnglePos = std::pair<std::string::size_type, std::string::size_type>;
82std::vector<AnglePos> FindTemplateAngleBrackets(const std::string &typeName)
83{
84 std::vector<AnglePos> result;
85 std::string::size_type currentPos = 0;
86 while (currentPos < typeName.size()) {
87 const auto posOpen = typeName.find('<', currentPos);
88 if (posOpen == std::string::npos) {
89 // If there are no more templates, the function is done.
90 break;
91 }
92
93 auto posClose = posOpen + 1;
94 int level = 1;
95 while (posClose < typeName.size()) {
96 const auto c = typeName[posClose];
97 if (c == '<') {
98 level++;
99 } else if (c == '>') {
100 if (level == 1) {
101 break;
102 }
103 level--;
104 }
105 posClose++;
106 }
107 // We should have found a closing angle bracket at the right level.
108 R__ASSERT(posClose < typeName.size());
109 result.emplace_back(posOpen, posClose);
110
111 // If we are not at the end yet, the following two characeters should be :: for nested types.
112 if (posClose < typeName.size() - 1) {
113 R__ASSERT(typeName.substr(posClose + 1, 2) == "::");
114 }
115 currentPos = posClose + 1;
116 }
117
118 return result;
119}
120
121} // namespace
122
123std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
124{
125 std::string canonicalType{TClassEdit::CleanType(typeName.c_str(), /*mode=*/1)};
126 if (canonicalType.substr(0, 7) == "struct ") {
127 canonicalType.erase(0, 7);
128 } else if (canonicalType.substr(0, 5) == "enum ") {
129 canonicalType.erase(0, 5);
130 } else if (canonicalType.substr(0, 2) == "::") {
131 canonicalType.erase(0, 2);
132 }
133
134 // TClassEdit::CleanType inserts blanks between closing angle brackets, as they were required before C++11. We want
135 // to remove them for RNTuple.
136 auto angle = canonicalType.find('<');
137 if (angle != std::string::npos) {
138 auto dst = canonicalType.begin() + angle;
139 auto end = canonicalType.end();
140 for (auto src = dst; src != end; ++src) {
141 if (*src == ' ') {
142 auto next = src + 1;
143 if (next != end && *next == '>') {
144 // Skip this space before a closing angle bracket.
145 continue;
146 }
147 }
148 *(dst++) = *src;
149 }
150 canonicalType.erase(dst, end);
151 }
152
153 if (canonicalType.substr(0, 6) == "array<") {
154 canonicalType = "std::" + canonicalType;
155 } else if (canonicalType.substr(0, 7) == "atomic<") {
156 canonicalType = "std::" + canonicalType;
157 } else if (canonicalType.substr(0, 7) == "bitset<") {
158 canonicalType = "std::" + canonicalType;
159 } else if (canonicalType.substr(0, 4) == "map<") {
160 canonicalType = "std::" + canonicalType;
161 } else if (canonicalType.substr(0, 9) == "multimap<") {
162 canonicalType = "std::" + canonicalType;
163 } else if (canonicalType.substr(0, 9) == "multiset<") {
164 canonicalType = "std::" + canonicalType;
165 }
166 if (canonicalType.substr(0, 5) == "pair<") {
167 canonicalType = "std::" + canonicalType;
168 } else if (canonicalType.substr(0, 4) == "set<") {
169 canonicalType = "std::" + canonicalType;
170 } else if (canonicalType.substr(0, 6) == "tuple<") {
171 canonicalType = "std::" + canonicalType;
172 } else if (canonicalType.substr(0, 11) == "unique_ptr<") {
173 canonicalType = "std::" + canonicalType;
174 } else if (canonicalType.substr(0, 14) == "unordered_map<") {
175 canonicalType = "std::" + canonicalType;
176 } else if (canonicalType.substr(0, 19) == "unordered_multimap<") {
177 canonicalType = "std::" + canonicalType;
178 } else if (canonicalType.substr(0, 19) == "unordered_multiset<") {
179 canonicalType = "std::" + canonicalType;
180 } else if (canonicalType.substr(0, 14) == "unordered_set<") {
181 canonicalType = "std::" + canonicalType;
182 } else if (canonicalType.substr(0, 8) == "variant<") {
183 canonicalType = "std::" + canonicalType;
184 } else if (canonicalType.substr(0, 7) == "vector<") {
185 canonicalType = "std::" + canonicalType;
186 } else if (canonicalType.substr(0, 11) == "ROOT::RVec<") {
187 canonicalType = "ROOT::VecOps::RVec<" + canonicalType.substr(11);
188 }
189
190 if (auto it = typeTranslationMap.find(canonicalType); it != typeTranslationMap.end()) {
191 canonicalType = it->second;
192 }
193
194 // Map fundamental integer types to stdint integer types (e.g. int --> std::int32_t)
195 if (canonicalType == "signed char") {
197 } else if (canonicalType == "unsigned char") {
199 } else if (canonicalType == "short" || canonicalType == "short int" || canonicalType == "signed short" ||
200 canonicalType == "signed short int") {
202 } else if (canonicalType == "unsigned short" || canonicalType == "unsigned short int") {
204 } else if (canonicalType == "int" || canonicalType == "signed" || canonicalType == "signed int") {
206 } else if (canonicalType == "unsigned" || canonicalType == "unsigned int") {
208 } else if (canonicalType == "long" || canonicalType == "long int" || canonicalType == "signed long" ||
209 canonicalType == "signed long int") {
211 } else if (canonicalType == "unsigned long" || canonicalType == "unsigned long int") {
213 } else if (canonicalType == "long long" || canonicalType == "long long int" || canonicalType == "signed long long" ||
214 canonicalType == "signed long long int") {
216 } else if (canonicalType == "unsigned long long" || canonicalType == "unsigned long long int") {
218 }
219
220 return canonicalType;
221}
222
224{
226 // RNTuple resolves Double32_t for the normalized type name but keeps Double32_t for the type alias
227 // (also in template parameters)
228 if (canonicalTypePrefix == "Double32_t")
229 return "double";
230
231 if (canonicalTypePrefix.find('<') == std::string::npos) {
232 // If there are no templates, the function is done.
233 return canonicalTypePrefix;
234 }
235
237 R__ASSERT(!angleBrackets.empty());
238
239 std::string normName;
240 std::string::size_type currentPos = 0;
241 for (std::size_t i = 0; i < angleBrackets.size(); i++) {
242 const auto [posOpen, posClose] = angleBrackets[i];
243 // Append the type prefix until the open angle bracket.
245
246 const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
247 const auto templateArgs = TokenizeTypeList(argList);
248 R__ASSERT(!templateArgs.empty());
249
250 for (const auto &a : templateArgs) {
251 normName += GetNormalizedTemplateArg(a, GetRenormalizedTypeName) + ",";
252 }
253
254 normName[normName.size() - 1] = '>';
255 currentPos = posClose + 1;
256 }
257
258 // Append the rest of the type from the last closing angle bracket.
259 const auto lastClosePos = angleBrackets.back().second;
261
262 return normName;
263}
264
266{
270 std::string canonicalTypePrefix;
273
274 if (canonicalTypePrefix.find('<') == std::string::npos) {
275 // If there are no templates, the function is done.
276 return canonicalTypePrefix;
277 }
278
280 R__ASSERT(!angleBrackets.empty());
281
282 // For user-defined class types, we will need to get the default-initialized template arguments.
283 const bool isUserClass =
284 (canonicalTypePrefix.substr(0, 5) != "std::") && (canonicalTypePrefix.substr(0, 19) != "ROOT::VecOps::RVec<");
285
286 std::string normName;
287 std::string::size_type currentPos = 0;
288 for (std::size_t i = 0; i < angleBrackets.size(); i++) {
289 const auto [posOpen, posClose] = angleBrackets[i];
290 // Append the type prefix until the open angle bracket.
292
293 const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
294 const auto templateArgs = TokenizeTypeList(argList);
295 R__ASSERT(!templateArgs.empty());
296
297 for (const auto &a : templateArgs) {
299 }
300
301 // For user-defined classes, append default-initialized template arguments.
302 if (isUserClass) {
303 const auto cl = TClass::GetClass(canonicalTypePrefix.substr(0, posClose + 1).c_str());
304 if (cl) {
305 const std::string expandedName = cl->GetName();
307 // We can have fewer pairs than angleBrackets, for example in case of type aliases.
309
311 const auto expandedArgList =
315
316 for (std::size_t j = templateArgs.size(); j < expandedTemplateArgs.size(); ++j) {
318 }
319 }
320 }
321
322 normName[normName.size() - 1] = '>';
323 currentPos = posClose + 1;
324 }
325
326 // Append the rest of the type from the last closing angle bracket.
327 const auto lastClosePos = angleBrackets.back().second;
329
330 return normName;
331}
332
333std::string ROOT::Internal::GetNormalizedInteger(long long val)
334{
335 return std::to_string(val);
336}
337
338std::string ROOT::Internal::GetNormalizedInteger(unsigned long long val)
339{
340 if (val > std::numeric_limits<std::int64_t>::max())
341 return std::to_string(val) + "u";
342 return std::to_string(val);
343}
344
352
353long long ROOT::Internal::ParseIntTypeToken(const std::string &intToken)
354{
355 std::size_t nChars = 0;
356 long long res = std::stoll(intToken, &nChars);
357 if (nChars == intToken.size())
358 return res;
359
360 assert(nChars < intToken.size());
361 if (nChars == 0) {
362 throw RException(R__FAIL("invalid integer type token: " + intToken));
363 }
364
365 auto suffix = intToken.substr(nChars);
366 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
367 if (suffix == "L" || suffix == "LL")
368 return res;
369 if (res >= 0 && (suffix == "U" || suffix == "UL" || suffix == "ULL"))
370 return res;
371
372 throw RException(R__FAIL("invalid integer type token: " + intToken));
373}
374
375unsigned long long ROOT::Internal::ParseUIntTypeToken(const std::string &uintToken)
376{
377 std::size_t nChars = 0;
378 unsigned long long res = std::stoull(uintToken, &nChars);
379 if (nChars == uintToken.size())
380 return res;
381
382 assert(nChars < uintToken.size());
383 if (nChars == 0) {
384 throw RException(R__FAIL("invalid integer type token: " + uintToken));
385 }
386
387 auto suffix = uintToken.substr(nChars);
388 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
389 if (suffix == "U" || suffix == "L" || suffix == "LL" || suffix == "UL" || suffix == "ULL")
390 return res;
391
392 throw RException(R__FAIL("invalid integer type token: " + uintToken));
393}
394
396{
397 auto am = cl->GetAttributeMap();
398 if (!am || !am->HasKey("rntuple.streamerMode"))
399 return ERNTupleSerializationMode::kUnset;
400
401 std::string value = am->GetPropertyAsString("rntuple.streamerMode");
402 std::transform(value.begin(), value.end(), value.begin(), ::toupper);
403 if (value == "TRUE") {
404 return ERNTupleSerializationMode::kForceStreamerMode;
405 } else if (value == "FALSE") {
406 return ERNTupleSerializationMode::kForceNativeMode;
407 } else {
408 R__LOG_WARNING(ROOT::Internal::NTupleLog()) << "invalid setting for 'rntuple.streamerMode' class attribute: "
409 << am->GetPropertyAsString("rntuple.streamerMode");
410 return ERNTupleSerializationMode::kUnset;
411 }
412}
413
414std::tuple<std::string, std::vector<std::size_t>> ROOT::Internal::ParseArrayType(const std::string &typeName)
415{
416 std::vector<std::size_t> sizeVec;
417
418 // Only parse outer array definition, i.e. the right `]` should be at the end of the type name
419 std::string prefix{typeName};
420 while (prefix.back() == ']') {
421 auto posRBrace = prefix.size() - 1;
422 auto posLBrace = prefix.rfind('[', posRBrace);
423 if (posLBrace == std::string_view::npos) {
424 throw RException(R__FAIL(std::string("invalid array type: ") + typeName));
425 }
426
427 const std::size_t size = ParseUIntTypeToken(prefix.substr(posLBrace + 1, posRBrace - posLBrace - 1));
428 if (size == 0) {
429 throw RException(R__FAIL(std::string("invalid array size: ") + typeName));
430 }
431
432 sizeVec.insert(sizeVec.begin(), size);
433 prefix.resize(posLBrace);
434 }
435 return std::make_tuple(prefix, sizeVec);
436}
437
438std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templateType)
439{
440 std::vector<std::string> result;
441 if (templateType.empty())
442 return result;
443
444 const char *eol = templateType.data() + templateType.length();
445 const char *typeBegin = templateType.data();
446 const char *typeCursor = templateType.data();
447 unsigned int nestingLevel = 0;
448 while (typeCursor != eol) {
449 switch (*typeCursor) {
450 case '<': ++nestingLevel; break;
451 case '>': --nestingLevel; break;
452 case ',':
453 if (nestingLevel == 0) {
454 result.push_back(std::string(typeBegin, typeCursor - typeBegin));
455 typeBegin = typeCursor + 1;
456 }
457 break;
458 }
459 typeCursor++;
460 }
461 result.push_back(std::string(typeBegin, typeCursor - typeBegin));
462 return result;
463}
464
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define R__LOG_WARNING(...)
Definition RLogger.hxx:358
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint angle
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Classes with dictionaries that can be inspected by TClass.
Definition RField.hxx:283
const_iterator begin() const
const_iterator end() const
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3073
TDictAttributeMap * GetAttributeMap() const
ERNTupleSerializationMode
Possible settings for the "rntuple.streamerMode" class attribute in the dictionary.
std::tuple< std::string, std::vector< std::size_t > > ParseArrayType(const std::string &typeName)
Parse a type name of the form T[n][m]... and return the base type T and a vector that contains,...
ROOT::RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
unsigned long long ParseUIntTypeToken(const std::string &uintToken)
std::string GetNormalizedInteger(const std::string &intTemplateArg)
Appends 'll' or 'ull' to the integer where necessary and strips the suffix if not needed.
ERNTupleSerializationMode GetRNTupleSerializationMode(TClass *cl)
std::string GetCanonicalTypePrefix(const std::string &typeName)
Applies RNTuple specific type name normalization rules (see specs) that help the string parsing in RF...
std::string GetNormalizedUnresolvedTypeName(const std::string &origName)
Applies all RNTuple type normalization rules except typedef resolution.
std::string GetRenormalizedDemangledTypeName(const std::type_info &ti)
Given a type info ask ROOT meta to demangle it, then renormalize the resulting type name for RNTuple.
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert std::prefix.
std::vector< std::string > TokenizeTypeList(std::string_view templateType)
Used in RFieldBase::Create() in order to get the comma-separated list of template types E....
long long ParseIntTypeToken(const std::string &intToken)
std::string GetDemangledTypeName(const std::type_info &t)
std::string CleanType(const char *typeDesc, int mode=0, const char **tail=nullptr)
Cleanup type description, redundant blanks removed and redundant tail ignored return *tail = pointer ...
@ kDropComparator
Definition TClassEdit.h:83
@ kDropStlDefault
Definition TClassEdit.h:82