Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RFieldUtils.cxx
Go to the documentation of this file.
1/// \file RFieldUtils.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jonas Hahnfeld <jonas.hahnfeld@cern.ch>
4/// \date 2024-11-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8#include "RFieldUtils.hxx"
9
10#include <ROOT/RField.hxx>
11#include <ROOT/RLogger.hxx>
12#include <ROOT/RNTupleUtil.hxx>
13
14#include <TClass.h>
15#include <TClassEdit.h>
16#include <TDictAttributeMap.h>
17
18#include <algorithm>
19#include <charconv>
20#include <limits>
21#include <string>
22#include <string_view>
23#include <system_error>
24#include <unordered_map>
25#include <utility>
26#include <vector>
27
28namespace {
29
// Lookup table from alternative spellings of fundamental and standard types to the spelling used in canonical
// type names: ROOT-style typedef names (Bool_t, Int_t, Long64_t, ...) map to the underlying C++ type name, and
// unqualified standard names (string, byte, [u]intN_t) get their "std::" qualification. A bare "unsigned" is
// expanded to "unsigned int".
// GetCanonicalTypePrefix() looks up the cleaned-up type name in this table; names without an entry are not
// changed by that lookup.
30const std::unordered_map<std::string_view, std::string_view> typeTranslationMap{
31 {"Bool_t", "bool"},
32 {"Float_t", "float"},
33 {"Double_t", "double"},
34 {"string", "std::string"},
35
// Byte and character types
36 {"byte", "std::byte"},
37 {"Char_t", "char"},
38 {"int8_t", "std::int8_t"},
39 {"UChar_t", "unsigned char"},
40 {"uint8_t", "std::uint8_t"},
41
// short and 16-bit integer names
42 {"Short_t", "short"},
43 {"int16_t", "std::int16_t"},
44 {"UShort_t", "unsigned short"},
45 {"uint16_t", "std::uint16_t"},
46
// int and 32-bit integer names
47 {"Int_t", "int"},
48 {"int32_t", "std::int32_t"},
49 {"UInt_t", "unsigned int"},
50 {"unsigned", "unsigned int"},
51 {"uint32_t", "std::uint32_t"},
52
53 // Long_t and ULong_t follow the platform's size of long and unsigned long: They are 64 bit on 64-bit Linux and
54 // macOS, but 32 bit on 32-bit platforms and Windows (regardless of pointer size).
55 {"Long_t", "long"},
56 {"ULong_t", "unsigned long"},
57
// long long and 64-bit integer names
58 {"Long64_t", "long long"},
59 {"int64_t", "std::int64_t"},
60 {"ULong64_t", "unsigned long long"},
61 {"uint64_t", "std::uint64_t"}};
62
63// Recursively normalizes a template argument using the regular type name normalizer F as a helper.
// `arg` must not be empty (asserted). An argument starting with a digit or '-' is taken to be an integer literal.
// Any other argument is treated as a type name: its leading "const " / "volatile " qualifiers (in either order)
// are collected and prepended, always in the order "const volatile", to the result of fnTypeNormalizer(arg).
64template <typename F>
65std::string GetNormalizedTemplateArg(const std::string &arg, F fnTypeNormalizer)
66{
67 R__ASSERT(!arg.empty());
68
// NOTE(review): std::isdigit() is only defined for arguments representable as unsigned char (or EOF); a plain
// char with a negative value is outside that contract -- consider casting arg[0] to unsigned char.
69 if (std::isdigit(arg[0]) || arg[0] == '-') {
70 // Integer template argument
// NOTE(review): no statement is visible in this branch (the embedded line numbering jumps from 70 to 72).
// Presumably the integer argument is normalized and returned here -- confirm against the full file.
72 }
73
74 std::string qualifier;
75 // Type name template argument; template arguments must keep their CV qualifier
// Offset 9 is the length of "volatile ", so the second test matches "volatile const ...".
76 if (arg.substr(0, 6) == "const " || (arg.length() > 14 && arg.substr(9, 6) == "const "))
77 qualifier += "const ";
// Offset 6 is the length of "const ", so the second test matches "const volatile ...".
78 if (arg.substr(0, 9) == "volatile " || (arg.length() > 14 && arg.substr(6, 9) == "volatile "))
79 qualifier += "volatile ";
80 return qualifier + fnTypeNormalizer(arg);
81}
82
// Splits a type name at its first '<' into {prefix, template argument list}.
// The argument list is the text between that '<' and the final character, which must be '>'; the enclosing
// angle brackets themselves are not part of either result. A name without any '<' is returned unchanged as the
// prefix together with an empty argument list. A name that starts with '<' or does not end with '>' violates
// the asserted preconditions.
std::pair<std::string, std::string> SplitTypePrefixFromTemplateArgs(const std::string &typeName)
{
   const std::size_t idxOpen = typeName.find('<');
   if (idxOpen != std::string::npos) {
      R__ASSERT(idxOpen > 0);
      R__ASSERT(typeName.back() == '>');
      R__ASSERT((typeName.size() - 1) > idxOpen);

      // Everything strictly between the opening '<' and the closing '>'
      const std::size_t argBegin = idxOpen + 1;
      const std::size_t argLength = typeName.size() - argBegin - 1;
      return {typeName.substr(0, idxOpen), typeName.substr(argBegin, argLength)};
   }

   // Not a template instantiation
   return {typeName, ""};
}
95
96} // namespace
97
// Produces the canonical spelling of a type name prefix. In order:
//   1. TClassEdit::CleanType() (mode 1) cleans up the name,
//   2. a single leading "struct ", "enum " or "::" is removed,
//   3. unqualified names of a fixed set of standard library templates get the "std::" prefix, and
//      "ROOT::RVec<" is spelled out as "ROOT::VecOps::RVec<",
//   4. a name listed in typeTranslationMap is replaced by its translation,
//   5. fundamental integer type spellings are matched for the mapping to stdint names (see the note at that step).
// The result is returned by value; typeName itself is not modified.
98std::string ROOT::Experimental::Internal::GetCanonicalTypePrefix(const std::string &typeName)
99{
100 std::string canonicalType{TClassEdit::CleanType(typeName.c_str(), /*mode=*/1)};
// The branches are exclusive: at most one of the three prefixes is stripped.
101 if (canonicalType.substr(0, 7) == "struct ") {
102 canonicalType.erase(0, 7);
103 } else if (canonicalType.substr(0, 5) == "enum ") {
104 canonicalType.erase(0, 5);
105 } else if (canonicalType.substr(0, 2) == "::") {
106 canonicalType.erase(0, 2);
107 }
108
// Qualify standard library templates that were spelled without their namespace.
109 if (canonicalType.substr(0, 6) == "array<") {
110 canonicalType = "std::" + canonicalType;
111 } else if (canonicalType.substr(0, 7) == "atomic<") {
112 canonicalType = "std::" + canonicalType;
113 } else if (canonicalType.substr(0, 7) == "bitset<") {
114 canonicalType = "std::" + canonicalType;
115 } else if (canonicalType.substr(0, 4) == "map<") {
116 canonicalType = "std::" + canonicalType;
117 } else if (canonicalType.substr(0, 9) == "multimap<") {
118 canonicalType = "std::" + canonicalType;
119 } else if (canonicalType.substr(0, 9) == "multiset<") {
120 canonicalType = "std::" + canonicalType;
121 }
// A second, independent chain starts here. If the chain above matched, the name now begins with "std::" and
// none of the prefixes below can match, so at most one rewrite is applied overall.
122 if (canonicalType.substr(0, 5) == "pair<") {
123 canonicalType = "std::" + canonicalType;
124 } else if (canonicalType.substr(0, 4) == "set<") {
125 canonicalType = "std::" + canonicalType;
126 } else if (canonicalType.substr(0, 6) == "tuple<") {
127 canonicalType = "std::" + canonicalType;
128 } else if (canonicalType.substr(0, 11) == "unique_ptr<") {
129 canonicalType = "std::" + canonicalType;
130 } else if (canonicalType.substr(0, 14) == "unordered_map<") {
131 canonicalType = "std::" + canonicalType;
132 } else if (canonicalType.substr(0, 19) == "unordered_multimap<") {
133 canonicalType = "std::" + canonicalType;
134 } else if (canonicalType.substr(0, 19) == "unordered_multiset<") {
135 canonicalType = "std::" + canonicalType;
136 } else if (canonicalType.substr(0, 14) == "unordered_set<") {
137 canonicalType = "std::" + canonicalType;
138 } else if (canonicalType.substr(0, 8) == "variant<") {
139 canonicalType = "std::" + canonicalType;
140 } else if (canonicalType.substr(0, 7) == "vector<") {
141 canonicalType = "std::" + canonicalType;
142 } else if (canonicalType.substr(0, 11) == "ROOT::RVec<") {
// Replace the 11-character alias prefix by the fully qualified class template name.
143 canonicalType = "ROOT::VecOps::RVec<" + canonicalType.substr(11);
144 }
145
// Exact-match lookup of the complete name; the translation may yield a fundamental integer spelling that
// is then matched by the chain below.
146 if (auto it = typeTranslationMap.find(canonicalType); it != typeTranslationMap.end()) {
147 canonicalType = it->second;
148 }
149
150 // Map fundamental integer types to stdint integer types (e.g. int --> std::int32_t)
// NOTE(review): every branch of the following chain is empty in this listing (the embedded line numbering
// skips one line per branch). Presumably each branch assigns the matching fixed-width type name -- confirm
// against the full file. All equivalent spellings of one type share a branch.
151 if (canonicalType == "signed char") {
153 } else if (canonicalType == "unsigned char") {
155 } else if (canonicalType == "short" || canonicalType == "short int" || canonicalType == "signed short" ||
156 canonicalType == "signed short int") {
158 } else if (canonicalType == "unsigned short" || canonicalType == "unsigned short int") {
160 } else if (canonicalType == "int" || canonicalType == "signed" || canonicalType == "signed int") {
162 } else if (canonicalType == "unsigned" || canonicalType == "unsigned int") {
164 } else if (canonicalType == "long" || canonicalType == "long int" || canonicalType == "signed long" ||
165 canonicalType == "signed long int") {
167 } else if (canonicalType == "unsigned long" || canonicalType == "unsigned long int") {
169 } else if (canonicalType == "long long" || canonicalType == "long long int" || canonicalType == "signed long long" ||
170 canonicalType == "signed long long int") {
172 } else if (canonicalType == "unsigned long long" || canonicalType == "unsigned long long int") {
174 }
175
176 return canonicalType;
177}
178
180{
// NOTE(review): the function header and the statement that initializes `normName` are not part of this
// listing (embedded lines 179 and 181 are absent); the comments below describe only the visible statements.
182 // RNTuple resolves Double32_t for the normalized type name but keeps Double32_t for the type alias
183 // (also in template parameters)
184 if (normName == "Double32_t")
185 return "double";
186
// Names without a template argument list are complete at this point.
187 const auto [typePrefix, argList] = SplitTypePrefixFromTemplateArgs(normName);
188 if (argList.empty())
189 return typePrefix;
190
191 auto templateArgs = TokenizeTypeList(argList);
192 R__ASSERT(!templateArgs.empty());
193
// Rebuild the name as "prefix<...>".
// NOTE(review): the loop body (embedded line 196) is absent from this listing; presumably it appends each
// normalized argument followed by a separator, which is why the last character is overwritten with '>'
// afterwards -- confirm against the full file.
194 normName = typePrefix + "<";
195 for (const auto &a : templateArgs) {
197 }
198 normName[normName.size() - 1] = '>';
199
200 return normName;
201}
202
204{
// NOTE(review): the function header and several statements are missing from this listing (the embedded line
// numbering has gaps at 203, 205-206, 208, 210, 226-227, 231 and 234). In particular `splitname`, `modType`
// and `expandedTemplateArgs` are used below but declared on lines that are not visible here.
207 std::string normName{origName};
// ShortType() receives normName and modType; presumably it writes the shortened name into normName -- confirm.
209 splitname.ShortType(normName, modType);
211
// Names without a template argument list need no further processing.
212 const auto [typePrefix, argList] = SplitTypePrefixFromTemplateArgs(normName);
213 if (argList.empty())
214 return normName;
215
216 auto templateArgs = TokenizeTypeList(argList);
217 R__ASSERT(!templateArgs.empty());
218
219 // Get default-initialized template arguments; we only need to do this for user-defined class types
220 auto expandedName = normName;
221 if ((expandedName.substr(0, 5) != "std::") && (expandedName.substr(0, 19) != "ROOT::VecOps::RVec<")) {
// The class lookup uses the original name; if no TClass is found, expandedName stays equal to normName.
222 auto cl = TClass::GetClass(origName.c_str());
223 if (cl)
224 expandedName = cl->GetName();
225 }
228
// Rebuild the name as "prefix<...>": first the explicitly given arguments, then the arguments that appear
// only in the expanded argument list (indices from templateArgs.size() onwards).
// NOTE(review): both loop bodies are absent from this listing; presumably each appends one argument followed
// by a separator, which is why the last character is overwritten with '>' afterwards -- confirm.
229 normName = typePrefix + "<";
230 for (const auto &a : templateArgs) {
232 }
233 for (std::size_t i = templateArgs.size(); i < expandedTemplateArgs.size(); ++i) {
235 }
236 normName[normName.size() - 1] = '>';
237
238 return normName;
239}
240
242{
// NOTE(review): the function header (embedded line 241) is not part of this listing.
// Plain decimal representation of val; no suffix is added.
243 return std::to_string(val);
244}
245
// Formats an unsigned integer as a decimal string. Values that exceed the largest std::int64_t get a trailing
// "u"; all other values are printed without any suffix.
std::string ROOT::Experimental::Internal::GetNormalizedInteger(unsigned long long val)
{
   constexpr auto kMaxSigned = static_cast<unsigned long long>(std::numeric_limits<std::int64_t>::max());

   std::string digits = std::to_string(val);
   if (val > kMaxSigned)
      digits += "u";
   return digits;
}
252
260
262{
// NOTE(review): the function header (embedded line 261) is not part of this listing.
// Parses intToken as a signed integer that may carry a literal suffix. Accepted suffixes (case-insensitive)
// are L and LL, plus U, UL and ULL for non-negative values. Any other trailing text raises RException.
263 std::size_t nChars = 0;
// std::stoll stores in nChars how many leading characters it consumed.
264 long long res = std::stoll(intToken, &nChars);
// The whole token was consumed: plain integer without suffix.
265 if (nChars == intToken.size())
266 return res;
267
268 assert(nChars < intToken.size());
// NOTE(review): std::stoll itself throws std::invalid_argument when no conversion can be performed and
// std::out_of_range when the value does not fit, so in those cases callers see a standard exception rather
// than the RException below -- confirm this is intended.
269 if (nChars == 0) {
270 throw RException(R__FAIL("invalid integer type token: " + intToken));
271 }
272
// Upper-case the remaining characters so that the suffix comparison is case-insensitive.
273 auto suffix = intToken.substr(nChars);
274 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
275 if (suffix == "L" || suffix == "LL")
276 return res;
// An unsigned suffix is only accepted if the parsed value is not negative.
277 if (res >= 0 && (suffix == "U" || suffix == "UL" || suffix == "ULL"))
278 return res;
279
280 throw RException(R__FAIL("invalid integer type token: " + intToken));
281}
282
284{
// NOTE(review): the function header (embedded line 283) is not part of this listing.
// Parses uintToken as an unsigned integer that may carry a literal suffix. Accepted suffixes
// (case-insensitive) are U, L, LL, UL and ULL. Any other trailing text raises RException.
285 std::size_t nChars = 0;
// std::stoull stores in nChars how many leading characters it consumed.
// NOTE(review): std::stoull accepts a leading minus sign and returns the negated value converted to
// unsigned long long, so a token such as "-1" is not rejected here -- confirm that callers filter this.
286 unsigned long long res = std::stoull(uintToken, &nChars);
// The whole token was consumed: plain integer without suffix.
287 if (nChars == uintToken.size())
288 return res;
289
290 assert(nChars < uintToken.size());
// NOTE(review): std::stoull itself throws std::invalid_argument when no conversion can be performed and
// std::out_of_range when the value does not fit, so in those cases callers see a standard exception rather
// than the RException below -- confirm this is intended.
291 if (nChars == 0) {
292 throw RException(R__FAIL("invalid integer type token: " + uintToken));
293 }
294
// Upper-case the remaining characters so that the suffix comparison is case-insensitive.
295 auto suffix = uintToken.substr(nChars);
296 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
297 if (suffix == "U" || suffix == "L" || suffix == "LL" || suffix == "UL" || suffix == "ULL")
298 return res;
299
300 throw RException(R__FAIL("invalid integer type token: " + uintToken));
301}
302
305{
// NOTE(review): the function header (embedded lines 303-304) is not part of this listing.
// Translates the class attribute "rntuple.streamerMode" into an ERNTupleSerializationMode value.
// `cl` is dereferenced without a null check, so it is assumed to be non-null -- confirm against callers.
306 auto am = cl->GetAttributeMap();
// No attribute map or no such key: the mode is unset.
307 if (!am || !am->HasKey("rntuple.streamerMode"))
308 return ERNTupleSerializationMode::kUnset;
309
// Compare case-insensitively by upper-casing a copy of the attribute value.
310 std::string value = am->GetPropertyAsString("rntuple.streamerMode");
311 std::transform(value.begin(), value.end(), value.begin(), ::toupper);
312 if (value == "TRUE") {
313 return ERNTupleSerializationMode::kForceStreamerMode;
314 } else if (value == "FALSE") {
315 return ERNTupleSerializationMode::kForceNativeMode;
316 } else {
// Any other value is reported and ignored. The attribute is read again for the message so that the warning
// shows the value as stored, not the upper-cased copy.
317 R__LOG_WARNING(ROOT::Internal::NTupleLog()) << "invalid setting for 'rntuple.streamerMode' class attribute: "
318 << am->GetPropertyAsString("rntuple.streamerMode");
319 return ERNTupleSerializationMode::kUnset;
320 }
321}
322
// Splits a type name of the form T[n][m]... into the base name T and the list of array extents.
// Extents are peeled off from the right and inserted at the front of the vector, so the returned sizes are in
// the same left-to-right order as written in typeName. Throws RException if a closing ']' has no matching '['
// or if an extent is zero; errors raised while parsing an extent propagate from ParseUIntTypeToken().
// NOTE(review): the line carrying the function name and parameter list (embedded line 324) is not part of this
// listing.
323std::tuple<std::string, std::vector<std::size_t>>
325{
326 std::vector<std::size_t> sizeVec;
327
328 // Only parse outer array definition, i.e. the right `]` should be at the end of the type name
329 std::string prefix{typeName};
// NOTE(review): calling back() on an empty std::string is undefined behavior. `prefix` is empty here if
// typeName is empty, or after the resize() below if the name consists only of extents (e.g. "[3]") --
// confirm that callers never pass such names.
330 while (prefix.back() == ']') {
331 auto posRBrace = prefix.size() - 1;
332 auto posLBrace = prefix.find_last_of('[', posRBrace);
333 if (posLBrace == std::string_view::npos) {
334 throw RException(R__FAIL(std::string("invalid array type: ") + typeName));
335 }
336
// The extent is the text strictly between '[' and ']'.
337 const std::size_t size = ParseUIntTypeToken(prefix.substr(posLBrace + 1, posRBrace - posLBrace - 1));
338 if (size == 0) {
339 throw RException(R__FAIL(std::string("invalid array size: ") + typeName));
340 }
341
342 sizeVec.insert(sizeVec.begin(), size);
// Drop the "[N]" that was just parsed and continue with the next extent to the left, if any.
343 prefix.resize(posLBrace);
344 }
345 return std::make_tuple(prefix, sizeVec);
346}
347
// Splits a comma-separated list of type names, e.g. the argument list of a template instantiation, into its
// top-level elements. Commas nested inside angle brackets do not split: "int,std::pair<int,float>" yields
// {"int", "std::pair<int,float>"}.
//
// \param templateType the list to split; it does not need to be null-terminated
// \return the elements in order of appearance. An empty input yields an empty vector. Elements are copied
//         verbatim: surrounding whitespace is not trimmed, and a leading or trailing comma produces an empty
//         element.
//
// A '>' without a preceding unmatched '<' is ignored for the nesting depth. The depth counter is unsigned, so
// decrementing it at depth zero would wrap around and cause every later top-level comma to be skipped.
std::vector<std::string> ROOT::Experimental::Internal::TokenizeTypeList(std::string_view templateType)
{
   std::vector<std::string> result;
   if (templateType.empty())
      return result;

   const char *const chars = templateType.data();
   const std::size_t nChars = templateType.size();

   std::size_t tokenBegin = 0;
   unsigned int nestingLevel = 0;
   for (std::size_t pos = 0; pos < nChars; ++pos) {
      switch (chars[pos]) {
      case '<': ++nestingLevel; break;
      case '>':
         // Guard against unbalanced input, see the function comment
         if (nestingLevel > 0)
            --nestingLevel;
         break;
      case ',':
         if (nestingLevel == 0) {
            result.emplace_back(chars + tokenBegin, pos - tokenBegin);
            tokenBegin = pos + 1;
         }
         break;
      default: break;
      }
   }
   // The last element has no terminating comma
   result.emplace_back(chars + tokenBegin, nChars - tokenBegin);
   return result;
}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define R__LOG_WARNING(...)
Definition RLogger.hxx:358
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
static std::string TypeName()
Definition RField.hxx:289
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
const_iterator begin() const
const_iterator end() const
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3069
TDictAttributeMap * GetAttributeMap() const
std::vector< std::string > TokenizeTypeList(std::string_view templateType)
Used in RFieldBase::Create() in order to get the comma-separated list of template types E....
std::string GetCanonicalTypePrefix(const std::string &typeName)
Applies RNTuple specific type name normalization rules (see specs) that help the string parsing in RF...
ERNTupleSerializationMode
Possible settings for the "rntuple.streamerMode" class attribute in the dictionary.
std::string GetNormalizedInteger(const std::string &intTemplateArg)
Appends 'll' or 'ull' to the integer where necessary and strips the suffix if not needed.
std::tuple< std::string, std::vector< std::size_t > > ParseArrayType(const std::string &typeName)
Parse a type name of the form T[n][m]... and return the base type T and a vector that contains,...
ERNTupleSerializationMode GetRNTupleSerializationMode(TClass *cl)
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert std::prefix.
std::string GetNormalizedUnresolvedTypeName(const std::string &origName)
Applies all RNTuple type normalization rules except typedef resolution.
unsigned long long ParseUIntTypeToken(const std::string &uintToken)
long long ParseIntTypeToken(const std::string &intToken)
ROOT::RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
std::string CleanType(const char *typeDesc, int mode=0, const char **tail=nullptr)
Cleanup type description, redundant blanks removed and redundant tail ignored return *tail = pointer ...
@ kDropComparator
Definition TClassEdit.h:83
@ kDropStlDefault
Definition TClassEdit.h:82