Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6
7/*************************************************************************
8 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#include <ROOT/RError.hxx>
16#include <ROOT/RFieldBase.hxx>
17#include <ROOT/RNTuple.hxx>
19#include <ROOT/RNTupleModel.hxx>
20#include <ROOT/RNTupleUtil.hxx>
21#include <ROOT/RPage.hxx>
22#include <string_view>
23
24#include <RZip.h>
25#include <TError.h>
26
27#include <algorithm>
28#include <cstdint>
29#include <deque>
30#include <functional>
31#include <iostream>
32#include <set>
33#include <utility>
34
36
38{
39 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
40 fFieldName == other.fFieldName && fFieldDescription == other.fFieldDescription &&
41 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
42 fStructure == other.fStructure && fParentId == other.fParentId &&
43 fProjectionSourceId == other.fProjectionSourceId && fLinkIds == other.fLinkIds &&
44 fLogicalColumnIds == other.fLogicalColumnIds && other.fTypeChecksum == other.fTypeChecksum;
45}
46
48{
49 RFieldDescriptor clone;
50 clone.fFieldId = fFieldId;
51 clone.fFieldVersion = fFieldVersion;
52 clone.fTypeVersion = fTypeVersion;
53 clone.fFieldName = fFieldName;
54 clone.fFieldDescription = fFieldDescription;
55 clone.fTypeName = fTypeName;
56 clone.fTypeAlias = fTypeAlias;
57 clone.fNRepetitions = fNRepetitions;
58 clone.fStructure = fStructure;
59 clone.fParentId = fParentId;
60 clone.fProjectionSourceId = fProjectionSourceId;
61 clone.fLinkIds = fLinkIds;
62 clone.fColumnCardinality = fColumnCardinality;
63 clone.fLogicalColumnIds = fLogicalColumnIds;
64 clone.fTypeChecksum = fTypeChecksum;
65 return clone;
66}
67
68std::unique_ptr<ROOT::RFieldBase>
70{
71 if (GetStructure() == ROOT::ENTupleStructure::kStreamer) {
72 auto streamerField = std::make_unique<ROOT::RStreamerField>(GetFieldName(), GetTypeName());
73 streamerField->SetOnDiskId(fFieldId);
74 return streamerField;
75 }
76
77 // The structure may be unknown if the descriptor comes from a deserialized field with an unknown structural role.
78 // For forward compatibility, we allow this case and return an InvalidField.
79 if (GetStructure() == ROOT::ENTupleStructure::kUnknown) {
80 if (options.GetReturnInvalidOnError()) {
81 auto invalidField = std::make_unique<ROOT::RInvalidField>(GetFieldName(), GetTypeName(), "",
83 invalidField->SetOnDiskId(fFieldId);
84 return invalidField;
85 } else {
86 throw RException(R__FAIL("unexpected on-disk field structure value for field \"" + GetFieldName() + "\""));
87 }
88 }
89
90 // Untyped records and collections
91 if (GetTypeName().empty()) {
92 switch (GetStructure()) {
94 std::vector<std::unique_ptr<ROOT::RFieldBase>> memberFields;
95 memberFields.reserve(fLinkIds.size());
96 for (auto id : fLinkIds) {
97 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
98 auto field = memberDesc.CreateField(ntplDesc, options);
100 return field;
101 memberFields.emplace_back(std::move(field));
102 }
103 auto recordField = std::make_unique<ROOT::RRecordField>(GetFieldName(), std::move(memberFields));
104 recordField->SetOnDiskId(fFieldId);
105 return recordField;
106 }
108 if (fLinkIds.size() != 1) {
109 throw RException(R__FAIL("unsupported untyped collection for field \"" + GetFieldName() + "\""));
110 }
111 auto itemField = ntplDesc.GetFieldDescriptor(fLinkIds[0]).CreateField(ntplDesc, options);
113 return itemField;
114 auto collectionField = ROOT::RVectorField::CreateUntyped(GetFieldName(), std::move(itemField));
115 collectionField->SetOnDiskId(fFieldId);
116 return collectionField;
117 }
118 default: throw RException(R__FAIL("unsupported untyped field structure for field \"" + GetFieldName() + "\""));
119 }
120 }
121
122 try {
123 const auto &fieldName = GetFieldName();
124 const auto &typeName = GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias();
125 // NOTE: Unwrap() here may throw an exception, hence the try block.
126 // If options.fReturnInvalidOnError is false we just rethrow it, otherwise we return an InvalidField wrapping the
127 // error.
128 auto field = ROOT::Internal::CallFieldBaseCreate(fieldName, typeName, options, &ntplDesc, fFieldId).Unwrap();
129 field->SetOnDiskId(fFieldId);
130
131 for (auto &subfield : *field) {
132 const auto subfieldId = ntplDesc.FindFieldId(subfield.GetFieldName(), subfield.GetParent()->GetOnDiskId());
133 subfield.SetOnDiskId(subfieldId);
135 auto &invalidField = static_cast<ROOT::RInvalidField &>(subfield);
136 // A subfield being invalid "infects" its entire ancestry.
137 return invalidField.Clone(fieldName);
138 }
139 }
140
141 return field;
142 } catch (const RException &ex) {
143 if (options.GetReturnInvalidOnError())
144 return std::make_unique<ROOT::RInvalidField>(GetFieldName(), GetTypeName(), ex.GetError().GetReport(),
146 else
147 throw ex;
148 }
149}
150
152{
154 return false;
155
156 // Skip untyped structs
157 if (fTypeName.empty())
158 return false;
159
160 if (fStructure == ROOT::ENTupleStructure::kRecord) {
161 if (fTypeName.compare(0, 10, "std::pair<") == 0)
162 return false;
163 if (fTypeName.compare(0, 11, "std::tuple<") == 0)
164 return false;
165 }
166
167 return true;
168}
169
170////////////////////////////////////////////////////////////////////////////////
171
173{
174 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
175 fBitsOnStorage == other.fBitsOnStorage && fType == other.fType && fFieldId == other.fFieldId &&
176 fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex &&
177 fValueRange == other.fValueRange;
178}
179
181{
182 RColumnDescriptor clone;
183 clone.fLogicalColumnId = fLogicalColumnId;
184 clone.fPhysicalColumnId = fPhysicalColumnId;
185 clone.fBitsOnStorage = fBitsOnStorage;
186 clone.fType = fType;
187 clone.fFieldId = fFieldId;
188 clone.fIndex = fIndex;
189 clone.fFirstElementIndex = fFirstElementIndex;
190 clone.fRepresentationIndex = fRepresentationIndex;
191 clone.fValueRange = fValueRange;
192 return clone;
193}
194
195////////////////////////////////////////////////////////////////////////////////
196
199{
200 const auto N = fCumulativeNElements.size();
201 R__ASSERT(N > 0);
202 R__ASSERT(N == fPageInfos.size());
203
204 std::size_t left = 0;
205 std::size_t right = N - 1;
206 std::size_t midpoint = N;
207 while (left <= right) {
208 midpoint = (left + right) / 2;
209 if (fCumulativeNElements[midpoint] <= idxInCluster) {
210 left = midpoint + 1;
211 continue;
212 }
213
214 if ((midpoint == 0) || (fCumulativeNElements[midpoint - 1] <= idxInCluster))
215 break;
216
217 right = midpoint - 1;
218 }
220
221 auto pageInfo = fPageInfos[midpoint];
222 decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : fCumulativeNElements[midpoint - 1];
224 R__ASSERT((firstInPage + pageInfo.GetNElements()) > idxInCluster);
226}
227
228std::size_t
231 std::size_t pageSize)
232{
// NOTE(review): the signature and several interior statements of this function are not visible in this
// listing; the comments below only describe the statements that are shown.
// The page range must describe the same physical column as `columnRange`, and that column range must not be
// suppressed.
233 R__ASSERT(fPhysicalColumnId == columnRange.GetPhysicalColumnId());
234 R__ASSERT(!columnRange.IsSuppressed());
235
// Sum of the element counts of all pages currently in the range.
// NOTE(review): the initial value `0U` makes std::accumulate carry the running sum as `unsigned int`, so the
// `std::size_t` returned by the lambda is narrowed on every step and `nElements` is an `unsigned int`.
// Presumably a `std::size_t` / `std::uint64_t` seed is intended -- confirm.
236 const auto nElements =
237 std::accumulate(fPageInfos.begin(), fPageInfos.end(), 0U,
238 [](std::size_t n, const auto &pageInfo) { return n + pageInfo.GetNElements(); });
239 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.GetNElements());
240
242 return 0U;
// The range can only grow: fewer required elements than present is treated as a programming error.
243 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
244
245 std::vector<RPageInfo> pageInfos;
246 // Synthesize new `RPageInfo`s as needed
// Number of elements per synthesized page, derived from the requested page size and the element size.
247 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
251 pageInfo.SetNElements(std::min(nElementsPerPage, nRemainingElements));
254 locator.SetNBytesOnStorage(element.GetPackedSize(pageInfo.GetNElements()));
255 pageInfo.SetLocator(locator);
256 pageInfos.emplace_back(pageInfo);
257 nRemainingElements -= pageInfo.GetNElements();
258 }
259
// The synthesized page infos come first; the existing ones are moved behind them and the combined list
// then replaces fPageInfos.
260 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
261 std::make_move_iterator(fPageInfos.end()));
262 std::swap(fPageInfos, pageInfos);
264}
265
267{
268 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
269 fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
270}
271
273{
274 std::uint64_t nbytes = 0;
275 for (const auto &pr : fPageRanges) {
276 for (const auto &pi : pr.second.GetPageInfos()) {
277 nbytes += pi.GetLocator().GetNBytesOnStorage();
278 }
279 }
280 return nbytes;
281}
282
284{
285 RClusterDescriptor clone;
286 clone.fClusterId = fClusterId;
287 clone.fFirstEntryIndex = fFirstEntryIndex;
288 clone.fNEntries = fNEntries;
289 clone.fColumnRanges = fColumnRanges;
290 for (const auto &d : fPageRanges)
291 clone.fPageRanges.emplace(d.first, d.second.Clone());
292 return clone;
293}
294
295////////////////////////////////////////////////////////////////////////////////
296
298{
299 return fContentId == other.fContentId && fTypeName == other.fTypeName && fTypeVersion == other.fTypeVersion;
300}
301
303{
305 clone.fContentId = fContentId;
306 clone.fTypeVersion = fTypeVersion;
307 clone.fTypeName = fTypeName;
308 clone.fContent = fContent;
309 return clone;
310}
311
312////////////////////////////////////////////////////////////////////////////////
313
315{
316 // clang-format off
317 return fName == other.fName &&
318 fDescription == other.fDescription &&
319 fNEntries == other.fNEntries &&
320 fGeneration == other.fGeneration &&
321 fFieldZeroId == other.fFieldZeroId &&
322 fFieldDescriptors == other.fFieldDescriptors &&
323 fColumnDescriptors == other.fColumnDescriptors &&
324 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
325 fClusterDescriptors == other.fClusterDescriptors;
326 // clang-format on
327}
328
330{
332 for (const auto &cd : fClusterDescriptors) {
333 if (!cd.second.ContainsColumn(physicalColumnId))
334 continue;
335 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
336 result = std::max(result, columnRange.GetFirstElementIndex() + columnRange.GetNElements());
337 }
338 return result;
339}
340
343{
344 std::string leafName(fieldName);
345 auto posDot = leafName.find_last_of('.');
346 if (posDot != std::string::npos) {
347 auto parentName = leafName.substr(0, posDot);
348 leafName = leafName.substr(posDot + 1);
349 parentId = FindFieldId(parentName, parentId);
350 }
351 auto itrFieldDesc = fFieldDescriptors.find(parentId);
352 if (itrFieldDesc == fFieldDescriptors.end())
354 for (const auto linkId : itrFieldDesc->second.GetLinkIds()) {
355 if (fFieldDescriptors.at(linkId).GetFieldName() == leafName)
356 return linkId;
357 }
359}
360
362{
364 return "";
365
366 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
367 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
368 if (prefix.empty())
369 return fieldDescriptor.GetFieldName();
370 return prefix + "." + fieldDescriptor.GetFieldName();
371}
372
374{
375 std::string typeName = fieldDesc.GetTypeName();
376
377 // ROOT v6.34, with spec versions before 1.0.0.1, did not properly renormalize the type name.
378 R__ASSERT(fVersionEpoch == 1);
379 if (fVersionMajor == 0 && fVersionMinor == 0 && fVersionPatch < 1) {
380 typeName = ROOT::Internal::GetRenormalizedTypeName(typeName);
381 }
382
383 return typeName;
384}
385
387{
388 return FindFieldId(fieldName, GetFieldZeroId());
389}
390
392 std::uint32_t columnIndex,
393 std::uint16_t representationIndex) const
394{
395 auto itr = fFieldDescriptors.find(fieldId);
396 if (itr == fFieldDescriptors.cend())
398 if (columnIndex >= itr->second.GetColumnCardinality())
400 const auto idx = representationIndex * itr->second.GetColumnCardinality() + columnIndex;
401 if (itr->second.GetLogicalColumnIds().size() <= idx)
403 return itr->second.GetLogicalColumnIds()[idx];
404}
405
407 std::uint32_t columnIndex,
408 std::uint16_t representationIndex) const
409{
410 auto logicalId = FindLogicalColumnId(fieldId, columnIndex, representationIndex);
413 return GetColumnDescriptor(logicalId).GetPhysicalId();
414}
415
418{
419 if (GetNClusterGroups() == 0)
421
422 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
423
424 std::size_t cgLeft = 0;
425 std::size_t cgRight = GetNClusterGroups() - 1;
426 while (cgLeft <= cgRight) {
427 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
428 const auto &clusterIds = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]).GetClusterIds();
429 R__ASSERT(!clusterIds.empty());
430
431 const auto &clusterDesc = GetClusterDescriptor(clusterIds.front());
432 // this may happen if the RNTuple has an empty schema
433 if (!clusterDesc.ContainsColumn(physicalColumnId))
435
436 const auto firstElementInGroup = clusterDesc.GetColumnRange(physicalColumnId).GetFirstElementIndex();
438 // Look into the lower half of cluster groups
440 cgRight = cgMidpoint - 1;
441 continue;
442 }
443
444 const auto &lastColumnRange = GetClusterDescriptor(clusterIds.back()).GetColumnRange(physicalColumnId);
445 if ((lastColumnRange.GetFirstElementIndex() + lastColumnRange.GetNElements()) <= index) {
446 // Look into the upper half of cluster groups
447 cgLeft = cgMidpoint + 1;
448 continue;
449 }
450
451 // Binary search in the current cluster group; since we already checked the element range boundaries,
452 // the element must be in that cluster group.
453 std::size_t clusterLeft = 0;
454 std::size_t clusterRight = clusterIds.size() - 1;
455 while (clusterLeft <= clusterRight) {
456 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
458 const auto &columnRange = GetClusterDescriptor(clusterId).GetColumnRange(physicalColumnId);
459
460 if (columnRange.Contains(index))
461 return clusterId;
462
463 if (columnRange.GetFirstElementIndex() > index) {
466 continue;
467 }
468
469 if (columnRange.GetFirstElementIndex() + columnRange.GetNElements() <= index) {
471 continue;
472 }
473 }
474 R__ASSERT(false);
475 }
477}
478
480{
481 if (GetNClusterGroups() == 0)
483
484 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
485
486 std::size_t cgLeft = 0;
487 std::size_t cgRight = GetNClusterGroups() - 1;
488 while (cgLeft <= cgRight) {
489 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
490 const auto &cgDesc = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]);
491
492 if (cgDesc.GetMinEntry() > entryIdx) {
494 cgRight = cgMidpoint - 1;
495 continue;
496 }
497
498 if (cgDesc.GetMinEntry() + cgDesc.GetEntrySpan() <= entryIdx) {
499 cgLeft = cgMidpoint + 1;
500 continue;
501 }
502
503 // Binary search in the current cluster group; since we already checked the element range boundaries,
504 // the element must be in that cluster group.
505 const auto &clusterIds = cgDesc.GetClusterIds();
506 R__ASSERT(!clusterIds.empty());
507 std::size_t clusterLeft = 0;
508 std::size_t clusterRight = clusterIds.size() - 1;
509 while (clusterLeft <= clusterRight) {
510 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
511 const auto &clusterDesc = GetClusterDescriptor(clusterIds[clusterMidpoint]);
512
513 if (clusterDesc.GetFirstEntryIndex() > entryIdx) {
516 continue;
517 }
518
519 if (clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries() <= entryIdx) {
521 continue;
522 }
523
525 }
526 R__ASSERT(false);
527 }
529}
530
532{
533 // TODO(jblomer): we may want to shortcut the common case and check if clusterId + 1 contains
534 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
535 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
536 // binary search code path remains tested.
537 const auto &clusterDesc = GetClusterDescriptor(clusterId);
538 const auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
539 return FindClusterId(firstEntryInNextCluster);
540}
541
543{
544 // TODO(jblomer): we may want to shortcut the common case and check if clusterId - 1 contains
545 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
546 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
547 // binary search code path remains tested.
548 const auto &clusterDesc = GetClusterDescriptor(clusterId);
549 if (clusterDesc.GetFirstEntryIndex() == 0)
551 return FindClusterId(clusterDesc.GetFirstEntryIndex() - 1);
552}
553
554std::vector<ROOT::DescriptorId_t>
556{
557 auto fieldZeroId = desc.GetFieldZeroId();
558
559 std::vector<ROOT::DescriptorId_t> fields;
560 for (const auto fieldId : fFieldIdsOrder) {
561 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
562 fields.emplace_back(fieldId);
563 }
564 return fields;
565}
566
572
574 : fNTuple(ntuple)
575{
576 std::deque<ROOT::DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
577
578 while (!fieldIdQueue.empty()) {
579 auto currFieldId = fieldIdQueue.front();
580 fieldIdQueue.pop_front();
581
582 const auto &columns = ntuple.GetFieldDescriptor(currFieldId).GetLogicalColumnIds();
583 fColumns.insert(fColumns.end(), columns.begin(), columns.end());
584
585 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
586 auto fieldId = field.GetId();
587 fieldIdQueue.push_back(fieldId);
588 }
589 }
590}
591
592std::vector<std::uint64_t> ROOT::RNTupleDescriptor::GetFeatureFlags() const
593{
594 std::vector<std::uint64_t> result;
595 unsigned int base = 0;
596 std::uint64_t flags = 0;
597 for (auto f : fFeatureFlags) {
598 if ((f > 0) && ((f % 64) == 0))
599 throw RException(R__FAIL("invalid feature flag: " + std::to_string(f)));
600 while (f > base + 64) {
601 result.emplace_back(flags);
602 flags = 0;
603 base += 64;
604 }
605 f -= base;
606 flags |= 1 << f;
607 }
608 result.emplace_back(flags);
609 return result;
610}
611
613 std::vector<RClusterDescriptor> &clusterDescs)
614{
616 if (iter == fClusterGroupDescriptors.end())
617 return R__FAIL("invalid attempt to add details of unknown cluster group");
618 if (iter->second.HasClusterDetails())
619 return R__FAIL("invalid attempt to re-populate cluster group details");
620 if (iter->second.GetNClusters() != clusterDescs.size())
621 return R__FAIL("mismatch of number of clusters");
622
623 std::vector<ROOT::DescriptorId_t> clusterIds;
624 for (unsigned i = 0; i < clusterDescs.size(); ++i) {
625 clusterIds.emplace_back(clusterDescs[i].GetId());
626 auto [_, success] = fClusterDescriptors.emplace(clusterIds.back(), std::move(clusterDescs[i]));
627 if (!success) {
628 return R__FAIL("invalid attempt to re-populate existing cluster");
629 }
630 }
632 return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
633 });
635 cgBuilder.AddSortedClusters(clusterIds);
636 iter->second = cgBuilder.MoveDescriptor().Unwrap();
637 return RResult<void>::Success();
638}
639
641{
643 if (iter == fClusterGroupDescriptors.end())
644 return R__FAIL("invalid attempt to drop cluster details of unknown cluster group");
645 if (!iter->second.HasClusterDetails())
646 return R__FAIL("invalid attempt to drop details of cluster group summary");
647
648 for (auto clusterId : iter->second.GetClusterIds())
650 iter->second = iter->second.CloneSummary();
651 return RResult<void>::Success();
652}
653
654std::unique_ptr<ROOT::RNTupleModel> ROOT::RNTupleDescriptor::CreateModel(const RCreateModelOptions &options) const
655{
656 // Collect all top-level fields that have invalid columns (recursively): by default if we find any we throw an
657 // exception; if we are in ForwardCompatible mode, we proceed but skip of all those top-level fields.
658 std::unordered_set<ROOT::DescriptorId_t> invalidFields;
659 for (const auto &colDesc : GetColumnIterable()) {
661 auto fieldId = colDesc.GetFieldId();
662 while (1) {
663 const auto &field = GetFieldDescriptor(fieldId);
664 if (field.GetParentId() == GetFieldZeroId())
665 break;
666 fieldId = field.GetParentId();
667 }
668 invalidFields.insert(fieldId);
669
670 // No need to look for all invalid fields if we're gonna error out anyway
671 if (!options.GetForwardCompatible())
672 break;
673 }
674 }
675
676 if (!options.GetForwardCompatible() && !invalidFields.empty())
678 "cannot create Model: descriptor contains unknown column types. Use 'SetForwardCompatible(true)' on the "
679 "RCreateModelOptions to create a partial model containing only the fields made up by known columns."));
680
681 auto fieldZero = std::make_unique<ROOT::RFieldZero>();
682 fieldZero->SetOnDiskId(GetFieldZeroId());
683 auto model = options.GetCreateBare() ? RNTupleModel::CreateBare(std::move(fieldZero))
684 : RNTupleModel::Create(std::move(fieldZero));
686 createFieldOpts.SetReturnInvalidOnError(options.GetForwardCompatible());
687 createFieldOpts.SetEmulateUnknownTypes(options.GetEmulateUnknownTypes());
688 for (const auto &topDesc : GetTopLevelFields()) {
689 if (invalidFields.count(topDesc.GetId()) > 0) {
690 // Field contains invalid columns: skip it
691 continue;
692 }
693
694 auto field = topDesc.CreateField(*this, createFieldOpts);
695
696 // If we got an InvalidField here, figure out if it's a hard error or if the field must simply be skipped.
697 // The only case where it's not a hard error is if the field has an unknown structure, as that case is
698 // covered by the ForwardCompatible flag (note that if the flag is off we would not get here
699 // in the first place, so we don't need to check for that flag again).
700 if (field->GetTraits() & ROOT::RFieldBase::kTraitInvalidField) {
701 const auto &invalid = static_cast<const RInvalidField &>(*field);
702 const auto cat = invalid.GetCategory();
704 if (mustThrow)
705 throw invalid.GetError();
706
707 // Not a hard error: skip the field and go on.
708 continue;
709 }
710
711 if (options.GetReconstructProjections() && topDesc.IsProjectedField()) {
712 model->AddProjectedField(std::move(field), [this](const std::string &targetName) -> std::string {
713 return GetQualifiedFieldName(GetFieldDescriptor(FindFieldId(targetName)).GetProjectionSourceId());
714 });
715 } else {
716 model->AddField(std::move(field));
717 }
718 }
719 model->Freeze();
720 return model;
721}
722
724{
725 RNTupleDescriptor clone;
726 clone.fName = fName;
731 // OnDiskHeaderSize, OnDiskHeaderXxHash3 not copied because they may come from a merged header + extension header
732 // and therefore not represent the actual sources's header.
733 // OnDiskFooterSize not copied because it contains information beyond the schema, for example the clustering.
734
735 for (const auto &d : fFieldDescriptors)
736 clone.fFieldDescriptors.emplace(d.first, d.second.Clone());
737 for (const auto &d : fColumnDescriptors)
738 clone.fColumnDescriptors.emplace(d.first, d.second.Clone());
739
740 for (const auto &d : fExtraTypeInfoDescriptors)
741 clone.fExtraTypeInfoDescriptors.emplace_back(d.Clone());
743 clone.fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
744
745 return clone;
746}
747
749{
751
756
760 clone.fNEntries = fNEntries;
761 clone.fNClusters = fNClusters;
762 clone.fGeneration = fGeneration;
763 for (const auto &d : fClusterGroupDescriptors)
764 clone.fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
766 for (const auto &d : fClusterDescriptors)
767 clone.fClusterDescriptors.emplace(d.first, d.second.Clone());
768 return clone;
769}
770
771////////////////////////////////////////////////////////////////////////////////
772
774{
775 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds &&
776 fMinEntry == other.fMinEntry && fEntrySpan == other.fEntrySpan && fNClusters == other.fNClusters;
777}
778
780{
782 clone.fClusterGroupId = fClusterGroupId;
783 clone.fPageListLocator = fPageListLocator;
784 clone.fPageListLength = fPageListLength;
785 clone.fMinEntry = fMinEntry;
786 clone.fEntrySpan = fEntrySpan;
787 clone.fNClusters = fNClusters;
788 return clone;
789}
790
792{
793 RClusterGroupDescriptor clone = CloneSummary();
794 clone.fClusterIds = fClusterIds;
795 return clone;
796}
797
798////////////////////////////////////////////////////////////////////////////////
799
802 std::uint64_t firstElementIndex,
803 std::uint32_t compressionSettings,
805{
806 if (physicalId != pageRange.fPhysicalColumnId)
807 return R__FAIL("column ID mismatch");
808 if (fCluster.fColumnRanges.count(physicalId) > 0)
809 return R__FAIL("column ID conflict");
811 for (const auto &pi : pageRange.fPageInfos) {
812 columnRange.IncrementNElements(pi.GetNElements());
813 }
814 fCluster.fPageRanges[physicalId] = pageRange.Clone();
815 fCluster.fColumnRanges[physicalId] = columnRange;
816 return RResult<void>::Success();
817}
818
821{
822 if (fCluster.fColumnRanges.count(physicalId) > 0)
823 return R__FAIL("column ID conflict");
824
826 columnRange.SetPhysicalColumnId(physicalId);
827 columnRange.SetIsSuppressed(true);
828 fCluster.fColumnRanges[physicalId] = columnRange;
829 return RResult<void>::Success();
830}
831
834{
835 for (auto &[_, columnRange] : fCluster.fColumnRanges) {
836 if (!columnRange.IsSuppressed())
837 continue;
838 R__ASSERT(columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex);
839
840 const auto &columnDesc = desc.GetColumnDescriptor(columnRange.GetPhysicalColumnId());
841 const auto &fieldDesc = desc.GetFieldDescriptor(columnDesc.GetFieldId());
842 // We expect only few columns and column representations per field, so we do a linear search
843 for (const auto otherColumnLogicalId : fieldDesc.GetLogicalColumnIds()) {
845 if (otherColumnDesc.GetRepresentationIndex() == columnDesc.GetRepresentationIndex())
846 continue;
847 if (otherColumnDesc.GetIndex() != columnDesc.GetIndex())
848 continue;
849
850 // Found corresponding column of a different column representation
851 const auto &otherColumnRange = fCluster.GetColumnRange(otherColumnDesc.GetPhysicalId());
852 if (otherColumnRange.IsSuppressed())
853 continue;
854
855 columnRange.SetFirstElementIndex(otherColumnRange.GetFirstElementIndex());
856 columnRange.SetNElements(otherColumnRange.GetNElements());
857 break;
858 }
859
860 if (columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex) {
861 return R__FAIL(std::string("cannot find non-suppressed column for column ID ") +
862 std::to_string(columnRange.GetPhysicalColumnId()) +
863 ", cluster ID: " + std::to_string(fCluster.GetId()));
864 }
865 }
866 return RResult<void>::Success();
867}
868
871{
872 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
873 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
874 /// parent field in the hierarchy.
876 const auto &visitField, const auto &enterSubtree) -> void {
878 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
879 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
881 }
882 };
883
884 // Extended columns can only be part of the header extension
885 if (!desc.GetHeaderExtension())
886 return *this;
887
888 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
889 // Extended columns can be attached both to fields of the regular header and to fields of the extension header
890 for (const auto &topLevelField : desc.GetTopLevelFields()) {
892 topLevelField.GetId(), std::max(topLevelField.GetNRepetitions(), std::uint64_t{1U}),
893 [&](ROOT::DescriptorId_t fieldId, std::uint64_t nRepetitions) {
894 for (const auto &c : desc.GetColumnIterable(fieldId)) {
895 const ROOT::DescriptorId_t physicalId = c.GetPhysicalId();
896 auto &columnRange = fCluster.fColumnRanges[physicalId];
897
898 // Initialize a RColumnRange for `physicalId` if it was not there. Columns that were created during model
899 // extension won't have on-disk metadata for the clusters that were already committed before the model
900 // was extended. Therefore, these need to be synthetically initialized upon reading.
901 if (columnRange.GetPhysicalColumnId() == ROOT::kInvalidDescriptorId) {
902 columnRange.SetPhysicalColumnId(physicalId);
903 columnRange.SetFirstElementIndex(0);
904 columnRange.SetNElements(0);
905 columnRange.SetIsSuppressed(c.IsSuppressedDeferredColumn());
906 }
907 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
908 // number of elements should have been if the column was not deferred; fix those and let
909 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
910 // Note that a deferred column (i.e, whose first element index is > 0) already met the criteria of
911 // `ROOT::RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the
912 // field zero excluding subfields of collection and variant fields.
913 if (c.IsDeferredColumn()) {
914 if (c.GetRepresentationIndex() == 0) {
915 columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions);
916 columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions);
917 } else {
918 // Deferred representations which are not the first cannot count on the number of elements being
919 // equal to Entries * nRepetitions because they might have been added in a later cluster. But they
920 // can rely on the first representation having the correct FirstElement/NElements (by definition
921 // the first representation cannot be an "extended" one), therefore they can just copy the value
922 // from it.
923 const auto &field = desc.GetFieldDescriptor(fieldId);
924 const auto firstReprColumnId = field.GetLogicalColumnIds()[c.GetIndex()];
925 const auto &firstReprColumnRange = fCluster.fColumnRanges[firstReprColumnId];
926 columnRange.SetFirstElementIndex(firstReprColumnRange.GetFirstElementIndex());
927 columnRange.SetNElements(firstReprColumnRange.GetNElements());
928 }
929 if (!columnRange.IsSuppressed()) {
930 auto &pageRange = fCluster.fPageRanges[physicalId];
931 pageRange.fPhysicalColumnId = physicalId;
932 const auto element = ROOT::Internal::RColumnElementBase::Generate<void>(c.GetType());
933 pageRange.ExtendToFitColumnRange(columnRange, *element, ROOT::Internal::RPage::kPageZeroSize);
934 }
935 } else if (!columnRange.IsSuppressed()) {
936 fCluster.fPageRanges[physicalId].fPhysicalColumnId = physicalId;
937 }
938 }
939 },
941 }
942 return *this;
943}
944
946{
947 if (fCluster.fClusterId == ROOT::kInvalidDescriptorId)
948 return R__FAIL("unset cluster ID");
949 if (fCluster.fNEntries == 0)
950 return R__FAIL("empty cluster");
951 for (auto &pr : fCluster.fPageRanges) {
952 if (fCluster.fColumnRanges.count(pr.first) == 0) {
953 return R__FAIL("missing column range");
954 }
955 pr.second.fCumulativeNElements.clear();
956 pr.second.fCumulativeNElements.reserve(pr.second.fPageInfos.size());
958 for (const auto &pi : pr.second.fPageInfos) {
959 sum += pi.GetNElements();
960 pr.second.fCumulativeNElements.emplace_back(sum);
961 }
962 }
964 std::swap(result, fCluster);
965 return result;
966}
967
968////////////////////////////////////////////////////////////////////////////////
969
// Builds a cluster group builder pre-filled from clusterGroupDesc: the ID, the page list locator and
// length, the minimum entry, the entry span and the number of clusters are copied over.
// Nothing else is set on the builder in the visible lines.
// NOTE(review): the signature lines and listing line 973 (presumably the declaration of `builder`)
// are missing from this extract.
972{
974 builder.ClusterGroupId(clusterGroupDesc.GetId())
975 .PageListLocator(clusterGroupDesc.GetPageListLocator())
976 .PageListLength(clusterGroupDesc.GetPageListLength())
977 .MinEntry(clusterGroupDesc.GetMinEntry())
978 .EntrySpan(clusterGroupDesc.GetEntrySpan())
979 .NClusters(clusterGroupDesc.GetNClusters());
980 return builder;
981}
982
// Hands out the cluster group under construction. Fails with an error if the cluster group ID is
// still kInvalidDescriptorId; otherwise fClusterGroup is swapped into the returned `result`.
// NOTE(review): the signature and listing line 987 (presumably the declaration of `result`) are
// missing from this extract.
984{
985 if (fClusterGroup.fClusterGroupId == ROOT::kInvalidDescriptorId)
986 return R__FAIL("unset cluster group ID");
988 std::swap(result, fClusterGroup);
989 return result;
990}
991
992////////////////////////////////////////////////////////////////////////////////
993
// Hands out the extra type info record under construction by swapping fExtraTypeInfo into the
// returned `result`.
// An invalid content ID is reported by throwing an RException, whereas the other MoveDescriptor
// bodies in this listing return the R__FAIL error to the caller.
// NOTE(review): the signature and listing line 998 (presumably the declaration of `result`) are
// missing from this extract.
995{
996 if (fExtraTypeInfo.fContentId == EExtraTypeInfoIds::kInvalid)
997 throw RException(R__FAIL("invalid extra type info content id"));
999 std::swap(result, fExtraTypeInfo);
1000 return result;
1001}
1002
1003////////////////////////////////////////////////////////////////////////////////
1004
// Returns success if fieldId is a key of fDescriptor.fFieldDescriptors, otherwise an error that
// names the missing ID.
// NOTE(review): the signature (listing line 1005) is missing from this extract.
1006{
1007 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
1008 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
1009 return RResult<void>::Success();
1010}
1011
// Checks the descriptor under construction for consistency and returns the first problem found:
//  - the version epoch must equal RNTuple::kVersionEpoch,
//  - the RNTuple name must pass EnsureValidNameForRNTuple,
//  - every field other than the zero field must have a parent ID,
//  - for fields with columns, the last column representation must be complete.
// NOTE(review): the signature (listing line 1012) and listing line 1021 (the statement executed
// when the name check fails, presumably forwarding that error) are missing from this extract.
1013{
1014 if (fDescriptor.fVersionEpoch != RNTuple::kVersionEpoch) {
1015 return R__FAIL("unset or unsupported RNTuple epoch version");
1016 }
1017
1018 // Reuse field name validity check
1019 auto validName = ROOT::Internal::EnsureValidNameForRNTuple(fDescriptor.GetName(), "Field");
1020 if (!validName) {
1022 }
1023
1024 for (const auto &[fieldId, fieldDesc] : fDescriptor.fFieldDescriptors) {
1025 // parent not properly set?
1026 if (fieldId != fDescriptor.GetFieldZeroId() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
1027 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has an invalid parent id");
1028 }
1029
1030 // Same number of columns in every column representation?
1031 const auto columnCardinality = fieldDesc.GetColumnCardinality();
1032 if (columnCardinality == 0)
1033 continue;
1034
1035 // In AddColumn, we already checked that all but the last representation are complete.
1036 // Check that the last column representation is complete, i.e. has all columns.
1037 const auto &logicalColumnIds = fieldDesc.GetLogicalColumnIds();
1038 const auto nColumns = logicalColumnIds.size();
1039 // If we have only a single column representation, the following condition is true by construction
// NOTE(review): AddColumn in this listing sets the cardinality to the column count of the first
// representation and appends one logical ID per column, so a single complete representation has
// nColumns == columnCardinality and would not take this shortcut -- confirm the intended
// comparison. The check on the last column below still accepts that case.
1040 if ((nColumns + 1) == columnCardinality)
1041 continue;
1042
1043 const auto &lastColumn = fDescriptor.GetColumnDescriptor(logicalColumnIds.back());
1044 if (lastColumn.GetIndex() + 1 != columnCardinality)
1045 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has incomplete column representations");
1046 }
1047
1048 return RResult<void>::Success();
1049}
1050
// Finalizes and hands out the descriptor under construction.
// Throws if EnsureValidDescriptor() reports an error. Otherwise collects all cluster group IDs into
// fSortedClusterGroupIds, orders them by the GetMinEntry() of the group each ID refers to, and
// swaps fDescriptor into the returned `result`.
// NOTE(review): the signature (listing line 1051), listing line 1058 (the opening of the comparison
// lambda taking `a` and `b`) and listing line 1062 (presumably the declaration of `result`) are
// missing from this extract.
1052{
1053 EnsureValidDescriptor().ThrowOnError();
1054 fDescriptor.fSortedClusterGroupIds.reserve(fDescriptor.fClusterGroupDescriptors.size());
1055 for (const auto &[id, _] : fDescriptor.fClusterGroupDescriptors)
1056 fDescriptor.fSortedClusterGroupIds.emplace_back(id);
1057 std::sort(fDescriptor.fSortedClusterGroupIds.begin(), fDescriptor.fSortedClusterGroupIds.end(),
1059 return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
1060 fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
1061 });
1063 std::swap(result, fDescriptor);
1064 return result;
1065}
1066
1068 std::uint16_t versionMinor, std::uint16_t versionPatch)
1069{
// Stores the four version components in the descriptor under construction.
// An RException naming the rejected epoch is thrown before anything is stored.
// NOTE(review): the first signature line (listing line 1067) and listing line 1070 -- the condition
// guarding the throw, presumably a comparison of versionEpoch with the supported epoch -- are
// missing from this extract.
1071 throw RException(R__FAIL("unsupported RNTuple epoch version: " + std::to_string(versionEpoch)));
1072 }
1073 fDescriptor.fVersionEpoch = versionEpoch;
1074 fDescriptor.fVersionMajor = versionMajor;
1075 fDescriptor.fVersionMinor = versionMinor;
1076 fDescriptor.fVersionPatch = versionPatch;
1077}
1078
// Stamps the descriptor under construction with the version constants of this build
// (RNTuple::kVersionEpoch / kVersionMajor / kVersionMinor / kVersionPatch).
// NOTE(review): the signature (listing line 1079) is missing from this extract.
1080{
1081 fDescriptor.fVersionEpoch = RNTuple::kVersionEpoch;
1082 fDescriptor.fVersionMajor = RNTuple::kVersionMajor;
1083 fDescriptor.fVersionMinor = RNTuple::kVersionMinor;
1084 fDescriptor.fVersionPatch = RNTuple::kVersionPatch;
1085}
1086
1088 const std::string_view description)
1089{
// Copies the given name and description into the descriptor under construction. No validation is
// done here; the name is checked later by EnsureValidDescriptor().
// NOTE(review): the first signature line (listing line 1087, declaring `name`) is missing from
// this extract.
1090 fDescriptor.fName = std::string(name);
1091 fDescriptor.fDescription = std::string(description);
1092}
1093
// Records a feature flag in fDescriptor.fFeatureFlags.
// Throws an RException for every flag value that is a multiple of 64 (0 included); the reason
// these values are rejected is not visible in this extract.
// NOTE(review): the signature (listing line 1094) is missing from this extract.
1095{
1096 if (flag % 64 == 0)
1097 throw RException(R__FAIL("invalid feature flag: " + std::to_string(flag)));
1098 fDescriptor.fFeatureFlags.insert(flag);
1099}
1100
// Validates the column under construction and returns a copy of it.
// Errors are returned when the logical ID, the physical ID or the field ID is still
// kInvalidDescriptorId, or when the bit width lies outside the valid range of a known column type.
// NOTE(review): the signature (listing line 1101) is missing from this extract.
1102{
1103 if (fColumn.GetLogicalId() == ROOT::kInvalidDescriptorId)
1104 return R__FAIL("invalid logical column id");
1105 if (fColumn.GetPhysicalId() == ROOT::kInvalidDescriptorId)
1106 return R__FAIL("invalid physical column id");
1107 if (fColumn.GetFieldId() == ROOT::kInvalidDescriptorId)
1108 return R__FAIL("invalid field id, dangling column");
1109
1110 // NOTE: if the column type is unknown we don't want to fail, as we might be reading an RNTuple
1111 // created with a future version of ROOT. In this case we just skip the valid bit range check,
1112 // as we have no idea what the valid range is.
1113 // In general, reading the metadata of an unknown column is fine, it becomes an error only when
1114 // we try to read the actual data contained in it.
1115 if (fColumn.GetType() != ENTupleColumnType::kUnknown) {
1116 const auto [minBits, maxBits] = ROOT::Internal::RColumnElementBase::GetValidBitRange(fColumn.GetType());
1117 if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
1118 return R__FAIL("invalid column bit width");
1119 }
1120
// The builder keeps its state; the caller receives a clone.
1121 return fColumn.Clone();
1122}
1123
1131
// Builds a field descriptor builder from a live field: field and type version, name, description,
// type name and alias, structure and repetition count are copied from `field`.
// No field ID or parent ID is set in the visible lines.
// NOTE(review): the signature lines, listing line 1135 (presumably the declaration of `fieldDesc`)
// and listing line 1144 are missing from this extract. Line 1144 presumably makes the TypeChecksum
// call below conditional -- confirm against the original source.
1134{
1136 fieldDesc.FieldVersion(field.GetFieldVersion())
1137 .TypeVersion(field.GetTypeVersion())
1138 .FieldName(field.GetFieldName())
1139 .FieldDescription(field.GetDescription())
1140 .TypeName(field.GetTypeName())
1141 .TypeAlias(field.GetTypeAlias())
1142 .Structure(field.GetStructure())
1143 .NRepetitions(field.GetNRepetitions());
1145 fieldDesc.TypeChecksum(field.GetTypeChecksum());
1146 return fieldDesc;
1147}
1148
// Validates the field under construction and returns a copy of it.
// Errors are returned for an unset field ID and for an invalid structure. The name is only checked
// for fields that have a parent: it must pass EnsureValidNameForRNTuple and must not be empty.
// NOTE(review): the signature (listing line 1149) and listing line 1161 (the statement executed
// when the name check fails, presumably forwarding that error) are missing from this extract.
1150{
1151 if (fField.GetId() == ROOT::kInvalidDescriptorId) {
1152 return R__FAIL("invalid field id");
1153 }
1154 if (fField.GetStructure() == ROOT::ENTupleStructure::kInvalid) {
1155 return R__FAIL("invalid field structure");
1156 }
1157 // FieldZero is usually named "" and would be a false positive here
1158 if (fField.GetParentId() != ROOT::kInvalidDescriptorId) {
1159 auto validName = ROOT::Internal::EnsureValidNameForRNTuple(fField.GetFieldName(), "Field");
1160 if (!validName) {
1162 }
1163 if (fField.GetFieldName().empty()) {
1164 return R__FAIL("name cannot be empty string \"\"");
1165 }
1166 }
// The builder keeps its state; the caller receives a clone.
1167 return fField.Clone();
1168}
1169
// Stores a clone of fieldDesc under its ID. If a header extension has been started, the field is
// also marked as extended there. A field with an empty name and no parent is remembered as the
// zero field.
// emplace() does not overwrite: if the ID is already present, the existing entry is kept.
// NOTE(review): the signature (listing line 1170) is missing from this extract.
1171{
1172 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
1173 if (fDescriptor.fHeaderExtension)
1174 fDescriptor.fHeaderExtension->MarkExtendedField(fieldDesc);
1175 if (fieldDesc.GetFieldName().empty() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
1176 fDescriptor.fFieldZeroId = fieldDesc.GetId();
1177 }
1178}
1179
// Makes the field linkId a child of the field fieldId.
// Both fields must already exist. The zero field cannot become a child, a field cannot be its own
// child, and a rejected parent assignment is reported as "already has a parent". On success the
// child's fParentId is set and linkId is appended to the parent's fLinkIds.
// NOTE(review): the signature lines and listing lines 1183 (presumably the declaration of
// `fieldExists`), 1185 (the statement executed when the parent field is missing) and 1194 (the
// condition guarding the "already has a parent" error) are missing from this extract.
1182{
1184 if (!(fieldExists = EnsureFieldExists(fieldId)))
1186 if (!(fieldExists = EnsureFieldExists(linkId)))
1187 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
1188
1189 if (linkId == fDescriptor.GetFieldZeroId()) {
1190 return R__FAIL("cannot make FieldZero a child field");
1191 }
1192 // fail if field already has another valid parent
1193 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
1195 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" + std::to_string(parentId) + ")");
1196 }
1197 if (fieldId == linkId) {
1198 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
1199 }
// Record the relationship in both directions: child -> parent and parent -> children.
1200 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
1201 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
1202 return RResult<void>::Success();
1203}
1204
// Marks the field targetId as a projection of the field sourceId.
// Both fields must already exist. The zero field cannot be projected, a field cannot project
// itself, the source must not itself be a projected field, and a target that is already projected
// may only be re-assigned to the same source. On success the target's fProjectionSourceId is set.
// NOTE(review): the signature lines and listing lines 1208 (presumably the declaration of
// `fieldExists`) and 1210 (the statement executed when the source field is missing) are missing
// from this extract.
1207{
1209 if (!(fieldExists = EnsureFieldExists(sourceId)))
1211 if (!(fieldExists = EnsureFieldExists(targetId)))
1212 return R__FAIL("projected field with id '" + std::to_string(targetId) + "' doesn't exist in NTuple");
1213
1214 if (targetId == fDescriptor.GetFieldZeroId()) {
1215 return R__FAIL("cannot make FieldZero a projected field");
1216 }
1217 if (sourceId == targetId) {
1218 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of itself");
1219 }
1220 if (fDescriptor.fFieldDescriptors.at(sourceId).IsProjectedField()) {
1221 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of an already projected field");
1222 }
1223 // fail if target field already has another valid projection source
1224 auto &targetDesc = fDescriptor.fFieldDescriptors.at(targetId);
1225 if (targetDesc.IsProjectedField() && targetDesc.GetProjectionSourceId() != sourceId) {
1226 return R__FAIL("field '" + std::to_string(targetId) + "' has already a projection source ('" +
1227 std::to_string(targetDesc.GetProjectionSourceId()) + ")");
1228 }
1229 fDescriptor.fFieldDescriptors.at(targetId).fProjectionSourceId = sourceId;
1230 return RResult<void>::Success();
1231}
1232
1234{
1235 const auto fieldId = columnDesc.GetFieldId();
1236 const auto columnIndex = columnDesc.GetIndex();
1237 const auto representationIndex = columnDesc.GetRepresentationIndex();
1238
1239 auto fieldExists = EnsureFieldExists(fieldId);
1240 if (!fieldExists) {
1242 }
1243 auto &fieldDesc = fDescriptor.fFieldDescriptors.find(fieldId)->second;
1244
1245 if (columnDesc.IsAliasColumn()) {
1246 if (columnDesc.GetType() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetType())
1247 return R__FAIL("alias column type mismatch");
1248 }
1249 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex, representationIndex) != ROOT::kInvalidDescriptorId) {
1250 return R__FAIL("column index clash");
1251 }
1252 if (columnIndex > 0) {
1253 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex - 1, representationIndex) == ROOT::kInvalidDescriptorId)
1254 return R__FAIL("out of bounds column index");
1255 }
1256 if (representationIndex > 0) {
1257 if (fDescriptor.FindLogicalColumnId(fieldId, 0, representationIndex - 1) == ROOT::kInvalidDescriptorId) {
1258 return R__FAIL("out of bounds representation index");
1259 }
1260 if (columnIndex == 0) {
1261 assert(fieldDesc.fColumnCardinality > 0);
1262 if (fDescriptor.FindLogicalColumnId(fieldId, fieldDesc.fColumnCardinality - 1, representationIndex - 1) ==
1264 return R__FAIL("incomplete column representations");
1265 }
1266 } else {
1267 if (columnIndex >= fieldDesc.fColumnCardinality)
1268 return R__FAIL("irregular column representations");
1269 }
1270 } else {
1271 // This will set the column cardinality to the number of columns of the first representation
1272 fieldDesc.fColumnCardinality = columnIndex + 1;
1273 }
1274
1275 const auto logicalId = columnDesc.GetLogicalId();
1276 fieldDesc.fLogicalColumnIds.emplace_back(logicalId);
1277
1278 if (!columnDesc.IsAliasColumn())
1279 fDescriptor.fNPhysicalColumns++;
1280 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
1281 if (fDescriptor.fHeaderExtension)
1282 fDescriptor.fHeaderExtension->MarkExtendedColumn(columnDesc);
1283
1284 return RResult<void>::Success();
1285}
1286
// Stores a cluster group in the descriptor under construction and updates the totals: fNEntries
// becomes the maximum of its current value and the end of the group's entry range (min entry plus
// entry span), and fNClusters grows by the group's cluster count.
// Returns an error, without changing anything, if a group with the same ID is already present.
// NOTE(review): the signature (listing line 1287) is missing from this extract.
1288{
1289 const auto id = clusterGroup.GetId();
1290 if (fDescriptor.fClusterGroupDescriptors.count(id) > 0)
1291 return R__FAIL("cluster group id clash");
// All reads of clusterGroup happen before it is moved into the map.
1292 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, clusterGroup.GetMinEntry() + clusterGroup.GetEntrySpan());
1293 fDescriptor.fNClusters += clusterGroup.GetNClusters();
1294 fDescriptor.fClusterGroupDescriptors.emplace(id, std::move(clusterGroup));
1295 return RResult<void>::Success();
1296}
1297
// Empties the name, the description, the field, column, cluster and cluster group maps, and drops
// the header extension of the descriptor under construction.
// NOTE(review): counters that other builder methods in this listing maintain (fNPhysicalColumns in
// AddColumn; fNEntries and fNClusters in AddClusterGroup), as well as fExtraTypeInfoDescriptors and
// fSortedClusterGroupIds, are not touched here -- confirm that this is intended.
// NOTE(review): the signature (listing line 1298) is missing from this extract.
1299{
1300 fDescriptor.fName = "";
1301 fDescriptor.fDescription = "";
1302 fDescriptor.fFieldDescriptors.clear();
1303 fDescriptor.fColumnDescriptors.clear();
1304 fDescriptor.fClusterDescriptors.clear();
1305 fDescriptor.fClusterGroupDescriptors.clear();
1306 fDescriptor.fHeaderExtension.reset();
1307}
1308
1313
// Creates the header extension object if there is none yet; calling this again keeps the existing
// one. AddField and AddColumn in this listing record their argument in the extension once it exists.
// NOTE(review): the signature (listing line 1314) is missing from this extract.
1315{
1316 if (!fDescriptor.fHeaderExtension)
1317 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
1318}
1319
// Adds `offset` to the logical ID of every column whose ID lies in
// [GetNPhysicalColumns(), GetNLogicalColumns()); each such column is asserted to be an alias column.
// For every shifted column the map entry is re-keyed, the matching entry in the owning field's
// fLogicalColumnIds is updated, and afterwards the IDs recorded in the header extension are patched.
// Does nothing if the descriptor has no columns.
// NOTE(review): the signature (listing line 1320, declaring `offset`) is missing from this extract.
1321{
1322 if (fDescriptor.GetNLogicalColumns() == 0)
1323 return;
1324 R__ASSERT(fDescriptor.GetNPhysicalColumns() > 0);
1325
// Walk from the highest ID downwards. Because the lower bound is asserted to be > 0 above, the
// unsigned counter cannot wrap around before the loop condition turns false.
1326 for (ROOT::DescriptorId_t id = fDescriptor.GetNLogicalColumns() - 1; id >= fDescriptor.GetNPhysicalColumns(); --id) {
// Work on a copy, since the map entry is erased right below.
1327 auto c = fDescriptor.fColumnDescriptors[id].Clone();
1328 R__ASSERT(c.IsAliasColumn());
1329 R__ASSERT(id == c.GetLogicalId());
1330 fDescriptor.fColumnDescriptors.erase(id);
1331 for (auto &link : fDescriptor.fFieldDescriptors[c.fFieldId].fLogicalColumnIds) {
1332 if (link == c.fLogicalColumnId) {
1333 link += offset;
1334 break;
1335 }
1336 }
1337 c.fLogicalColumnId += offset;
// The shifted ID must not collide with an existing column.
1338 R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0);
1339 fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c));
1340 }
1341
1342 // Patch up column ids in the header extension
1343 if (auto &xHeader = fDescriptor.fHeaderExtension) {
1344 for (auto &columnId : xHeader->fExtendedColumnRepresentations) {
1345 if (columnId >= fDescriptor.GetNPhysicalColumns())
1346 columnId += offset;
1347 }
1348 }
1349}
1350
// Moves clusterDesc into fDescriptor.fClusterDescriptors under its ID.
// Returns an error, leaving clusterDesc untouched, if a cluster with the same ID is already present.
// NOTE(review): the signature (listing line 1351) is missing from this extract.
1352{
1353 auto clusterId = clusterDesc.GetId();
1354 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
1355 return R__FAIL("cluster id clash");
1356 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
1357 return RResult<void>::Success();
1358}
1359
// Appends extraTypeInfoDesc to fDescriptor.fExtraTypeInfoDescriptors.
// Returns an error if an element comparing equal to it is already stored (linear search).
// NOTE(review): the signature lines are missing from this extract.
1362{
1363 // Make sure we have no duplicates
1364 if (std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1365 extraTypeInfoDesc) != fDescriptor.fExtraTypeInfoDescriptors.end()) {
1366 return R__FAIL("extra type info duplicates");
1367 }
1368 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1369 return RResult<void>::Success();
1370}
1371
// Overwrites the stored element found by std::find with extraTypeInfoDesc, or appends
// extraTypeInfoDesc if the search finds nothing.
// NOTE(review): the signature (listing line 1372) and listing line 1375 (the value argument and
// closing of the std::find call, presumably `extraTypeInfoDesc`) are missing from this extract.
1373{
1374 auto it = std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1376 if (it != fDescriptor.fExtraTypeInfoDescriptors.end())
1377 *it = std::move(extraTypeInfoDesc);
1378 else
1379 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1380}
1381
1386
1392
1399
// Convenience overload: resolves fieldId to its field descriptor and forwards to the
// GetFieldIterable overload that takes the descriptor.
// NOTE(review): the signature lines are missing from this extract.
1402{
1403 return GetFieldIterable(GetFieldDescriptor(fieldId));
1404}
1405
1412
// Returns the field iterable of the zero field, i.e. of the ID returned by GetFieldZeroId().
// NOTE(review): the signature (listing line 1413) is missing from this extract.
1414{
1415 return GetFieldIterable(GetFieldZeroId());
1416}
1417
1419 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const
1420{
// Same as the parameterless variant, but passes `comparator` (a predicate over two field IDs) on
// to GetFieldIterable together with the zero field ID.
// NOTE(review): the first signature line (listing line 1418) is missing from this extract.
1421 return GetFieldIterable(GetFieldZeroId(), comparator);
1422}
1423
1428
1434
1440
1445
1450
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:303
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
void cd(Int_t id=-1)
#define _(A, B)
Definition cfortran.h:108
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
static std::pair< std::uint16_t, std::uint16_t > GetValidBitRange(ROOT::ENTupleColumnType type)
Most types have a fixed on-disk bit width.
RResult< RExtraTypeInfoDescriptor > MoveDescriptor()
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live RNTuple field.
void SetNTuple(const std::string_view name, const std::string_view description)
void SetSchemaFromExisting(const RNTupleDescriptor &descriptor)
Copies the "schema" part of descriptor into the builder's descriptor.
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
RResult< void > AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId)
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
void ShiftAliasColumns(std::uint32_t offset)
Shift column IDs of alias columns by offset
void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId)
void Reset()
Clears so-far stored clusters, fields, and columns and return to a pristine RNTupleDescriptor.
void AddField(const RFieldDescriptor &fieldDesc)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > EnsureFieldExists(ROOT::DescriptorId_t fieldId) const
A helper class for serializing and deserialization of the RNTuple binary format.
The window of element indexes of a particular column in a particular cluster.
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange.
Metadata for RNTuple clusters.
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges.
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::DescriptorId_t fClusterId
RClusterDescriptor Clone() const
bool operator==(const RClusterDescriptor &other) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
std::uint64_t GetNBytesOnStorage() const
Clusters are bundled in cluster groups.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor Clone() const
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor CloneSummary() const
Creates a clone without the cluster IDs.
Metadata stored for every column of an RNTuple.
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
bool operator==(const RColumnDescriptor &other) const
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::ENTupleColumnType fType
The on-disk column type.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
RColumnDescriptor Clone() const
Get a copy of the descriptor.
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Field specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor Clone() const
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
A field translates read and write calls from/to underlying columns to/from tree values.
@ kTraitInvalidField
This field is an instance of RInvalidField and can be safely static_cast to it.
@ kTraitTypeChecksum
The TClass checksum is set and valid.
Metadata stored for every field of an RNTuple.
std::unique_ptr< ROOT::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fFieldId
RFieldDescriptor Clone() const
Get a copy of the descriptor.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
bool operator==(const RFieldDescriptor &other) const
std::string fFieldDescription
Free text set by the user.
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fTypeName
The C++ type that was used when writing the field.
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
Used in RFieldBase::Check() to record field creation failures.
Definition RField.hxx:72
@ kGeneric
Generic unrecoverable error.
@ kUnknownStructure
The field could not be created because its descriptor had an unknown structural role.
Used to loop over all the clusters of an RNTuple (in unspecified order)
Used to loop over all the cluster groups of an RNTuple (in unspecified order)
Used to loop over a field's associated columns.
std::vector< ROOT::DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
Used to loop over all the extra type info record of an RNTuple (in unspecified order)
Used to loop over a field's child fields.
std::vector< ROOT::DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
The on-storage metadata of an RNTuple.
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::set< unsigned int > fFeatureFlags
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
ROOT::DescriptorId_t fFieldZeroId
Set by the descriptor builder.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
RColumnDescriptorIterable GetColumnIterable() const
bool operator==(const RNTupleDescriptor &other) const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint16_t fVersionMinor
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
std::vector< std::uint64_t > GetFeatureFlags() const
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
std::unique_ptr< ROOT::RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored metadata.
std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const
Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type nam...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
std::string fName
The RNTuple name needs to be unique in a given storage location (file)
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
RResult< void > DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId)
std::uint16_t fVersionMajor
Set by the descriptor builder when deserialized.
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const
RResult< void > AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
std::uint16_t fVersionPatch
Set by the descriptor builder when deserialized.
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
std::uint16_t fVersionEpoch
Set by the descriptor builder when deserialized.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
RNTupleDescriptor Clone() const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RClusterDescriptorIterable GetClusterIterable() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
std::uint64_t fGeneration
The generation of the descriptor.
ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
Generic information about the physical location of data.
static std::unique_ptr< RNTupleModel > Create()
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static constexpr std::uint16_t kVersionPatch
Definition RNTuple.hxx:79
static constexpr std::uint16_t kVersionMajor
Definition RNTuple.hxx:77
static constexpr std::uint16_t kVersionEpoch
Definition RNTuple.hxx:76
static constexpr std::uint16_t kVersionMinor
Definition RNTuple.hxx:78
const_iterator begin() const
const_iterator end() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
static std::unique_ptr< RVectorField > CreateUntyped(std::string_view fieldName, std::unique_ptr< RFieldBase > itemField)
struct void * fTypeName
Definition cppyy.h:9
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
ROOT::RResult< std::unique_ptr< ROOT::RFieldBase > > CallFieldBaseCreate(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert std::prefix.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
Additional information about a page in an in-memory RPageRange.
Information about a single page in the context of a cluster's page range.
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345