Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#include <ROOT/RError.hxx>
18#include <ROOT/RFieldBase.hxx>
20#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
23#include <string_view>
24
25#include <RZip.h>
26#include <TError.h>
28
29#include <algorithm>
30#include <cstdint>
31#include <deque>
32#include <functional>
33#include <iostream>
34#include <set>
35#include <utility>
36
38{
39 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
40 fFieldName == other.fFieldName && fFieldDescription == other.fFieldDescription &&
41 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
42 fStructure == other.fStructure && fParentId == other.fParentId &&
43 fProjectionSourceId == other.fProjectionSourceId && fLinkIds == other.fLinkIds &&
44 fLogicalColumnIds == other.fLogicalColumnIds && other.fTypeChecksum == other.fTypeChecksum;
45}
46
48{
49 RFieldDescriptor clone;
50 clone.fFieldId = fFieldId;
51 clone.fFieldVersion = fFieldVersion;
52 clone.fTypeVersion = fTypeVersion;
53 clone.fFieldName = fFieldName;
54 clone.fFieldDescription = fFieldDescription;
55 clone.fTypeName = fTypeName;
56 clone.fTypeAlias = fTypeAlias;
57 clone.fNRepetitions = fNRepetitions;
58 clone.fStructure = fStructure;
59 clone.fParentId = fParentId;
60 clone.fProjectionSourceId = fProjectionSourceId;
61 clone.fLinkIds = fLinkIds;
62 clone.fColumnCardinality = fColumnCardinality;
63 clone.fLogicalColumnIds = fLogicalColumnIds;
64 clone.fTypeChecksum = fTypeChecksum;
65 return clone;
66}
67
68std::unique_ptr<ROOT::Experimental::RFieldBase>
70 const RCreateFieldOptions &options) const
71{
72 if (GetStructure() == ENTupleStructure::kStreamer) {
73 auto streamerField = std::make_unique<RStreamerField>(GetFieldName(), GetTypeName());
74 streamerField->SetOnDiskId(fFieldId);
75 return streamerField;
76 }
77
78 // The structure may be unknown if the descriptor comes from a deserialized field with an unknown structural role.
79 // For forward compatibility, we allow this case and return an InvalidField.
80 if (GetStructure() == ENTupleStructure::kUnknown) {
81 if (options.fReturnInvalidOnError) {
82 auto invalidField = std::make_unique<RInvalidField>(GetFieldName(), GetTypeName(), "",
84 invalidField->SetOnDiskId(fFieldId);
85 return invalidField;
86 } else {
87 throw RException(R__FAIL("unexpected on-disk field structure value for field \"" + GetFieldName() + "\""));
88 }
89 }
90
91 if (GetTypeName().empty()) {
92 switch (GetStructure()) {
94 std::vector<std::unique_ptr<RFieldBase>> memberFields;
95 memberFields.reserve(fLinkIds.size());
96 for (auto id : fLinkIds) {
97 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
98 auto field = memberDesc.CreateField(ntplDesc, options);
99 if (field->GetTraits() & RFieldBase::kTraitInvalidField)
100 return field;
101 memberFields.emplace_back(std::move(field));
102 }
103 auto recordField = std::make_unique<RRecordField>(GetFieldName(), std::move(memberFields));
104 recordField->SetOnDiskId(fFieldId);
105 return recordField;
106 }
108 if (fLinkIds.size() != 1) {
109 throw RException(R__FAIL("unsupported untyped collection for field \"" + GetFieldName() + "\""));
110 }
111 auto itemField = ntplDesc.GetFieldDescriptor(fLinkIds[0]).CreateField(ntplDesc, options);
112 if (itemField->GetTraits() & RFieldBase::kTraitInvalidField)
113 return itemField;
114 auto collectionField = RVectorField::CreateUntyped(GetFieldName(), std::move(itemField));
115 collectionField->SetOnDiskId(fFieldId);
116 return collectionField;
117 }
118 default: throw RException(R__FAIL("unsupported untyped field structure for field \"" + GetFieldName() + "\""));
119 }
120 }
121
122 try {
123 const auto &fieldName = GetFieldName();
124 const auto &typeName = GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias();
125 // NOTE: Unwrap() here may throw an exception, hence the try block.
126 // If options.fReturnInvalidOnError is false we just rethrow it, otherwise we return an InvalidField wrapping the
127 // error.
128 auto field = Internal::CallFieldBaseCreate(fieldName, typeName, typeName, options, &ntplDesc, fFieldId).Unwrap();
129 field->SetOnDiskId(fFieldId);
130
131 for (auto &subfield : *field) {
132 const auto subfieldId = ntplDesc.FindFieldId(subfield.GetFieldName(), subfield.GetParent()->GetOnDiskId());
133 subfield.SetOnDiskId(subfieldId);
134 if (subfield.GetTraits() & RFieldBase::kTraitInvalidField) {
135 auto &invalidField = static_cast<RInvalidField &>(subfield);
136 // A subfield being invalid "infects" its entire ancestry.
137 return invalidField.Clone(fieldName);
138 }
139 }
140
141 return field;
142 } catch (RException &ex) {
143 if (options.fReturnInvalidOnError)
144 return std::make_unique<RInvalidField>(GetFieldName(), GetTypeName(), ex.GetError().GetReport(),
146 else
147 throw ex;
148 }
149}
150
152{
153 if (fStructure != ENTupleStructure::kRecord && fStructure != ENTupleStructure::kStreamer)
154 return false;
155
156 // Skip untyped structs
157 if (fTypeName.empty())
158 return false;
159
160 if (fStructure == ENTupleStructure::kRecord) {
161 if (fTypeName.compare(0, 10, "std::pair<") == 0)
162 return false;
163 if (fTypeName.compare(0, 11, "std::tuple<") == 0)
164 return false;
165 }
166
167 return true;
168}
169
170////////////////////////////////////////////////////////////////////////////////
171
173{
174 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
175 fBitsOnStorage == other.fBitsOnStorage && fType == other.fType && fFieldId == other.fFieldId &&
176 fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex &&
177 fValueRange == other.fValueRange;
178}
179
181{
182 RColumnDescriptor clone;
183 clone.fLogicalColumnId = fLogicalColumnId;
184 clone.fPhysicalColumnId = fPhysicalColumnId;
185 clone.fBitsOnStorage = fBitsOnStorage;
186 clone.fType = fType;
187 clone.fFieldId = fFieldId;
188 clone.fIndex = fIndex;
189 clone.fFirstElementIndex = fFirstElementIndex;
190 clone.fRepresentationIndex = fRepresentationIndex;
191 clone.fValueRange = fValueRange;
192 return clone;
193}
194
195////////////////////////////////////////////////////////////////////////////////
196
199{
200 const auto N = fCumulativeNElements.size();
201 R__ASSERT(N > 0);
202 R__ASSERT(N == fPageInfos.size());
203
204 std::size_t left = 0;
205 std::size_t right = N - 1;
206 std::size_t midpoint = N;
207 while (left <= right) {
208 midpoint = (left + right) / 2;
209 if (fCumulativeNElements[midpoint] <= idxInCluster) {
210 left = midpoint + 1;
211 continue;
212 }
213
214 if ((midpoint == 0) || (fCumulativeNElements[midpoint - 1] <= idxInCluster))
215 break;
216
217 right = midpoint - 1;
218 }
220
221 auto pageInfo = fPageInfos[midpoint];
222 decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : fCumulativeNElements[midpoint - 1];
224 R__ASSERT((firstInPage + pageInfo.fNElements) > idxInCluster);
226}
227
228std::size_t
231 std::size_t pageSize)
232{
233 R__ASSERT(fPhysicalColumnId == columnRange.fPhysicalColumnId);
234 R__ASSERT(!columnRange.fIsSuppressed);
235
236 const auto nElements = std::accumulate(fPageInfos.begin(), fPageInfos.end(), 0U,
237 [](std::size_t n, const auto &PI) { return n + PI.fNElements; });
238 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.fNElements);
239
241 return 0U;
242 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
243
244 std::vector<RPageInfo> pageInfos;
245 // Synthesize new `RPageInfo`s as needed
246 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
251 PI.fLocator.SetType(RNTupleLocator::kTypePageZero);
252 PI.fLocator.SetNBytesOnStorage(element.GetPackedSize(PI.fNElements));
253 pageInfos.emplace_back(PI);
254 nRemainingElements -= PI.fNElements;
255 }
256
257 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
258 std::make_move_iterator(fPageInfos.end()));
259 std::swap(fPageInfos, pageInfos);
261}
262
264{
265 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
266 fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
267}
268
270{
271 std::uint64_t nbytes = 0;
272 for (const auto &pr : fPageRanges) {
273 for (const auto &pi : pr.second.fPageInfos) {
274 nbytes += pi.fLocator.GetNBytesOnStorage();
275 }
276 }
277 return nbytes;
278}
279
281{
282 RClusterDescriptor clone;
283 clone.fClusterId = fClusterId;
284 clone.fFirstEntryIndex = fFirstEntryIndex;
285 clone.fNEntries = fNEntries;
286 clone.fColumnRanges = fColumnRanges;
287 for (const auto &d : fPageRanges)
288 clone.fPageRanges.emplace(d.first, d.second.Clone());
289 return clone;
290}
291
292////////////////////////////////////////////////////////////////////////////////
293
295{
296 return fContentId == other.fContentId && fTypeName == other.fTypeName && fTypeVersion == other.fTypeVersion;
297}
298
300{
302 clone.fContentId = fContentId;
303 clone.fTypeVersion = fTypeVersion;
304 clone.fTypeName = fTypeName;
305 clone.fContent = fContent;
306 return clone;
307}
308
309////////////////////////////////////////////////////////////////////////////////
310
312{
313 // clang-format off
314 return fName == other.fName &&
315 fDescription == other.fDescription &&
316 fNEntries == other.fNEntries &&
317 fGeneration == other.fGeneration &&
318 fFieldZeroId == other.fFieldZeroId &&
319 fFieldDescriptors == other.fFieldDescriptors &&
320 fColumnDescriptors == other.fColumnDescriptors &&
321 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
322 fClusterDescriptors == other.fClusterDescriptors;
323 // clang-format on
324}
325
328{
330 for (const auto &cd : fClusterDescriptors) {
331 if (!cd.second.ContainsColumn(physicalColumnId))
332 continue;
333 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
334 result = std::max(result, columnRange.fFirstElementIndex + columnRange.fNElements);
335 }
336 return result;
337}
338
341{
342 std::string leafName(fieldName);
343 auto posDot = leafName.find_last_of('.');
344 if (posDot != std::string::npos) {
345 auto parentName = leafName.substr(0, posDot);
346 leafName = leafName.substr(posDot + 1);
347 parentId = FindFieldId(parentName, parentId);
348 }
349 auto itrFieldDesc = fFieldDescriptors.find(parentId);
350 if (itrFieldDesc == fFieldDescriptors.end())
352 for (const auto linkId : itrFieldDesc->second.GetLinkIds()) {
353 if (fFieldDescriptors.at(linkId).GetFieldName() == leafName)
354 return linkId;
355 }
357}
358
360{
362 return "";
363
364 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
365 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
366 if (prefix.empty())
367 return fieldDescriptor.GetFieldName();
368 return prefix + "." + fieldDescriptor.GetFieldName();
369}
370
372{
373 return FindFieldId(fieldName, GetFieldZeroId());
374}
375
378 std::uint16_t representationIndex) const
379{
380 auto itr = fFieldDescriptors.find(fieldId);
381 if (itr == fFieldDescriptors.cend())
383 if (columnIndex >= itr->second.GetColumnCardinality())
385 const auto idx = representationIndex * itr->second.GetColumnCardinality() + columnIndex;
386 if (itr->second.GetLogicalColumnIds().size() <= idx)
388 return itr->second.GetLogicalColumnIds()[idx];
389}
390
393 std::uint16_t representationIndex) const
394{
395 auto logicalId = FindLogicalColumnId(fieldId, columnIndex, representationIndex);
398 return GetColumnDescriptor(logicalId).GetPhysicalId();
399}
400
403{
404 if (GetNClusterGroups() == 0)
406
407 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
408
409 std::size_t cgLeft = 0;
410 std::size_t cgRight = GetNClusterGroups() - 1;
411 while (cgLeft <= cgRight) {
412 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
413 const auto &clusterIds = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]).GetClusterIds();
414 R__ASSERT(!clusterIds.empty());
415
416 const auto firstElementInGroup =
417 GetClusterDescriptor(clusterIds.front()).GetColumnRange(physicalColumnId).fFirstElementIndex;
419 // Look into the lower half of cluster groups
421 cgRight = cgMidpoint - 1;
422 continue;
423 }
424
425 const auto &lastColumnRange = GetClusterDescriptor(clusterIds.back()).GetColumnRange(physicalColumnId);
426 if ((lastColumnRange.fFirstElementIndex + lastColumnRange.fNElements) <= index) {
427 // Look into the upper half of cluster groups
428 cgLeft = cgMidpoint + 1;
429 continue;
430 }
431
432 // Binary search in the current cluster group; since we already checked the element range boundaries,
433 // the element must be in that cluster group.
434 std::size_t clusterLeft = 0;
435 std::size_t clusterRight = clusterIds.size() - 1;
436 while (clusterLeft <= clusterRight) {
437 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
439 const auto &columnRange = GetClusterDescriptor(clusterId).GetColumnRange(physicalColumnId);
440
441 if (columnRange.Contains(index))
442 return clusterId;
443
444 if (columnRange.fFirstElementIndex > index) {
447 continue;
448 }
449
450 if (columnRange.fFirstElementIndex + columnRange.fNElements <= index) {
452 continue;
453 }
454 }
455 R__ASSERT(false);
456 }
458}
459
461{
462 if (GetNClusterGroups() == 0)
464
465 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
466
467 std::size_t cgLeft = 0;
468 std::size_t cgRight = GetNClusterGroups() - 1;
469 while (cgLeft <= cgRight) {
470 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
471 const auto &cgDesc = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]);
472
473 if (cgDesc.GetMinEntry() > entryIdx) {
475 cgRight = cgMidpoint - 1;
476 continue;
477 }
478
479 if (cgDesc.GetMinEntry() + cgDesc.GetEntrySpan() <= entryIdx) {
480 cgLeft = cgMidpoint + 1;
481 continue;
482 }
483
484 // Binary search in the current cluster group; since we already checked the element range boundaries,
485 // the element must be in that cluster group.
486 const auto &clusterIds = cgDesc.GetClusterIds();
487 R__ASSERT(!clusterIds.empty());
488 std::size_t clusterLeft = 0;
489 std::size_t clusterRight = clusterIds.size() - 1;
490 while (clusterLeft <= clusterRight) {
491 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
492 const auto &clusterDesc = GetClusterDescriptor(clusterIds[clusterMidpoint]);
493
494 if (clusterDesc.GetFirstEntryIndex() > entryIdx) {
497 continue;
498 }
499
500 if (clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries() <= entryIdx) {
502 continue;
503 }
504
506 }
507 R__ASSERT(false);
508 }
510}
511
514{
515 // TODO(jblomer): we may want to shortcut the common case and check if clusterId + 1 contains
516 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
517 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
518 // binary search code path remains tested.
519 const auto &clusterDesc = GetClusterDescriptor(clusterId);
520 const auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
521 return FindClusterId(firstEntryInNextCluster);
522}
523
526{
527 // TODO(jblomer): we may want to shortcut the common case and check if clusterId - 1 contains
528 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
529 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
530 // binary search code path remains tested.
531 const auto &clusterDesc = GetClusterDescriptor(clusterId);
532 if (clusterDesc.GetFirstEntryIndex() == 0)
534 return FindClusterId(clusterDesc.GetFirstEntryIndex() - 1);
535}
536
537std::vector<ROOT::Experimental::DescriptorId_t>
539{
540 auto fieldZeroId = desc.GetFieldZeroId();
541
542 std::vector<DescriptorId_t> fields;
543 for (const DescriptorId_t fieldId : fFieldIdsOrder) {
544 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
545 fields.emplace_back(fieldId);
546 }
547 return fields;
548}
549
555
558 : fNTuple(ntuple)
559{
560 std::deque<DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
561
562 while (!fieldIdQueue.empty()) {
563 auto currFieldId = fieldIdQueue.front();
564 fieldIdQueue.pop_front();
565
566 const auto &columns = ntuple.GetFieldDescriptor(currFieldId).GetLogicalColumnIds();
567 fColumns.insert(fColumns.end(), columns.begin(), columns.end());
568
569 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
570 auto fieldId = field.GetId();
571 fieldIdQueue.push_back(fieldId);
572 }
573 }
574}
575
577{
578 std::vector<std::uint64_t> result;
579 unsigned int base = 0;
580 std::uint64_t flags = 0;
581 for (auto f : fFeatureFlags) {
582 if ((f > 0) && ((f % 64) == 0))
583 throw RException(R__FAIL("invalid feature flag: " + std::to_string(f)));
584 while (f > base + 64) {
585 result.emplace_back(flags);
586 flags = 0;
587 base += 64;
588 }
589 f -= base;
590 flags |= 1 << f;
591 }
592 result.emplace_back(flags);
593 return result;
594}
595
598 std::vector<RClusterDescriptor> &clusterDescs)
599{
601 if (iter == fClusterGroupDescriptors.end())
602 return R__FAIL("invalid attempt to add details of unknown cluster group");
603 if (iter->second.HasClusterDetails())
604 return R__FAIL("invalid attempt to re-populate cluster group details");
605 if (iter->second.GetNClusters() != clusterDescs.size())
606 return R__FAIL("mismatch of number of clusters");
607
608 std::vector<DescriptorId_t> clusterIds;
609 for (unsigned i = 0; i < clusterDescs.size(); ++i) {
610 clusterIds.emplace_back(clusterDescs[i].GetId());
611 auto [_, success] = fClusterDescriptors.emplace(clusterIds.back(), std::move(clusterDescs[i]));
612 if (!success) {
613 return R__FAIL("invalid attempt to re-populate existing cluster");
614 }
615 }
616 std::sort(clusterIds.begin(), clusterIds.end(), [this](DescriptorId_t a, DescriptorId_t b) {
617 return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
618 });
620 cgBuilder.AddSortedClusters(clusterIds);
621 iter->second = cgBuilder.MoveDescriptor().Unwrap();
622 return RResult<void>::Success();
623}
624
626{
628 if (iter == fClusterGroupDescriptors.end())
629 return R__FAIL("invalid attempt to drop cluster details of unknown cluster group");
630 if (!iter->second.HasClusterDetails())
631 return R__FAIL("invalid attempt to drop details of cluster group summary");
632
633 for (auto clusterId : iter->second.GetClusterIds())
635 iter->second = iter->second.CloneSummary();
636 return RResult<void>::Success();
637}
638
639std::unique_ptr<ROOT::Experimental::RNTupleModel>
641{
642 auto fieldZero = std::make_unique<RFieldZero>();
643 fieldZero->SetOnDiskId(GetFieldZeroId());
644 auto model =
647 createFieldOpts.fReturnInvalidOnError = options.fForwardCompatible;
648 createFieldOpts.fEmulateUnknownTypes = options.fEmulateUnknownTypes;
649 for (const auto &topDesc : GetTopLevelFields()) {
650 auto field = topDesc.CreateField(*this, createFieldOpts);
651 if (field->GetTraits() & RFieldBase::kTraitInvalidField)
652 continue;
653
654 if (options.fReconstructProjections && topDesc.IsProjectedField()) {
655 model->AddProjectedField(std::move(field), [this](const std::string &targetName) -> std::string {
656 return GetQualifiedFieldName(GetFieldDescriptor(FindFieldId(targetName)).GetProjectionSourceId());
657 });
658 } else {
659 model->AddField(std::move(field));
660 }
661 }
662 model->Freeze();
663 return model;
664}
665
667{
668 RNTupleDescriptor clone;
669 clone.fName = fName;
674 clone.fNEntries = fNEntries;
675 clone.fNClusters = fNClusters;
678 clone.fGeneration = fGeneration;
679 for (const auto &d : fFieldDescriptors)
680 clone.fFieldDescriptors.emplace(d.first, d.second.Clone());
681 for (const auto &d : fColumnDescriptors)
682 clone.fColumnDescriptors.emplace(d.first, d.second.Clone());
683 for (const auto &d : fClusterGroupDescriptors)
684 clone.fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
686 for (const auto &d : fClusterDescriptors)
687 clone.fClusterDescriptors.emplace(d.first, d.second.Clone());
688 for (const auto &d : fExtraTypeInfoDescriptors)
689 clone.fExtraTypeInfoDescriptors.emplace_back(d.Clone());
691 clone.fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
692 return clone;
693}
694
695////////////////////////////////////////////////////////////////////////////////
696
698{
699 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds &&
700 fMinEntry == other.fMinEntry && fEntrySpan == other.fEntrySpan && fNClusters == other.fNClusters;
701}
702
704{
706 clone.fClusterGroupId = fClusterGroupId;
707 clone.fPageListLocator = fPageListLocator;
708 clone.fPageListLength = fPageListLength;
709 clone.fMinEntry = fMinEntry;
710 clone.fEntrySpan = fEntrySpan;
711 clone.fNClusters = fNClusters;
712 return clone;
713}
714
716{
717 RClusterGroupDescriptor clone = CloneSummary();
718 clone.fClusterIds = fClusterIds;
719 return clone;
720}
721
722////////////////////////////////////////////////////////////////////////////////
723
727{
728 if (physicalId != pageRange.fPhysicalColumnId)
729 return R__FAIL("column ID mismatch");
730 if (fCluster.fColumnRanges.count(physicalId) > 0)
731 return R__FAIL("column ID conflict");
733 for (const auto &pi : pageRange.fPageInfos) {
734 columnRange.fNElements += pi.fNElements;
735 }
736 fCluster.fPageRanges[physicalId] = pageRange.Clone();
737 fCluster.fColumnRanges[physicalId] = columnRange;
738 return RResult<void>::Success();
739}
740
743{
744 if (fCluster.fColumnRanges.count(physicalId) > 0)
745 return R__FAIL("column ID conflict");
746
748 columnRange.fPhysicalColumnId = physicalId;
749 columnRange.fIsSuppressed = true;
750 fCluster.fColumnRanges[physicalId] = columnRange;
751 return RResult<void>::Success();
752}
753
756{
757 for (auto &[_, columnRange] : fCluster.fColumnRanges) {
758 if (!columnRange.fIsSuppressed)
759 continue;
760 R__ASSERT(columnRange.fFirstElementIndex == kInvalidNTupleIndex);
761
762 const auto &columnDesc = desc.GetColumnDescriptor(columnRange.fPhysicalColumnId);
763 const auto &fieldDesc = desc.GetFieldDescriptor(columnDesc.GetFieldId());
764 // We expect only few columns and column representations per field, so we do a linear search
765 for (const auto otherColumnLogicalId : fieldDesc.GetLogicalColumnIds()) {
767 if (otherColumnDesc.GetRepresentationIndex() == columnDesc.GetRepresentationIndex())
768 continue;
769 if (otherColumnDesc.GetIndex() != columnDesc.GetIndex())
770 continue;
771
772 // Found corresponding column of a different column representation
773 const auto &otherColumnRange = fCluster.GetColumnRange(otherColumnDesc.GetPhysicalId());
774 if (otherColumnRange.fIsSuppressed)
775 continue;
776
777 columnRange.fFirstElementIndex = otherColumnRange.fFirstElementIndex;
778 columnRange.fNElements = otherColumnRange.fNElements;
779 break;
780 }
781
782 if (columnRange.fFirstElementIndex == kInvalidNTupleIndex) {
783 return R__FAIL(std::string("cannot find non-suppressed column for column ID ") +
784 std::to_string(columnRange.fPhysicalColumnId) +
785 ", cluster ID: " + std::to_string(fCluster.GetId()));
786 }
787 }
788 return RResult<void>::Success();
789}
790
793{
794 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
795 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
796 /// parent field in the hierarchy.
798 const auto &visitField, const auto &enterSubtree) -> void {
800 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
801 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
803 }
804 };
805
806 // Extended columns can only be part of the header extension
807 if (!desc.GetHeaderExtension())
808 return *this;
809
810 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
811 // Extended columns can be attached both to fields of the regular header and to fields of the extension header
812 for (const auto &topLevelField : desc.GetTopLevelFields()) {
814 topLevelField.GetId(), std::max(topLevelField.GetNRepetitions(), std::uint64_t{1U}),
815 [&](DescriptorId_t fieldId, std::uint64_t nRepetitions) {
816 for (const auto &c : desc.GetColumnIterable(fieldId)) {
817 const DescriptorId_t physicalId = c.GetPhysicalId();
818 auto &columnRange = fCluster.fColumnRanges[physicalId];
819
820 // Initialize a RColumnRange for `physicalId` if it was not there. Columns that were created during model
821 // extension won't have on-disk metadata for the clusters that were already committed before the model
822 // was extended. Therefore, these need to be synthetically initialized upon reading.
823 if (columnRange.fPhysicalColumnId == kInvalidDescriptorId) {
824 columnRange.fPhysicalColumnId = physicalId;
825 columnRange.fFirstElementIndex = 0;
826 columnRange.fNElements = 0;
827 columnRange.fIsSuppressed = c.IsSuppressedDeferredColumn();
828 }
829 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
830 // number of elements should have been if the column was not deferred; fix those and let
831 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
832 // Note that a deferred column (i.e, whose first element index is > 0) already met the criteria of
833 // `RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the field zero
834 // excluding subfields of collection and variant fields.
835 if (c.IsDeferredColumn()) {
836 columnRange.fFirstElementIndex = fCluster.GetFirstEntryIndex() * nRepetitions;
837 columnRange.fNElements = fCluster.GetNEntries() * nRepetitions;
838 if (!columnRange.fIsSuppressed) {
839 auto &pageRange = fCluster.fPageRanges[physicalId];
840 pageRange.fPhysicalColumnId = physicalId;
841 const auto element = Internal::RColumnElementBase::Generate<void>(c.GetType());
842 pageRange.ExtendToFitColumnRange(columnRange, *element, Internal::RPage::kPageZeroSize);
843 }
844 } else if (!columnRange.fIsSuppressed) {
845 fCluster.fPageRanges[physicalId].fPhysicalColumnId = physicalId;
846 }
847 }
848 },
850 }
851 return *this;
852}
853
856{
857 if (fCluster.fClusterId == kInvalidDescriptorId)
858 return R__FAIL("unset cluster ID");
859 if (fCluster.fNEntries == 0)
860 return R__FAIL("empty cluster");
861 for (auto &pr : fCluster.fPageRanges) {
862 if (fCluster.fColumnRanges.count(pr.first) == 0) {
863 return R__FAIL("missing column range");
864 }
865 pr.second.fCumulativeNElements.clear();
866 pr.second.fCumulativeNElements.reserve(pr.second.fPageInfos.size());
867 NTupleSize_t sum = 0;
868 for (const auto &pi : pr.second.fPageInfos) {
869 sum += pi.fNElements;
870 pr.second.fCumulativeNElements.emplace_back(sum);
871 }
872 }
874 std::swap(result, fCluster);
875 return result;
876}
877
878////////////////////////////////////////////////////////////////////////////////
879
883{
885 builder.ClusterGroupId(clusterGroupDesc.GetId())
886 .PageListLocator(clusterGroupDesc.GetPageListLocator())
887 .PageListLength(clusterGroupDesc.GetPageListLength())
888 .MinEntry(clusterGroupDesc.GetMinEntry())
889 .EntrySpan(clusterGroupDesc.GetEntrySpan())
890 .NClusters(clusterGroupDesc.GetNClusters());
891 return builder;
892}
893
896{
897 if (fClusterGroup.fClusterGroupId == kInvalidDescriptorId)
898 return R__FAIL("unset cluster group ID");
900 std::swap(result, fClusterGroup);
901 return result;
902}
903
904////////////////////////////////////////////////////////////////////////////////
905
908{
909 if (fExtraTypeInfo.fContentId == EExtraTypeInfoIds::kInvalid)
910 throw RException(R__FAIL("invalid extra type info content id"));
912 std::swap(result, fExtraTypeInfo);
913 return result;
914}
915
916////////////////////////////////////////////////////////////////////////////////
917
920{
921 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
922 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
923 return RResult<void>::Success();
924}
925
927{
928 // Reuse field name validity check
929 auto validName = ROOT::Experimental::Internal::EnsureValidNameForRNTuple(fDescriptor.GetName(), "Field");
930 if (!validName) {
932 }
933
934 for (const auto &[fieldId, fieldDesc] : fDescriptor.fFieldDescriptors) {
935 // parent not properly set?
936 if (fieldId != fDescriptor.GetFieldZeroId() && fieldDesc.GetParentId() == kInvalidDescriptorId) {
937 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has an invalid parent id");
938 }
939
940 // Same number of columns in every column representation?
941 const auto columnCardinality = fieldDesc.GetColumnCardinality();
942 if (columnCardinality == 0)
943 continue;
944
945 // In AddColumn, we already checked that all but the last representation are complete.
946 // Check that the last column representation is complete, i.e. has all columns.
947 const auto &logicalColumnIds = fieldDesc.GetLogicalColumnIds();
948 const auto nColumns = logicalColumnIds.size();
949 // If we have only a single column representation, the following condition is true by construction
950 if ((nColumns + 1) == columnCardinality)
951 continue;
952
953 const auto &lastColumn = fDescriptor.GetColumnDescriptor(logicalColumnIds.back());
954 if (lastColumn.GetIndex() + 1 != columnCardinality)
955 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has incomplete column representations");
956 }
957
958 return RResult<void>::Success();
959}
960
962{
963 EnsureValidDescriptor().ThrowOnError();
964 fDescriptor.fSortedClusterGroupIds.reserve(fDescriptor.fClusterGroupDescriptors.size());
965 for (const auto &[id, _] : fDescriptor.fClusterGroupDescriptors)
966 fDescriptor.fSortedClusterGroupIds.emplace_back(id);
967 std::sort(fDescriptor.fSortedClusterGroupIds.begin(), fDescriptor.fSortedClusterGroupIds.end(),
969 return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
970 fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
971 });
973 std::swap(result, fDescriptor);
974 return result;
975}
976
978 const std::string_view description)
979{
980 fDescriptor.fName = std::string(name);
981 fDescriptor.fDescription = std::string(description);
982}
983
985{
986 if (flag % 64 == 0)
987 throw RException(R__FAIL("invalid feature flag: " + std::to_string(flag)));
988 fDescriptor.fFeatureFlags.insert(flag);
989}
990
/// Validates the column under construction and, on success, returns a clone of it.
/// Fails if the logical column id, the physical column id or the field id is kInvalidDescriptorId, or, for
/// column types other than kUnknown, if the bits on storage fall outside the range reported by
/// RColumnElementBase::GetValidBitRange().
993{
994 if (fColumn.GetLogicalId() == kInvalidDescriptorId)
995 return R__FAIL("invalid logical column id");
996 if (fColumn.GetPhysicalId() == kInvalidDescriptorId)
997 return R__FAIL("invalid physical column id");
998 if (fColumn.GetFieldId() == kInvalidDescriptorId)
999 return R__FAIL("invalid field id, dangling column");
1000
1001 // NOTE: if the column type is unknown we don't want to fail, as we might be reading an RNTuple
1002 // created with a future version of ROOT. In this case we just skip the valid bit range check,
1003 // as we have no idea what the valid range is.
1004 // In general, reading the metadata of an unknown column is fine, it becomes an error only when
1005 // we try to read the actual data contained in it.
1006 if (fColumn.GetType() != ENTupleColumnType::kUnknown) {
1007 const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(fColumn.GetType());
// Both bounds are inclusive: only values strictly below minBits or strictly above maxBits are rejected
1008 if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
1009 return R__FAIL("invalid column bit width");
1010 }
1011
1012 return fColumn.Clone();
1013}
1014
1022
/// Creates a field descriptor builder pre-populated from a live field.
/// Copies the field version, type version, field name, description, type name, type alias, structure and number
/// of repetitions. The type checksum is copied only if the field has the kTraitTypeChecksum trait.
/// None of the calls visible here sets a field id or a parent id.
// NOTE(review): listing line 1026 (presumably the declaration of `fieldDesc`) is missing from this extract.
1025{
1027 fieldDesc.FieldVersion(field.GetFieldVersion())
1028 .TypeVersion(field.GetTypeVersion())
1029 .FieldName(field.GetFieldName())
1030 .FieldDescription(field.GetDescription())
1031 .TypeName(field.GetTypeName())
1032 .TypeAlias(field.GetTypeAlias())
1033 .Structure(field.GetStructure())
1034 .NRepetitions(field.GetNRepetitions());
1035 if (field.GetTraits() & RFieldBase::kTraitTypeChecksum)
1036 fieldDesc.TypeChecksum(field.GetTypeChecksum());
1037 return fieldDesc;
1038}
1039
/// Validates the field under construction and, on success, returns a clone of it.
/// Fails if the field id is kInvalidDescriptorId or the structure is ENTupleStructure::kInvalid. The field name
/// is validated only for fields that have a parent id set.
// NOTE(review): listing line 1053 (the body of the `!validName` branch) is missing from this extract.
1042{
1043 if (fField.GetId() == kInvalidDescriptorId) {
1044 return R__FAIL("invalid field id");
1045 }
1046 if (fField.GetStructure() == ENTupleStructure::kInvalid) {
1047 return R__FAIL("invalid field structure");
1048 }
1049 // FieldZero is usually named "" and would be a false positive here
1050 if (fField.GetParentId() != kInvalidDescriptorId) {
1051 auto validName = ROOT::Experimental::Internal::EnsureValidNameForRNTuple(fField.GetFieldName(), "Field");
1052 if (!validName) {
1054 }
// Explicit empty-name check on top of the generic name validation above
1055 if (fField.GetFieldName().empty()) {
1056 return R__FAIL("name cannot be empty string \"\"");
1057 }
1058 }
1059 return fField.Clone();
1060}
1061
/// Registers a clone of `fieldDesc` in the descriptor under construction, keyed by the field's id.
/// If a header extension is active, the field is also marked as extended. A field with an empty name and no
/// parent is recorded as the field zero of the descriptor.
1063{
// emplace() does not overwrite: an already registered field with the same id is kept as is
1064 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
1065 if (fDescriptor.fHeaderExtension)
1066 fDescriptor.fHeaderExtension->MarkExtendedField(fieldDesc);
1067 if (fieldDesc.GetFieldName().empty() && fieldDesc.GetParentId() == kInvalidDescriptorId) {
1068 fDescriptor.fFieldZeroId = fieldDesc.GetId();
1069 }
1070}
1071
/// Makes the field `linkId` a child of the field `fieldId`.
/// Fails if either field does not exist, if `linkId` is the field zero, if `linkId` already has a different
/// parent, or if both ids are identical. On success, the child's parent id is set and `linkId` is appended to
/// the parent's list of link ids.
// NOTE(review): listing lines 1075 and 1077 (presumably the declaration of `fieldExists` and the return for a
// missing parent field) are missing from this extract.
1074{
1076 if (!(fieldExists = EnsureFieldExists(fieldId)))
1078 if (!(fieldExists = EnsureFieldExists(linkId)))
1079 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
1080
1081 if (linkId == fDescriptor.GetFieldZeroId()) {
1082 return R__FAIL("cannot make FieldZero a child field");
1083 }
1084 // fail if field already has another valid parent
1085 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
1086 if ((parentId != kInvalidDescriptorId) && (parentId != fieldId)) {
// NOTE(review): the message opens a quote before the parent id but never closes it
1087 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" + std::to_string(parentId) + ")");
1088 }
1089 if (fieldId == linkId) {
1090 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
1091 }
// Record the relationship in both directions: child -> parent and parent -> children
1092 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
1093 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
1094 return RResult<void>::Success();
1095}
1096
/// Marks the field `targetId` as a projection of the field `sourceId`.
/// Fails if either field does not exist, if `targetId` is the field zero, if both ids are identical, if the
/// source is itself a projected field, or if the target already has a different projection source.
// NOTE(review): listing lines 1100 and 1102 (presumably the declaration of `fieldExists` and the return for a
// missing source field) are missing from this extract.
1099{
1101 if (!(fieldExists = EnsureFieldExists(sourceId)))
1103 if (!(fieldExists = EnsureFieldExists(targetId)))
1104 return R__FAIL("projected field with id '" + std::to_string(targetId) + "' doesn't exist in NTuple");
1105
1106 if (targetId == fDescriptor.GetFieldZeroId()) {
1107 return R__FAIL("cannot make FieldZero a projected field");
1108 }
1109 if (sourceId == targetId) {
1110 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of itself");
1111 }
// Projections cannot be chained: the source must not be a projected field itself
1112 if (fDescriptor.fFieldDescriptors.at(sourceId).IsProjectedField()) {
1113 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of an already projected field");
1114 }
1115 // fail if target field already has another valid projection source
1116 auto &targetDesc = fDescriptor.fFieldDescriptors.at(targetId);
1117 if (targetDesc.IsProjectedField() && targetDesc.GetProjectionSourceId() != sourceId) {
// NOTE(review): the message opens a quote before the source id but never closes it
1118 return R__FAIL("field '" + std::to_string(targetId) + "' has already a projection source ('" +
1119 std::to_string(targetDesc.GetProjectionSourceId()) + ")");
1120 }
1121 fDescriptor.fFieldDescriptors.at(targetId).fProjectionSourceId = sourceId;
1122 return RResult<void>::Success();
1123}
1124
/// Attaches the column `columnDesc` to its field and registers it in the descriptor under construction.
/// Fails if the owning field does not exist, if an alias column's type differs from the type of the physical
/// column it refers to, if a column with the same (field, column index, representation index) already exists,
/// or if the column/representation indexes are not added contiguously and with a uniform number of columns per
/// representation. On success, the logical id is appended to the field's column list, the physical column
/// counter is incremented for non-alias columns, and the descriptor is moved into the column map.
// NOTE(review): listing lines 1125, 1133 and 1155 (presumably the signature, the return for a missing field and
// the second half of the condition started on line 1154) are missing from this extract.
1126{
1127 const auto fieldId = columnDesc.GetFieldId();
1128 const auto columnIndex = columnDesc.GetIndex();
1129 const auto representationIndex = columnDesc.GetRepresentationIndex();
1130
1131 auto fieldExists = EnsureFieldExists(fieldId);
1132 if (!fieldExists) {
1134 }
1135 auto &fieldDesc = fDescriptor.fFieldDescriptors.find(fieldId)->second;
1136
1137 if (columnDesc.IsAliasColumn()) {
1138 if (columnDesc.GetType() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetType())
1139 return R__FAIL("alias column type mismatch");
1140 }
1141 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex, representationIndex) != kInvalidDescriptorId) {
1142 return R__FAIL("column index clash");
1143 }
// Within a representation, columns must be added in order: index i requires index i - 1 to exist
1144 if (columnIndex > 0) {
1145 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex - 1, representationIndex) == kInvalidDescriptorId)
1146 return R__FAIL("out of bounds column index");
1147 }
1148 if (representationIndex > 0) {
// Representations must be added in order, too: representation r requires representation r - 1 to exist
1149 if (fDescriptor.FindLogicalColumnId(fieldId, 0, representationIndex - 1) == kInvalidDescriptorId) {
1150 return R__FAIL("out of bounds representation index");
1151 }
1152 if (columnIndex == 0) {
// Starting a new representation: the previous one must already contain its last column
1153 assert(fieldDesc.fColumnCardinality > 0);
1154 if (fDescriptor.FindLogicalColumnId(fieldId, fieldDesc.fColumnCardinality - 1, representationIndex - 1) ==
1156 return R__FAIL("incomplete column representations");
1157 }
1158 } else {
1159 if (columnIndex >= fieldDesc.fColumnCardinality)
1160 return R__FAIL("irregular column representations");
1161 }
1162 } else {
1163 // This will set the column cardinality to the number of columns of the first representation
1164 fieldDesc.fColumnCardinality = columnIndex + 1;
1165 }
1166
1167 const auto logicalId = columnDesc.GetLogicalId();
1168 fieldDesc.fLogicalColumnIds.emplace_back(logicalId);
1169
1170 if (!columnDesc.IsAliasColumn())
1171 fDescriptor.fNPhysicalColumns++;
// Mark the column in the header extension before handing it over to the column map: columnDesc must not be
// read anymore once it has been moved from.
1172 if (fDescriptor.fHeaderExtension)
1173 fDescriptor.fHeaderExtension->MarkExtendedColumn(columnDesc);
1174 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
1175
1176 return RResult<void>::Success();
1177}
1178
/// Registers a cluster group in the descriptor under construction.
/// Fails with "cluster group id clash" if a cluster group with the same id is already registered. Otherwise
/// updates the descriptor's entry and cluster counters and moves `clusterGroup` into the cluster group map.
1181{
1182 const auto id = clusterGroup.GetId();
1183 if (fDescriptor.fClusterGroupDescriptors.count(id) > 0)
1184 return R__FAIL("cluster group id clash");
// The number of entries is the largest end (min entry + entry span) over all cluster groups seen so far
1185 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, clusterGroup.GetMinEntry() + clusterGroup.GetEntrySpan());
1186 fDescriptor.fNClusters += clusterGroup.GetNClusters();
1187 fDescriptor.fClusterGroupDescriptors.emplace(id, std::move(clusterGroup));
1188 return RResult<void>::Success();
1189}
1190
/// Empties the descriptor under construction: clears the name, the description, the field, column, cluster and
/// cluster group descriptors, and drops the header extension.
// NOTE(review): members that other builder functions in this file modify (fNEntries, fNClusters,
// fNPhysicalColumns, fFeatureFlags, fSortedClusterGroupIds, fExtraTypeInfoDescriptors, fFieldZeroId) are not
// reset here -- confirm that this is intended.
1192{
1193 fDescriptor.fName = "";
1194 fDescriptor.fDescription = "";
1195 fDescriptor.fFieldDescriptors.clear();
1196 fDescriptor.fColumnDescriptors.clear();
1197 fDescriptor.fClusterDescriptors.clear();
1198 fDescriptor.fClusterGroupDescriptors.clear();
1199 fDescriptor.fHeaderExtension.reset();
1200}
1201
/// Creates the header extension of the descriptor under construction if there is none yet.
/// Calling this again while a header extension exists has no effect.
1203{
1204 if (!fDescriptor.fHeaderExtension)
1205 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
1206}
1207
/// Adds `offset` to the logical id of every column whose current logical id lies in
/// [GetNPhysicalColumns(), GetNLogicalColumns()). Each such column is asserted to be an alias column, is re-keyed
/// in the column map under the shifted id, and the matching entry in its field's list of logical column ids is
/// updated accordingly. Does nothing if the descriptor has no columns.
1209{
1210 if (fDescriptor.GetNLogicalColumns() == 0)
1211 return;
// With an unsigned id, the descending loop below can only terminate if its lower bound is greater than zero
1212 R__ASSERT(fDescriptor.GetNPhysicalColumns() > 0);
1213
// Walk from the highest id downwards
1214 for (DescriptorId_t id = fDescriptor.GetNLogicalColumns() - 1; id >= fDescriptor.GetNPhysicalColumns(); --id) {
// Work on a copy so that the original entry can be erased before the shifted one is inserted
1215 auto c = fDescriptor.fColumnDescriptors[id].Clone();
1216 R__ASSERT(c.IsAliasColumn());
1217 R__ASSERT(id == c.GetLogicalId());
1218 fDescriptor.fColumnDescriptors.erase(id);
// Patch the reference to this column held by the owning field
1219 for (auto &link : fDescriptor.fFieldDescriptors[c.fFieldId].fLogicalColumnIds) {
1220 if (link == c.fLogicalColumnId) {
1221 link += offset;
1222 break;
1223 }
1224 }
1225 c.fLogicalColumnId += offset;
// The shifted id must not collide with a column that is already registered
1226 R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0);
1227 fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c));
1228 }
1229}
1230
/// Moves `clusterDesc` into the cluster map of the descriptor under construction, keyed by the cluster id.
/// Fails with "cluster id clash" if a cluster with the same id is already registered.
1232{
1233 auto clusterId = clusterDesc.GetId();
1234 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
1235 return R__FAIL("cluster id clash");
1236 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
1237 return RResult<void>::Success();
1238}
1239
/// Appends `extraTypeInfoDesc` to the extra type info records of the descriptor under construction.
/// Fails with "extra type info duplicates" if an equal record (as determined by operator==) is already stored.
1242{
1243 // Make sure we have no duplicates
// Linear search over the records stored so far
1244 if (std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1245 extraTypeInfoDesc) != fDescriptor.fExtraTypeInfoDescriptors.end()) {
1246 return R__FAIL("extra type info duplicates");
1247 }
1248 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1249 return RResult<void>::Success();
1250}
1251
/// Collects streamer info records for the descriptor and returns them in `streamerInfoMap`.
/// Walks the field tree starting at the field zero. For every field that is a custom class, looks up the TClass
/// by type name and then the streamer info for the field's type version; throws an RException if either lookup
/// fails. Afterwards, visits the extra type info records whose content id is EExtraTypeInfoIds::kStreamerInfo.
// NOTE(review): listing lines 1255, 1259, 1270, 1275 and 1289 are missing from this extract (presumably the
// declaration of `streamerInfoMap`, the lambda header, the insertion into the map, the recursive call and the
// processing of the extra type info content).
1254{
1256 const auto &desc = GetDescriptor();
1257
// Declared as std::function so that the visitor can be invoked recursively
1258 std::function<void(const RFieldDescriptor &)> fnWalkFieldTree;
1260 if (fieldDesc.IsCustomClass()) {
1261 // Add streamer info for this class to streamerInfoMap
1262 auto cl = TClass::GetClass(fieldDesc.GetTypeName().c_str());
1263 if (!cl) {
1264 throw RException(R__FAIL(std::string("cannot get TClass for ") + fieldDesc.GetTypeName()));
1265 }
1266 auto streamerInfo = cl->GetStreamerInfo(fieldDesc.GetTypeVersion());
1267 if (!streamerInfo) {
1268 throw RException(R__FAIL(std::string("cannot get streamerInfo for ") + fieldDesc.GetTypeName()));
1269 }
1271 }
1272
1273 // Recursively traverse sub fields
1274 for (const auto &subFieldDesc : desc.GetFieldIterable(fieldDesc)) {
1276 }
1277 };
1278
1279 fnWalkFieldTree(desc.GetFieldZero());
1280
1281 // Add the streamer info records from streamer fields: because of runtime polymorphism we may need to add additional
1282 // types not covered by the type names stored in the field headers
1283 for (const auto &extraTypeInfo : desc.GetExtraTypeInfoIterable()) {
// Only streamer info records are of interest here
1284 if (extraTypeInfo.GetContentId() != EExtraTypeInfoIds::kStreamerInfo)
1285 continue;
1286 // Ideally, we would avoid deserializing the streamer info records of the streamer fields that we just serialized.
1287 // However, this happens only once at the end of writing and only when streamer fields are used, so the
1288 // preference here is for code simplicity.
1290 }
1291
1292 return streamerInfoMap;
1293}
1294
1300
1306
1312
/// Convenience overload taking a field id: resolves the id with GetFieldDescriptor() and forwards to the
/// GetFieldIterable() overload that takes a field descriptor.
1315{
1316 return GetFieldIterable(GetFieldDescriptor(fieldId));
1317}
1318
1324
/// Returns the field iterable of the field zero, i.e. forwards to GetFieldIterable() with GetFieldZeroId().
1327{
1328 return GetFieldIterable(GetFieldZeroId());
1329}
1330
1333 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const
1334{
// Same as the overload without arguments, but passes the caller's comparator on to GetFieldIterable()
1335 return GetFieldIterable(GetFieldZeroId(), comparator);
1336}
1337
1343
1349
1355
1361
1367
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:303
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
#define PI
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
#define _(A, B)
Definition cfortran.h:108
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RFieldDescriptor.
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
RResult< void > AddFieldProjection(DescriptorId_t sourceId, DescriptorId_t targetId)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
void ShiftAliasColumns(std::uint32_t offset)
If the descriptor is constructed in pieces consisting of physical and alias columns (regular and proj...
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void Reset()
Clears the clusters, fields, and columns stored so far and returns to a pristine ntuple descriptor.
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
RClusterGroupDescriptor CloneSummary() const
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
Meta-data stored for every column of an ntuple.
ENTupleColumnType fType
The on-disk column type.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Field specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
A field translates read and write calls from/to underlying columns to/from tree values.
@ kTraitTypeChecksum
The TClass checksum is set and valid.
@ kTraitInvalidField
This field is an instance of RInvalidField and can be safely static_cast to it.
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::vector< DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
Used in RFieldBase::Check() to record field creation failures.
Definition RField.hxx:76
@ kGeneric
Generic unrecoverable error.
@ kUnknownStructure
The field could not be created because its descriptor had an unknown structural role.
Used to loop over all the clusters of an ntuple (in unspecified order)
Used to loop over all the cluster groups of an ntuple (in unspecified order)
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
Used to loop over all the extra type info record of an ntuple (in unspecified order)
std::vector< DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
std::unique_ptr< RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
RColumnDescriptorIterable GetColumnIterable() const
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
bool operator==(const RNTupleDescriptor &other) const
std::vector< DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
RClusterDescriptorIterable GetClusterIterable() const
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
DescriptorId_t FindClusterId(NTupleSize_t entryIdx) const
DescriptorId_t fFieldZeroId
Set by the descriptor builder.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::vector< std::uint64_t > GetFeatureFlags() const
static std::unique_ptr< RNTupleModel > Create()
static std::unique_ptr< RNTupleModel > CreateBare()
A bare model has no default entry.
static std::unique_ptr< RVectorField > CreateUntyped(std::string_view fieldName, std::unique_ptr< RFieldBase > itemField)
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
const_iterator begin() const
const_iterator end() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3069
struct void * fTypeName
Definition cppyy.h:9
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
ROOT::RResult< std::unique_ptr< ROOT::Experimental::RFieldBase > > CallFieldBaseCreate(const std::string &fieldName, const std::string &canonicalType, const std::string &typeAlias, const RCreateFieldOptions &options, const RNTupleDescriptor *desc, DescriptorId_t fieldId)
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
The window of element indexes of a particular column in a particular cluster.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
bool fReturnInvalidOnError
If true, failing to create a field will return a RInvalidField instead of throwing an exception.
bool fCreateBare
If true, the model will be created without a default entry (bare model).
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.
bool fEmulateUnknownTypes
If true, fields with a user defined type that have no available dictionaries will be reconstructed as...
bool fForwardCompatible
Normally creating a model will fail if any of the reconstructed fields contains an unknown column typ...
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345