Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#include <ROOT/RError.hxx>
18#include <ROOT/RFieldBase.hxx>
20#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
23#include <string_view>
24
25#include <RZip.h>
26#include <TError.h>
28
29#include <algorithm>
30#include <cstdint>
31#include <deque>
32#include <functional>
33#include <iostream>
34#include <set>
35#include <utility>
36
38{
39 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
41 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
42 fStructure == other.fStructure && fParentId == other.fParentId &&
45}
46
48{
49 RFieldDescriptor clone;
50 clone.fFieldId = fFieldId;
51 clone.fFieldVersion = fFieldVersion;
52 clone.fTypeVersion = fTypeVersion;
53 clone.fFieldName = fFieldName;
54 clone.fFieldDescription = fFieldDescription;
55 clone.fTypeName = fTypeName;
56 clone.fTypeAlias = fTypeAlias;
57 clone.fNRepetitions = fNRepetitions;
58 clone.fStructure = fStructure;
59 clone.fParentId = fParentId;
60 clone.fProjectionSourceId = fProjectionSourceId;
61 clone.fLinkIds = fLinkIds;
62 clone.fColumnCardinality = fColumnCardinality;
63 clone.fLogicalColumnIds = fLogicalColumnIds;
64 clone.fTypeChecksum = fTypeChecksum;
65 return clone;
66}
67
68std::unique_ptr<ROOT::Experimental::RFieldBase>
69ROOT::Experimental::RFieldDescriptor::CreateField(const RNTupleDescriptor &ntplDesc, bool continueOnError) const
70{
71 if (GetStructure() == ENTupleStructure::kStreamer) {
72 auto streamerField = std::make_unique<RStreamerField>(GetFieldName(), GetTypeName());
73 streamerField->SetOnDiskId(fFieldId);
74 return streamerField;
75 }
76
77 // The structure may be unknown if the descriptor comes from a deserialized field with an unknown structural role.
78 // For forward compatibility, we allow this case and return an InvalidField.
79 if (GetStructure() == ENTupleStructure::kUnknown) {
80 if (continueOnError) {
81 auto invalidField = std::make_unique<RInvalidField>(GetFieldName(), GetTypeName(), "");
82 invalidField->SetOnDiskId(fFieldId);
83 return invalidField;
84 } else {
85 throw RException(R__FAIL("unexpected on-disk field structure value for field \"" + GetFieldName() + "\""));
86 }
87 }
88
89 if (GetTypeName().empty()) {
90 switch (GetStructure()) {
92 std::vector<std::unique_ptr<RFieldBase>> memberFields;
93 for (auto id : fLinkIds) {
94 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
95 auto field = memberDesc.CreateField(ntplDesc, continueOnError);
96 if (dynamic_cast<RInvalidField *>(field.get()))
97 return field;
98 memberFields.emplace_back(std::move(field));
99 }
100 auto recordField = std::make_unique<RRecordField>(GetFieldName(), std::move(memberFields));
101 recordField->SetOnDiskId(fFieldId);
102 return recordField;
103 }
105 if (fLinkIds.size() != 1) {
106 throw RException(R__FAIL("unsupported untyped collection for field \"" + GetFieldName() + "\""));
107 }
108 auto itemField = ntplDesc.GetFieldDescriptor(fLinkIds[0]).CreateField(ntplDesc, continueOnError);
109 if (dynamic_cast<RInvalidField *>(itemField.get()))
110 return itemField;
111 auto collectionField = RVectorField::CreateUntyped(GetFieldName(), std::move(itemField));
112 collectionField->SetOnDiskId(fFieldId);
113 return collectionField;
114 }
115 default: throw RException(R__FAIL("unsupported untyped field structure for field \"" + GetFieldName() + "\""));
116 }
117 }
118
119 try {
120 auto field = RFieldBase::Create(GetFieldName(), GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias()).Unwrap();
121 field->SetOnDiskId(fFieldId);
122 for (auto &f : *field)
123 f.SetOnDiskId(ntplDesc.FindFieldId(f.GetFieldName(), f.GetParent()->GetOnDiskId()));
124 return field;
125 } catch (RException &ex) {
126 if (continueOnError)
127 return std::make_unique<RInvalidField>(GetFieldName(), GetTypeName(), ex.GetError().GetReport());
128 else
129 throw ex;
130 }
131}
132
134{
135 if (fStructure != ENTupleStructure::kRecord && fStructure != ENTupleStructure::kStreamer)
136 return false;
137
138 // Skip untyped structs
139 if (fTypeName.empty())
140 return false;
141
142 if (fStructure == ENTupleStructure::kRecord) {
143 if (fTypeName.compare(0, 10, "std::pair<") == 0)
144 return false;
145 if (fTypeName.compare(0, 11, "std::tuple<") == 0)
146 return false;
147 }
148
149 return true;
150}
151
152////////////////////////////////////////////////////////////////////////////////
153
155{
156 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
157 fBitsOnStorage == other.fBitsOnStorage && fType == other.fType && fFieldId == other.fFieldId &&
158 fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex &&
159 fValueRange == other.fValueRange;
160}
161
163{
164 RColumnDescriptor clone;
165 clone.fLogicalColumnId = fLogicalColumnId;
166 clone.fPhysicalColumnId = fPhysicalColumnId;
167 clone.fBitsOnStorage = fBitsOnStorage;
168 clone.fType = fType;
169 clone.fFieldId = fFieldId;
170 clone.fIndex = fIndex;
171 clone.fFirstElementIndex = fFirstElementIndex;
172 clone.fRepresentationIndex = fRepresentationIndex;
173 clone.fValueRange = fValueRange;
174 return clone;
175}
176
177////////////////////////////////////////////////////////////////////////////////
178
181{
182 // TODO(jblomer): binary search
183 RPageInfo pageInfo;
184 decltype(idxInCluster) firstInPage = 0;
185 NTupleSize_t pageNo = 0;
186 for (const auto &pi : fPageInfos) {
187 if (firstInPage + pi.fNElements > idxInCluster) {
188 pageInfo = pi;
189 break;
190 }
191 firstInPage += pi.fNElements;
192 ++pageNo;
193 }
194 R__ASSERT(firstInPage <= idxInCluster);
195 R__ASSERT((firstInPage + pageInfo.fNElements) > idxInCluster);
196 return RPageInfoExtended{pageInfo, firstInPage, pageNo};
197}
198
199std::size_t
201 const Internal::RColumnElementBase &element,
202 std::size_t pageSize)
203{
204 R__ASSERT(fPhysicalColumnId == columnRange.fPhysicalColumnId);
205 R__ASSERT(!columnRange.fIsSuppressed);
206
207 const auto nElements = std::accumulate(fPageInfos.begin(), fPageInfos.end(), 0U,
208 [](std::size_t n, const auto &PI) { return n + PI.fNElements; });
209 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.fNElements);
210
211 if (nElementsRequired == nElements)
212 return 0U;
213 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
214
215 std::vector<RPageInfo> pageInfos;
216 // Synthesize new `RPageInfo`s as needed
217 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
218 R__ASSERT(nElementsPerPage > 0);
219 for (auto nRemainingElements = nElementsRequired - nElements; nRemainingElements > 0;) {
221 PI.fNElements = std::min(nElementsPerPage, nRemainingElements);
222 PI.fLocator.fType = RNTupleLocator::kTypePageZero;
223 PI.fLocator.fBytesOnStorage = element.GetPackedSize(PI.fNElements);
224 pageInfos.emplace_back(PI);
225 nRemainingElements -= PI.fNElements;
226 }
227
228 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
229 std::make_move_iterator(fPageInfos.end()));
230 std::swap(fPageInfos, pageInfos);
231 return nElementsRequired - nElements;
232}
233
235{
236 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
237 fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
238}
239
241{
242 std::uint64_t nbytes = 0;
243 for (const auto &pr : fPageRanges) {
244 for (const auto &pi : pr.second.fPageInfos) {
245 nbytes += pi.fLocator.fBytesOnStorage;
246 }
247 }
248 return nbytes;
249}
250
252{
253 RClusterDescriptor clone;
254 clone.fClusterId = fClusterId;
255 clone.fFirstEntryIndex = fFirstEntryIndex;
256 clone.fNEntries = fNEntries;
257 clone.fColumnRanges = fColumnRanges;
258 for (const auto &d : fPageRanges)
259 clone.fPageRanges.emplace(d.first, d.second.Clone());
260 return clone;
261}
262
263////////////////////////////////////////////////////////////////////////////////
264
266{
267 return fContentId == other.fContentId && fTypeName == other.fTypeName && fTypeVersion == other.fTypeVersion;
268}
269
271{
273 clone.fContentId = fContentId;
274 clone.fTypeVersion = fTypeVersion;
275 clone.fTypeName = fTypeName;
276 clone.fContent = fContent;
277 return clone;
278}
279
280////////////////////////////////////////////////////////////////////////////////
281
283{
284 // clang-format off
285 return fName == other.fName &&
286 fDescription == other.fDescription &&
287 fNEntries == other.fNEntries &&
288 fGeneration == other.fGeneration &&
289 fFieldZeroId == other.fFieldZeroId &&
290 fFieldDescriptors == other.fFieldDescriptors &&
291 fColumnDescriptors == other.fColumnDescriptors &&
292 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
293 fClusterDescriptors == other.fClusterDescriptors;
294 // clang-format on
295}
296
299{
301 for (const auto &cd : fClusterDescriptors) {
302 if (!cd.second.ContainsColumn(physicalColumnId))
303 continue;
304 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
305 result = std::max(result, columnRange.fFirstElementIndex + columnRange.fNElements);
306 }
307 return result;
308}
309
312{
313 std::string leafName(fieldName);
314 auto posDot = leafName.find_last_of('.');
315 if (posDot != std::string::npos) {
316 auto parentName = leafName.substr(0, posDot);
317 leafName = leafName.substr(posDot + 1);
318 parentId = FindFieldId(parentName, parentId);
319 }
320 auto itrFieldDesc = fFieldDescriptors.find(parentId);
321 if (itrFieldDesc == fFieldDescriptors.end())
323 for (const auto linkId : itrFieldDesc->second.GetLinkIds()) {
324 if (fFieldDescriptors.at(linkId).GetFieldName() == leafName)
325 return linkId;
326 }
328}
329
331{
332 if (fieldId == kInvalidDescriptorId)
333 return "";
334
335 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
336 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
337 if (prefix.empty())
338 return fieldDescriptor.GetFieldName();
339 return prefix + "." + fieldDescriptor.GetFieldName();
340}
341
343{
344 return FindFieldId(fieldName, GetFieldZeroId());
345}
346
349 std::uint16_t representationIndex) const
350{
351 auto itr = fFieldDescriptors.find(fieldId);
352 if (itr == fFieldDescriptors.cend())
354 if (columnIndex >= itr->second.GetColumnCardinality())
356 const auto idx = representationIndex * itr->second.GetColumnCardinality() + columnIndex;
357 if (itr->second.GetLogicalColumnIds().size() <= idx)
359 return itr->second.GetLogicalColumnIds()[idx];
360}
361
364 std::uint16_t representationIndex) const
365{
366 auto logicalId = FindLogicalColumnId(fieldId, columnIndex, representationIndex);
367 if (logicalId == kInvalidDescriptorId)
369 return GetColumnDescriptor(logicalId).GetPhysicalId();
370}
371
374{
375 // TODO(jblomer): binary search?
376 for (const auto &cd : fClusterDescriptors) {
377 if (!cd.second.ContainsColumn(physicalColumnId))
378 continue;
379 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
380 if (columnRange.Contains(index))
381 return cd.second.GetId();
382 }
384}
385
386// TODO(jblomer): fix for cases of sharded clusters
389{
390 const auto &clusterDesc = GetClusterDescriptor(clusterId);
391 auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
392 // TODO(jblomer): binary search?
393 for (const auto &cd : fClusterDescriptors) {
394 if (cd.second.GetFirstEntryIndex() == firstEntryInNextCluster)
395 return cd.second.GetId();
396 }
398}
399
400// TODO(jblomer): fix for cases of sharded clusters
403{
404 const auto &clusterDesc = GetClusterDescriptor(clusterId);
405 // TODO(jblomer): binary search?
406 for (const auto &cd : fClusterDescriptors) {
407 if (cd.second.GetFirstEntryIndex() + cd.second.GetNEntries() == clusterDesc.GetFirstEntryIndex())
408 return cd.second.GetId();
409 }
411}
412
413std::vector<ROOT::Experimental::DescriptorId_t>
415{
416 auto fieldZeroId = desc.GetFieldZeroId();
417
418 std::vector<DescriptorId_t> fields;
419 for (const DescriptorId_t fieldId : fFieldIdsOrder) {
420 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
421 fields.emplace_back(fieldId);
422 }
423 return fields;
424}
425
427 const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
428 : fNTuple(ntuple), fColumns(field.GetLogicalColumnIds())
429{
430}
431
433 const RNTupleDescriptor &ntuple)
434 : fNTuple(ntuple)
435{
436 std::deque<DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
437
438 while (!fieldIdQueue.empty()) {
439 auto currFieldId = fieldIdQueue.front();
440 fieldIdQueue.pop_front();
441
442 const auto &columns = ntuple.GetFieldDescriptor(currFieldId).GetLogicalColumnIds();
443 fColumns.insert(fColumns.end(), columns.begin(), columns.end());
444
445 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
446 auto fieldId = field.GetId();
447 fieldIdQueue.push_back(fieldId);
448 }
449 }
450}
451
453{
454 std::vector<std::uint64_t> result;
455 unsigned int base = 0;
456 std::uint64_t flags = 0;
457 for (auto f : fFeatureFlags) {
458 if ((f > 0) && ((f % 64) == 0))
459 throw RException(R__FAIL("invalid feature flag: " + std::to_string(f)));
460 while (f > base + 64) {
461 result.emplace_back(flags);
462 flags = 0;
463 base += 64;
464 }
465 f -= base;
466 flags |= 1 << f;
467 }
468 result.emplace_back(flags);
469 return result;
470}
471
474 std::vector<RClusterDescriptor> &clusterDescs)
475{
476 auto iter = fClusterGroupDescriptors.find(clusterGroupId);
477 if (iter == fClusterGroupDescriptors.end())
478 return R__FAIL("invalid attempt to add details of unknown cluster group");
479 if (iter->second.HasClusterDetails())
480 return R__FAIL("invalid attempt to re-populate cluster group details");
481 if (iter->second.GetNClusters() != clusterDescs.size())
482 return R__FAIL("mismatch of number of clusters");
483
484 std::vector<DescriptorId_t> clusterIds;
485 for (unsigned i = 0; i < clusterDescs.size(); ++i) {
486 clusterIds.emplace_back(clusterDescs[i].GetId());
487 auto [_, success] = fClusterDescriptors.emplace(clusterIds.back(), std::move(clusterDescs[i]));
488 if (!success) {
489 return R__FAIL("invalid attempt to re-populate existing cluster");
490 }
491 }
492 auto cgBuilder = Internal::RClusterGroupDescriptorBuilder::FromSummary(iter->second);
493 cgBuilder.AddClusters(clusterIds);
494 iter->second = cgBuilder.MoveDescriptor().Unwrap();
495 return RResult<void>::Success();
496}
497
500{
501 auto iter = fClusterGroupDescriptors.find(clusterGroupId);
502 if (iter == fClusterGroupDescriptors.end())
503 return R__FAIL("invalid attempt to drop cluster details of unknown cluster group");
504 if (!iter->second.HasClusterDetails())
505 return R__FAIL("invalid attempt to drop details of cluster group summary");
506
507 for (auto clusterId : iter->second.GetClusterIds())
508 fClusterDescriptors.erase(clusterId);
509 iter->second = iter->second.CloneSummary();
510 return RResult<void>::Success();
511}
512
513std::unique_ptr<ROOT::Experimental::RNTupleModel>
515{
516 auto fieldZero = std::make_unique<RFieldZero>();
517 fieldZero->SetOnDiskId(GetFieldZeroId());
518 auto model =
519 options.fCreateBare ? RNTupleModel::CreateBare(std::move(fieldZero)) : RNTupleModel::Create(std::move(fieldZero));
520 bool continueOnError = options.fForwardCompatible;
521 for (const auto &topDesc : GetTopLevelFields()) {
522 auto field = topDesc.CreateField(*this, continueOnError);
523 if (dynamic_cast<RInvalidField *>(field.get()))
524 continue;
525
526 if (options.fReconstructProjections && topDesc.IsProjectedField()) {
527 model->AddProjectedField(std::move(field), [this](const std::string &targetName) -> std::string {
528 return GetQualifiedFieldName(GetFieldDescriptor(FindFieldId(targetName)).GetProjectionSourceId());
529 });
530 } else {
531 model->AddField(std::move(field));
532 }
533 }
534 model->Freeze();
535 return model;
536}
537
538std::unique_ptr<ROOT::Experimental::RNTupleDescriptor> ROOT::Experimental::RNTupleDescriptor::Clone() const
539{
540 auto clone = std::make_unique<RNTupleDescriptor>();
541 clone->fName = fName;
542 clone->fDescription = fDescription;
543 clone->fOnDiskHeaderXxHash3 = fOnDiskHeaderXxHash3;
544 clone->fOnDiskHeaderSize = fOnDiskHeaderSize;
545 clone->fOnDiskFooterSize = fOnDiskFooterSize;
546 clone->fNEntries = fNEntries;
547 clone->fNClusters = fNClusters;
548 clone->fNPhysicalColumns = fNPhysicalColumns;
549 clone->fFieldZeroId = fFieldZeroId;
550 clone->fGeneration = fGeneration;
551 for (const auto &d : fFieldDescriptors)
552 clone->fFieldDescriptors.emplace(d.first, d.second.Clone());
553 for (const auto &d : fColumnDescriptors)
554 clone->fColumnDescriptors.emplace(d.first, d.second.Clone());
555 for (const auto &d : fClusterGroupDescriptors)
556 clone->fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
557 for (const auto &d : fClusterDescriptors)
558 clone->fClusterDescriptors.emplace(d.first, d.second.Clone());
559 for (const auto &d : fExtraTypeInfoDescriptors)
560 clone->fExtraTypeInfoDescriptors.emplace_back(d.Clone());
562 clone->fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
563 return clone;
564}
565
566////////////////////////////////////////////////////////////////////////////////
567
569{
570 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds &&
571 fMinEntry == other.fMinEntry && fEntrySpan == other.fEntrySpan && fNClusters == other.fNClusters;
572}
573
575{
577 clone.fClusterGroupId = fClusterGroupId;
578 clone.fPageListLocator = fPageListLocator;
579 clone.fPageListLength = fPageListLength;
580 clone.fMinEntry = fMinEntry;
581 clone.fEntrySpan = fEntrySpan;
582 clone.fNClusters = fNClusters;
583 return clone;
584}
585
587{
588 RClusterGroupDescriptor clone = CloneSummary();
589 clone.fClusterIds = fClusterIds;
590 return clone;
591}
592
593////////////////////////////////////////////////////////////////////////////////
594
596 DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings,
597 const RClusterDescriptor::RPageRange &pageRange)
598{
599 if (physicalId != pageRange.fPhysicalColumnId)
600 return R__FAIL("column ID mismatch");
601 if (fCluster.fColumnRanges.count(physicalId) > 0)
602 return R__FAIL("column ID conflict");
603 RClusterDescriptor::RColumnRange columnRange{physicalId, firstElementIndex, ClusterSize_t{0}};
604 columnRange.fCompressionSettings = compressionSettings;
605 for (const auto &pi : pageRange.fPageInfos) {
606 columnRange.fNElements += pi.fNElements;
607 }
608 fCluster.fPageRanges[physicalId] = pageRange.Clone();
609 fCluster.fColumnRanges[physicalId] = columnRange;
610 return RResult<void>::Success();
611}
612
615{
616 if (fCluster.fColumnRanges.count(physicalId) > 0)
617 return R__FAIL("column ID conflict");
618
620 columnRange.fPhysicalColumnId = physicalId;
622 columnRange.fIsSuppressed = true;
623 fCluster.fColumnRanges[physicalId] = columnRange;
624 return RResult<void>::Success();
625}
626
629{
630 for (auto &[_, columnRange] : fCluster.fColumnRanges) {
631 if (!columnRange.fIsSuppressed)
632 continue;
633 R__ASSERT(columnRange.fFirstElementIndex == kInvalidNTupleIndex);
634
635 const auto &columnDesc = desc.GetColumnDescriptor(columnRange.fPhysicalColumnId);
636 const auto &fieldDesc = desc.GetFieldDescriptor(columnDesc.GetFieldId());
637 // We expect only few columns and column representations per field, so we do a linear search
638 for (const auto otherColumnLogicalId : fieldDesc.GetLogicalColumnIds()) {
639 const auto &otherColumnDesc = desc.GetColumnDescriptor(otherColumnLogicalId);
640 if (otherColumnDesc.GetRepresentationIndex() == columnDesc.GetRepresentationIndex())
641 continue;
642 if (otherColumnDesc.GetIndex() != columnDesc.GetIndex())
643 continue;
644
645 // Found corresponding column of a different column representation
646 const auto &otherColumnRange = fCluster.GetColumnRange(otherColumnDesc.GetPhysicalId());
647 if (otherColumnRange.fIsSuppressed)
648 continue;
649
650 columnRange.fFirstElementIndex = otherColumnRange.fFirstElementIndex;
651 columnRange.fNElements = otherColumnRange.fNElements;
652 break;
653 }
654
655 if (columnRange.fFirstElementIndex == kInvalidNTupleIndex) {
656 return R__FAIL(std::string("cannot find non-suppressed column for column ID ") +
657 std::to_string(columnRange.fPhysicalColumnId) +
658 ", cluster ID: " + std::to_string(fCluster.GetId()));
659 }
660 }
661 return RResult<void>::Success();
662}
663
666{
667 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
668 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
669 /// parent field in the hierarchy.
670 auto fnTraverseSubtree = [&](DescriptorId_t rootFieldId, std::uint64_t nRepetitionsAtThisLevel,
671 const auto &visitField, const auto &enterSubtree) -> void {
672 visitField(rootFieldId, nRepetitionsAtThisLevel);
673 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
674 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
675 enterSubtree(f.GetId(), nRepetitions, visitField, enterSubtree);
676 }
677 };
678
679 // Extended columns can only be part of the header extension
680 if (!desc.GetHeaderExtension())
681 return *this;
682
683 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
684 // Extended columns can be attached both to fields of the regular header and to fields of the extension header
685 for (const auto &topLevelField : desc.GetTopLevelFields()) {
686 fnTraverseSubtree(
687 topLevelField.GetId(), std::max(topLevelField.GetNRepetitions(), std::uint64_t{1U}),
688 [&](DescriptorId_t fieldId, std::uint64_t nRepetitions) {
689 for (const auto &c : desc.GetColumnIterable(fieldId)) {
690 const DescriptorId_t physicalId = c.GetPhysicalId();
691 auto &columnRange = fCluster.fColumnRanges[physicalId];
692
693 // Initialize a RColumnRange for `physicalId` if it was not there. Columns that were created during model
694 // extension won't have on-disk metadata for the clusters that were already committed before the model
695 // was extended. Therefore, these need to be synthetically initialized upon reading.
696 if (columnRange.fPhysicalColumnId == kInvalidDescriptorId) {
697 columnRange.fPhysicalColumnId = physicalId;
698 columnRange.fFirstElementIndex = 0;
699 columnRange.fNElements = 0;
700 columnRange.fIsSuppressed = c.IsSuppressedDeferredColumn();
701 }
702 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
703 // number of elements should have been if the column was not deferred; fix those and let
704 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
705 // Note that a deferred column (i.e, whose first element index is > 0) already met the criteria of
706 // `RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the field zero
707 // excluding subfields of collection and variant fields.
708 if (c.IsDeferredColumn()) {
709 columnRange.fFirstElementIndex = fCluster.GetFirstEntryIndex() * nRepetitions;
710 columnRange.fNElements = fCluster.GetNEntries() * nRepetitions;
711 if (!columnRange.fIsSuppressed) {
712 auto &pageRange = fCluster.fPageRanges[physicalId];
713 pageRange.fPhysicalColumnId = physicalId;
714 const auto element = Internal::RColumnElementBase::Generate<void>(c.GetType());
715 pageRange.ExtendToFitColumnRange(columnRange, *element, Internal::RPage::kPageZeroSize);
716 }
717 } else if (!columnRange.fIsSuppressed) {
718 fCluster.fPageRanges[physicalId].fPhysicalColumnId = physicalId;
719 }
720 }
721 },
722 fnTraverseSubtree);
723 }
724 return *this;
725}
726
729{
730 if (fCluster.fClusterId == kInvalidDescriptorId)
731 return R__FAIL("unset cluster ID");
732 if (fCluster.fNEntries == 0)
733 return R__FAIL("empty cluster");
734 for (const auto &pr : fCluster.fPageRanges) {
735 if (fCluster.fColumnRanges.count(pr.first) == 0) {
736 return R__FAIL("missing column range");
737 }
738 }
740 std::swap(result, fCluster);
741 return result;
742}
743
744////////////////////////////////////////////////////////////////////////////////
745
748 const RClusterGroupDescriptor &clusterGroupDesc)
749{
751 builder.ClusterGroupId(clusterGroupDesc.GetId())
752 .PageListLocator(clusterGroupDesc.GetPageListLocator())
753 .PageListLength(clusterGroupDesc.GetPageListLength())
754 .MinEntry(clusterGroupDesc.GetMinEntry())
755 .EntrySpan(clusterGroupDesc.GetEntrySpan())
756 .NClusters(clusterGroupDesc.GetNClusters());
757 return builder;
758}
759
762{
763 if (fClusterGroup.fClusterGroupId == kInvalidDescriptorId)
764 return R__FAIL("unset cluster group ID");
766 std::swap(result, fClusterGroup);
767 return result;
768}
769
770////////////////////////////////////////////////////////////////////////////////
771
774{
775 if (fExtraTypeInfo.fContentId == EExtraTypeInfoIds::kInvalid)
776 throw RException(R__FAIL("invalid extra type info content id"));
778 std::swap(result, fExtraTypeInfo);
779 return result;
780}
781
782////////////////////////////////////////////////////////////////////////////////
783
786{
787 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
788 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
789 return RResult<void>::Success();
790}
791
793{
794 // Reuse field name validity check
795 auto validName = ROOT::Experimental::Internal::EnsureValidNameForRNTuple(fDescriptor.GetName(), "Field");
796 if (!validName) {
797 return R__FORWARD_ERROR(validName);
798 }
799
800 for (const auto &[fieldId, fieldDesc] : fDescriptor.fFieldDescriptors) {
801 // parent not properly set?
802 if (fieldId != fDescriptor.GetFieldZeroId() && fieldDesc.GetParentId() == kInvalidDescriptorId) {
803 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has an invalid parent id");
804 }
805
806 // Same number of columns in every column representation?
807 const auto columnCardinality = fieldDesc.GetColumnCardinality();
808 if (columnCardinality == 0)
809 continue;
810
811 // In AddColumn, we already checked that all but the last representation are complete.
812 // Check that the last column representation is complete, i.e. has all columns.
813 const auto &logicalColumnIds = fieldDesc.GetLogicalColumnIds();
814 const auto nColumns = logicalColumnIds.size();
815 // If we have only a single column representation, the following condition is true by construction
816 if ((nColumns + 1) == columnCardinality)
817 continue;
818
819 const auto &lastColumn = fDescriptor.GetColumnDescriptor(logicalColumnIds.back());
820 if (lastColumn.GetIndex() + 1 != columnCardinality)
821 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has incomplete column representations");
822 }
823
824 return RResult<void>::Success();
825}
826
828{
829 EnsureValidDescriptor().ThrowOnError();
831 std::swap(result, fDescriptor);
832 return result;
833}
834
836 const std::string_view description)
837{
838 fDescriptor.fName = std::string(name);
839 fDescriptor.fDescription = std::string(description);
840}
841
843{
844 if (flag % 64 == 0)
845 throw RException(R__FAIL("invalid feature flag: " + std::to_string(flag)));
846 fDescriptor.fFeatureFlags.insert(flag);
847}
848
851{
852 if (fColumn.GetLogicalId() == kInvalidDescriptorId)
853 return R__FAIL("invalid logical column id");
854 if (fColumn.GetPhysicalId() == kInvalidDescriptorId)
855 return R__FAIL("invalid physical column id");
856 if (fColumn.GetFieldId() == kInvalidDescriptorId)
857 return R__FAIL("invalid field id, dangling column");
858
859 // NOTE: if the column type is unknown we don't want to fail, as we might be reading an RNTuple
860 // created with a future version of ROOT. In this case we just skip the valid bit range check,
861 // as we have no idea what the valid range is.
862 // In general, reading the metadata of an unknown column is fine, it becomes an error only when
863 // we try to read the actual data contained in it.
864 if (fColumn.GetType() != EColumnType::kUnknown) {
865 const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(fColumn.GetType());
866 if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
867 return R__FAIL("invalid column bit width");
868 }
869
870 return fColumn.Clone();
871}
872
874 : fField(fieldDesc.Clone())
875{
877 fField.fLinkIds = {};
879}
880
883{
884 RFieldDescriptorBuilder fieldDesc;
885 fieldDesc.FieldVersion(field.GetFieldVersion())
887 .FieldName(field.GetFieldName())
889 .TypeName(field.GetTypeName())
890 .TypeAlias(field.GetTypeAlias())
891 .Structure(field.GetStructure())
894 fieldDesc.TypeChecksum(field.GetTypeChecksum());
895 return fieldDesc;
896}
897
900{
901 if (fField.GetId() == kInvalidDescriptorId) {
902 return R__FAIL("invalid field id");
903 }
904 if (fField.GetStructure() == ENTupleStructure::kInvalid) {
905 return R__FAIL("invalid field structure");
906 }
907 // FieldZero is usually named "" and would be a false positive here
908 if (fField.GetParentId() != kInvalidDescriptorId) {
909 auto validName = ROOT::Experimental::Internal::EnsureValidNameForRNTuple(fField.GetFieldName(), "Field");
910 if (!validName) {
911 return R__FORWARD_ERROR(validName);
912 }
913 if (fField.GetFieldName().empty()) {
914 return R__FAIL("name cannot be empty string \"\"");
915 }
916 }
917 return fField.Clone();
918}
919
921{
922 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
923 if (fDescriptor.fHeaderExtension)
924 fDescriptor.fHeaderExtension->AddExtendedField(fieldDesc);
925 if (fieldDesc.GetFieldName().empty() && fieldDesc.GetParentId() == kInvalidDescriptorId) {
926 fDescriptor.fFieldZeroId = fieldDesc.GetId();
927 }
928}
929
932{
933 auto fieldExists = RResult<void>::Success();
934 if (!(fieldExists = EnsureFieldExists(fieldId)))
935 return R__FORWARD_ERROR(fieldExists);
936 if (!(fieldExists = EnsureFieldExists(linkId)))
937 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
938
939 if (linkId == fDescriptor.GetFieldZeroId()) {
940 return R__FAIL("cannot make FieldZero a child field");
941 }
942 // fail if field already has another valid parent
943 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
944 if ((parentId != kInvalidDescriptorId) && (parentId != fieldId)) {
945 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" + std::to_string(parentId) + ")");
946 }
947 if (fieldId == linkId) {
948 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
949 }
950 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
951 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
952 return RResult<void>::Success();
953}
954
957 DescriptorId_t targetId)
958{
959 auto fieldExists = RResult<void>::Success();
960 if (!(fieldExists = EnsureFieldExists(sourceId)))
961 return R__FORWARD_ERROR(fieldExists);
962 if (!(fieldExists = EnsureFieldExists(targetId)))
963 return R__FAIL("projected field with id '" + std::to_string(targetId) + "' doesn't exist in NTuple");
964
965 if (targetId == fDescriptor.GetFieldZeroId()) {
966 return R__FAIL("cannot make FieldZero a projected field");
967 }
968 if (sourceId == targetId) {
969 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of itself");
970 }
971 if (fDescriptor.fFieldDescriptors.at(sourceId).IsProjectedField()) {
972 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of an already projected field");
973 }
974 // fail if target field already has another valid projection source
975 auto &targetDesc = fDescriptor.fFieldDescriptors.at(targetId);
976 if (targetDesc.IsProjectedField() && targetDesc.GetProjectionSourceId() != sourceId) {
977 return R__FAIL("field '" + std::to_string(targetId) + "' has already a projection source ('" +
978 std::to_string(targetDesc.GetProjectionSourceId()) + ")");
979 }
980 fDescriptor.fFieldDescriptors.at(targetId).fProjectionSourceId = sourceId;
981 return RResult<void>::Success();
982}
983
986{
987 const auto fieldId = columnDesc.GetFieldId();
988 const auto columnIndex = columnDesc.GetIndex();
989 const auto representationIndex = columnDesc.GetRepresentationIndex();
990
991 auto fieldExists = EnsureFieldExists(fieldId);
992 if (!fieldExists) {
993 return R__FORWARD_ERROR(fieldExists);
994 }
995 auto &fieldDesc = fDescriptor.fFieldDescriptors.find(fieldId)->second;
996
997 if (columnDesc.IsAliasColumn()) {
998 if (columnDesc.GetType() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetType())
999 return R__FAIL("alias column type mismatch");
1000 }
1001 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex, representationIndex) != kInvalidDescriptorId) {
1002 return R__FAIL("column index clash");
1003 }
1004 if (columnIndex > 0) {
1005 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex - 1, representationIndex) == kInvalidDescriptorId)
1006 return R__FAIL("out of bounds column index");
1007 }
1008 if (representationIndex > 0) {
1009 if (fDescriptor.FindLogicalColumnId(fieldId, 0, representationIndex - 1) == kInvalidDescriptorId) {
1010 return R__FAIL("out of bounds representation index");
1011 }
1012 if (columnIndex == 0) {
1013 assert(fieldDesc.fColumnCardinality > 0);
1014 if (fDescriptor.FindLogicalColumnId(fieldId, fieldDesc.fColumnCardinality - 1, representationIndex - 1) ==
1016 return R__FAIL("incomplete column representations");
1017 }
1018 } else {
1019 if (columnIndex >= fieldDesc.fColumnCardinality)
1020 return R__FAIL("irregular column representations");
1021 }
1022 } else {
1023 // This will set the column cardinality to the number of columns of the first representation
1024 fieldDesc.fColumnCardinality = columnIndex + 1;
1025 }
1026
1027 const auto logicalId = columnDesc.GetLogicalId();
1028 fieldDesc.fLogicalColumnIds.emplace_back(logicalId);
1029
1030 if (!columnDesc.IsAliasColumn())
1031 fDescriptor.fNPhysicalColumns++;
1032 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
1033 if (fDescriptor.fHeaderExtension)
1034 fDescriptor.fHeaderExtension->AddExtendedColumn(columnDesc);
1035
1036 return RResult<void>::Success();
1037}
1038
1041{
1042 const auto id = clusterGroup.GetId();
1043 if (fDescriptor.fClusterGroupDescriptors.count(id) > 0)
1044 return R__FAIL("cluster group id clash");
1045 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, clusterGroup.GetMinEntry() + clusterGroup.GetEntrySpan());
1046 fDescriptor.fNClusters += clusterGroup.GetNClusters();
1047 fDescriptor.fClusterGroupDescriptors.emplace(id, std::move(clusterGroup));
1048 return RResult<void>::Success();
1049}
1050
1052{
1053 fDescriptor.fName = "";
1054 fDescriptor.fDescription = "";
1055 fDescriptor.fFieldDescriptors.clear();
1056 fDescriptor.fColumnDescriptors.clear();
1057 fDescriptor.fClusterDescriptors.clear();
1058 fDescriptor.fClusterGroupDescriptors.clear();
1059 fDescriptor.fHeaderExtension.reset();
1060}
1061
1063{
1064 if (!fDescriptor.fHeaderExtension)
1065 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
1066}
1067
1069{
1070 if (fDescriptor.GetNLogicalColumns() == 0)
1071 return;
1072 R__ASSERT(fDescriptor.GetNPhysicalColumns() > 0);
1073
1074 for (DescriptorId_t id = fDescriptor.GetNLogicalColumns() - 1; id >= fDescriptor.GetNPhysicalColumns(); --id) {
1075 auto c = fDescriptor.fColumnDescriptors[id].Clone();
1076 R__ASSERT(c.IsAliasColumn());
1077 R__ASSERT(id == c.GetLogicalId());
1078 fDescriptor.fColumnDescriptors.erase(id);
1079 for (auto &link : fDescriptor.fFieldDescriptors[c.fFieldId].fLogicalColumnIds) {
1080 if (link == c.fLogicalColumnId) {
1081 link += offset;
1082 break;
1083 }
1084 }
1085 c.fLogicalColumnId += offset;
1086 R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0);
1087 fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c));
1088 }
1089}
1090
1093{
1094 auto clusterId = clusterDesc.GetId();
1095 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
1096 return R__FAIL("cluster id clash");
1097 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
1098 return RResult<void>::Success();
1099}
1100
1103{
1104 // Make sure we have no duplicates
1105 if (std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1106 extraTypeInfoDesc) != fDescriptor.fExtraTypeInfoDescriptors.end()) {
1107 return R__FAIL("extra type info duplicates");
1108 }
1109 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1110 return RResult<void>::Success();
1111}
1112
1115{
1117 const auto &desc = GetDescriptor();
1118
1119 std::function<void(const RFieldDescriptor &)> fnWalkFieldTree;
1120 fnWalkFieldTree = [&desc, &streamerInfoMap, &fnWalkFieldTree](const RFieldDescriptor &fieldDesc) {
1121 if (fieldDesc.IsCustomClass()) {
1122 // Add streamer info for this class to streamerInfoMap
1123 auto cl = TClass::GetClass(fieldDesc.GetTypeName().c_str());
1124 if (!cl) {
1125 throw RException(R__FAIL(std::string("cannot get TClass for ") + fieldDesc.GetTypeName()));
1126 }
1127 auto streamerInfo = cl->GetStreamerInfo(fieldDesc.GetTypeVersion());
1128 if (!streamerInfo) {
1129 throw RException(R__FAIL(std::string("cannot get streamerInfo for ") + fieldDesc.GetTypeName()));
1130 }
1131 streamerInfoMap[streamerInfo->GetNumber()] = streamerInfo;
1132 }
1133
1134 // Recursively traverse sub fields
1135 for (const auto &subFieldDesc : desc.GetFieldIterable(fieldDesc)) {
1136 fnWalkFieldTree(subFieldDesc);
1137 }
1138 };
1139
1140 fnWalkFieldTree(desc.GetFieldZero());
1141
1142 // Add the streamer info records from streamer fields: because of runtime polymorphism we may need to add additional
1143 // types not covered by the type names stored in the field headers
1144 for (const auto &extraTypeInfo : desc.GetExtraTypeInfoIterable()) {
1145 if (extraTypeInfo.GetContentId() != EExtraTypeInfoIds::kStreamerInfo)
1146 continue;
1147 // Ideally, we would avoid deserializing the streamer info records of the streamer fields that we just serialized.
1148 // However, this happens only once at the end of writing and only when streamer fields are used, so the
1149 // preference here is for code simplicity.
1150 streamerInfoMap.merge(RNTupleSerializer::DeserializeStreamerInfos(extraTypeInfo.GetContent()).Unwrap());
1151 }
1152
1153 return streamerInfoMap;
1154}
1155
1158{
1159 return RColumnRangeIterable(*this);
1160}
1161
1164{
1165 return RFieldDescriptorIterable(*this, fieldDesc);
1166}
1167
1169 const RFieldDescriptor &fieldDesc, const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const
1170{
1171 return RFieldDescriptorIterable(*this, fieldDesc, comparator);
1172}
1173
1176{
1177 return GetFieldIterable(GetFieldDescriptor(fieldId));
1178}
1179
1181 DescriptorId_t fieldId, const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const
1182{
1183 return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
1184}
1185
1188{
1189 return GetFieldIterable(GetFieldZeroId());
1190}
1191
1194 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const
1195{
1196 return GetFieldIterable(GetFieldZeroId(), comparator);
1197}
1198
1201{
1202 return RColumnDescriptorIterable(*this);
1203}
1204
1207{
1208 return RColumnDescriptorIterable(*this, fieldDesc);
1209}
1210
1213{
1214 return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
1215}
1216
1219{
1220 return RClusterGroupDescriptorIterable(*this);
1221}
1222
1225{
1226 return RClusterDescriptorIterable(*this);
1227}
1228
1231{
1233}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:294
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
#define d(i)
Definition RSha256.hxx:102
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define PI
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
#define _(A, B)
Definition cfortran.h:108
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RResult< void > MarkSuppressedColumnRange(DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
std::size_t GetPackedSize(std::size_t nElements=1U) const
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddFieldProjection(DescriptorId_t sourceId, DescriptorId_t targetId)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
void ShiftAliasColumns(std::uint32_t offset)
If the descriptor is constructed in pieces consisting of physical and alias columns (regular and proj...
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
Records the partition of data into pages for a particular column in a particular cluster.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
RClusterGroupDescriptor CloneSummary() const
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
Meta-data stored for every column of an ntuple.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
EColumnType fType
The on-disk column type.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Field specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
A field translates read and write calls from/to underlying columns to/from tree values.
const std::string & GetTypeAlias() const
const std::string & GetDescription() const
Get the field's description.
const std::string & GetFieldName() const
ENTupleStructure GetStructure() const
const std::string & GetTypeName() const
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &canonicalType, const std::string &typeAlias, bool continueOnError=false)
Factory method to resurrect a field from the stored on-disk type information.
static constexpr int kTraitTypeChecksum
The TClass checksum is set and valid.
virtual std::uint32_t GetTypeVersion() const
Indicates an evolution of the C++ type itself.
virtual std::uint32_t GetTypeChecksum() const
Return the current TClass reported checksum of this class. Only valid if kTraitTypeChecksum is set.
virtual std::uint32_t GetFieldVersion() const
Indicates an evolution of the mapping scheme from C++ type to columns.
std::size_t GetNRepetitions() const
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, bool continueOnError=false) const
In general, we create a field simply from the C++ type name.
const std::string & GetFieldName() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
const std::vector< DescriptorId_t > & GetLogicalColumnIds() const
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::vector< DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
Used in RFieldBase::Check() to record field creation failures.
Definition RField.hxx:76
Used to loop over all the clusters of an ntuple (in unspecified order)
Used to loop over all the cluster groups of an ntuple (in unspecified order)
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
Used to loop over all the extra type info record of an ntuple (in unspecified order)
std::vector< DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
std::unique_ptr< RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
RColumnDescriptorIterable GetColumnIterable() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::unique_ptr< RHeaderExtension > fHeaderExtension
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
RClusterDescriptorIterable GetClusterIterable() const
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
DescriptorId_t fFieldZeroId
Set by the descriptor builder.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::vector< std::uint64_t > GetFeatureFlags() const
static std::unique_ptr< RNTupleModel > Create()
static std::unique_ptr< RNTupleModel > CreateBare()
A bare model has no default entry.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
static std::unique_ptr< RVectorField > CreateUntyped(std::string_view fieldName, std::unique_ptr< RFieldBase > itemField)
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3037
struct void * fTypeName
Definition cppyy.h:9
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr int kUnknownCompressionSettings
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr DescriptorId_t kInvalidDescriptorId
The window of element indexes of a particular column in a particular cluster.
bool fIsSuppressed
Suppressed columns have an empty page range and unknown compression settings.
int fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
The number of column elements in the cluster.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.
bool fCreateBare
If true, the model will be created without a default entry (bare model).
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.
bool fForwardCompatible
Normally creating a model will fail if any of the reconstructed fields contains an unknown column typ...