Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6
7/*************************************************************************
8 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_RNTupleDescriptor
16#define ROOT_RNTupleDescriptor
17
19#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleTypes.hxx>
22#include <ROOT/RSpan.hxx>
23
24#include <TError.h>
25
26#include <algorithm>
27#include <chrono>
28#include <cmath>
29#include <functional>
30#include <iterator>
31#include <map>
32#include <memory>
33#include <optional>
34#include <ostream>
35#include <vector>
36#include <set>
37#include <string>
38#include <string_view>
39#include <unordered_map>
40#include <unordered_set>
41
42namespace ROOT {
43
44class RFieldBase;
45class RNTupleModel;
46
47namespace Internal {
48class RColumnElementBase;
49}
50
51class RNTupleDescriptor;
52
53namespace Internal {
54class RColumnDescriptorBuilder;
55class RClusterDescriptorBuilder;
56class RClusterGroupDescriptorBuilder;
57class RExtraTypeInfoDescriptorBuilder;
58class RFieldDescriptorBuilder;
59class RNTupleDescriptorBuilder;
60
61RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc);
66
67std::vector<ROOT::Internal::RNTupleClusterBoundaries> GetClusterBoundaries(const RNTupleDescriptor &desc);
68} // namespace Internal
69
70namespace Experimental {
71
72// clang-format off
73/**
74\class ROOT::Experimental::RNTupleAttrSetDescriptor
75\ingroup NTuple
76\brief Metadata stored for every Attribute Set linked to an RNTuple.
77*/
78// clang-format on
81
82 std::uint16_t fSchemaVersionMajor = 0;
83 std::uint16_t fSchemaVersionMinor = 0;
84 std::uint32_t fAnchorLength = 0; ///< uncompressed size of the linked anchor
85 // The locator of the AttributeSet anchor.
86 // In case of kTypeFile, it points to the beginning of the Anchor's payload.
87 // NOTE: Only kTypeFile is supported at the moment.
89 std::string fName;
90
91public:
97
98 bool operator==(const RNTupleAttrSetDescriptor &other) const;
99 bool operator!=(const RNTupleAttrSetDescriptor &other) const { return !(*this == other); }
100
101 const std::string &GetName() const { return fName; }
102 std::uint16_t GetSchemaVersionMajor() const { return fSchemaVersionMajor; }
103 std::uint16_t GetSchemaVersionMinor() const { return fSchemaVersionMinor; }
104 std::uint32_t GetAnchorLength() const { return fAnchorLength; }
106
108};
109
110class RNTupleAttrSetDescriptorIterable;
111
112} // namespace Experimental
113
114// clang-format off
115/**
116\class ROOT::RFieldDescriptor
117\ingroup NTuple
118\brief Metadata stored for every field of an RNTuple
119*/
120// clang-format on
124
125private:
127 /// The version of the C++-type-to-column translation mechanics
128 std::uint32_t fFieldVersion = 0;
129 /// The version of the C++ type itself
130 std::uint32_t fTypeVersion = 0;
131 /// The leaf name, not including parent fields
132 std::string fFieldName;
133 /// Free text set by the user
134 std::string fFieldDescription;
135 /// The C++ type that was used when writing the field
136 std::string fTypeName;
137 /// A typedef or using directive that resolved to the type name during field creation
138 std::string fTypeAlias;
139 /// The number of elements per entry for fixed-size arrays
140 std::uint64_t fNRepetitions = 0;
141 /// The structural information carried by this field in the data model tree
143 /// Establishes sub field relationships, such as classes and collections
145 /// For projected fields, the source field ID
147 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
148 /// order of sub fields.
149 std::vector<ROOT::DescriptorId_t> fLinkIds;
150 /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
151 /// list of logical column ids. For example, the second column of the third column representation is
152 /// fLogicalColumnIds[2 * fColumnCardinality + 1]
153 std::uint32_t fColumnCardinality = 0;
154 /// The ordered list of columns attached to this field: first by representation index then by column index.
155 std::vector<ROOT::DescriptorId_t> fLogicalColumnIds;
156 /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
157 /// identify types by their checksum
158 std::optional<std::uint32_t> fTypeChecksum;
159 /// Indicates if this is a collection that should be represented in memory by a SoA layout.
160 bool fIsSoACollection = false;
161
162public:
163 RFieldDescriptor() = default;
168
169 bool operator==(const RFieldDescriptor &other) const;
170 /// Get a copy of the descriptor
171 RFieldDescriptor Clone() const;
172
173 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
174 /// access to sub fields, which is provided by the RNTupleDescriptor argument.
175 std::unique_ptr<ROOT::RFieldBase>
176 CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options = {}) const;
177
179 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
180 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
181 const std::string &GetFieldName() const { return fFieldName; }
182 const std::string &GetFieldDescription() const { return fFieldDescription; }
183 const std::string &GetTypeName() const { return fTypeName; }
184 const std::string &GetTypeAlias() const { return fTypeAlias; }
185 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
189 const std::vector<ROOT::DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
190 const std::vector<ROOT::DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
191 std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
192 std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
194 bool IsSoACollection() const { return fIsSoACollection; }
195
196 bool IsCustomClass() const R__DEPRECATED(6, 42, "removed from public interface");
197 bool IsCustomEnum(const RNTupleDescriptor &desc) const R__DEPRECATED(6, 42, "removed from public interface");
198 bool IsStdAtomic() const R__DEPRECATED(6, 42, "removed from public interface");
199};
200
201// clang-format off
202/**
203\class ROOT::RColumnDescriptor
204\ingroup NTuple
205\brief Metadata stored for every column of an RNTuple
206*/
207// clang-format on
211
212public:
/// A pair of bounds (fMin, fMax) describing the value range of a column.
///
/// Both bounds default to 0. Comparison uses exact floating-point equality on both bounds,
/// without any tolerance; as a consequence a range holding a NaN bound never compares equal,
/// not even to itself.
struct RValueRange {
   double fMin = 0; ///< Lower bound of the range
   double fMax = 0; ///< Upper bound of the range

   RValueRange() = default;
   /// Build a range from its two bounds. No ordering check (min <= max) is performed.
   RValueRange(double min, double max) : fMin(min), fMax(max) {}
   /// Build a range from a (min, max) pair. Left non-explicit so that a std::pair keeps converting
   /// implicitly, exactly as before.
   RValueRange(std::pair<double, double> range) : fMin(range.first), fMax(range.second) {}

   // Operands are taken by const reference, matching the other comparison operators in this header.
   bool operator==(const RValueRange &other) const { return fMin == other.fMin && fMax == other.fMax; }
   bool operator!=(const RValueRange &other) const { return !(*this == other); }
};
223
224private:
225 /// The actual column identifier, which is the link to the corresponding field
227 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
229 /// Every column belongs to one and only one field
231 /// The absolute value specifies the index for the first stored element for this column.
232 /// For deferred columns the absolute value is larger than zero.
233 /// Negative values specify a suppressed and deferred column.
234 std::int64_t fFirstElementIndex = 0U;
235 /// A field can be serialized into several columns, which are numbered from zero to $n$
236 std::uint32_t fIndex = 0;
237 /// A field may use multiple column representations, which are numbered from zero to $m$.
238 /// Every representation has the same number of columns.
239 std::uint16_t fRepresentationIndex = 0;
240 /// The size in bits of elements of this column. Most columns have the size fixed by their type
241 /// but low-precision float columns have variable bit widths.
242 std::uint16_t fBitsOnStorage = 0;
243 /// The on-disk column type
245 /// Optional value range (used e.g. by quantized real fields)
246 std::optional<RValueRange> fValueRange;
247
248public:
249 RColumnDescriptor() = default;
254
255 bool operator==(const RColumnDescriptor &other) const;
256 /// Get a copy of the descriptor
257 RColumnDescriptor Clone() const;
258
259 ROOT::DescriptorId_t GetLogicalId() const { return fLogicalColumnId; }
260 ROOT::DescriptorId_t GetPhysicalId() const { return fPhysicalColumnId; }
262 std::uint32_t GetIndex() const { return fIndex; }
263 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
264 std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
265 std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
266 ROOT::ENTupleColumnType GetType() const { return fType; }
267 std::optional<RValueRange> GetValueRange() const { return fValueRange; }
268 bool IsAliasColumn() const { return fPhysicalColumnId != fLogicalColumnId; }
269 bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
270 bool IsSuppressedDeferredColumn() const { return fFirstElementIndex < 0; }
271};
272
273// clang-format off
274/**
275\class ROOT::RClusterDescriptor
276\ingroup NTuple
277\brief Metadata for RNTuple clusters
278
279The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
280In a second phase, the event range, column group, page locations and column ranges are added.
281Both phases are populated by the RClusterDescriptorBuilder.
282Clusters span across all available columns in the RNTuple.
283*/
284// clang-format on
287
288public:
289 // clang-format off
290 /**
291 \class ROOT::RClusterDescriptor::RColumnRange
292 \ingroup NTuple
293 \brief The window of element indexes of a particular column in a particular cluster
294 */
295 // clang-format on
298 /// The global index of the first column element in the cluster
300 /// The number of column elements in the cluster
302 /// The usual format for ROOT compression settings (see Compression.h).
303 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
304 /// If unset, the compression settings are undefined (deferred columns, suppressed columns).
305 std::optional<std::uint32_t> fCompressionSettings;
306 /// Suppressed columns have an empty page range and unknown compression settings.
307 /// Their element index range, however, is aligned with the corresponding column of the
308 /// primary column representation (see Section "Suppressed Columns" in the specification)
309 bool fIsSuppressed = false;
310
311 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
312 // Should this be done on the field level?
313
314 public:
315 RColumnRange() = default;
316
318 ROOT::NTupleSize_t nElements, std::optional<std::uint32_t> compressionSettings,
319 bool suppressed = false)
320 : fPhysicalColumnId(physicalColumnId),
321 fFirstElementIndex(firstElementIndex),
322 fNElements(nElements),
323 fCompressionSettings(compressionSettings),
324 fIsSuppressed(suppressed)
325 {
326 }
327
328 ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
329 void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
330
331 ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
332 void SetFirstElementIndex(ROOT::NTupleSize_t idx) { fFirstElementIndex = idx; }
333 void IncrementFirstElementIndex(ROOT::NTupleSize_t by) { fFirstElementIndex += by; }
334
335 ROOT::NTupleSize_t GetNElements() const { return fNElements; }
336 void SetNElements(ROOT::NTupleSize_t n) { fNElements = n; }
338
339 std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
340 void SetCompressionSettings(std::optional<std::uint32_t> comp) { fCompressionSettings = comp; }
341
342 bool IsSuppressed() const { return fIsSuppressed; }
343 void SetIsSuppressed(bool suppressed) { fIsSuppressed = suppressed; }
344
345 bool operator==(const RColumnRange &other) const
346 {
347 return fPhysicalColumnId == other.fPhysicalColumnId && fFirstElementIndex == other.fFirstElementIndex &&
348 fNElements == other.fNElements && fCompressionSettings == other.fCompressionSettings &&
349 fIsSuppressed == other.fIsSuppressed;
350 }
351
353 {
354 return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
355 }
356 };
357
358 // clang-format off
359 /**
360 \class ROOT::RClusterDescriptor::RPageInfo
361 \ingroup NTuple
362 \brief Information about a single page in the context of a cluster's page range.
363 */
364 // clang-format on
365 // NOTE: We do not need to store the element size / uncompressed page size because we know to which column
366 // the page belongs
367 struct RPageInfo {
368 private:
369 /// The meaning of `fLocator` depends on the storage backend.
371 /// The sum of the elements of all the pages must match the corresponding `fNElements` field in `fColumnRanges`
372 std::uint32_t fNElements = std::uint32_t(-1);
373 /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
374 bool fHasChecksum = false;
375
376 public:
377 RPageInfo() = default;
379 : fLocator(locator), fNElements(nElements), fHasChecksum(hasChecksum)
380 {
381 }
382
383 bool operator==(const RPageInfo &other) const
384 {
385 return fLocator == other.fLocator && fNElements == other.fNElements;
386 }
387
388 const RNTupleLocator &GetLocator() const { return fLocator; }
389 RNTupleLocator &GetLocator() { return fLocator; }
390 void SetLocator(const RNTupleLocator &locator) { fLocator = locator; }
391
392 std::uint32_t GetNElements() const { return fNElements; }
393 void SetNElements(std::uint32_t n) { fNElements = n; }
394
395 bool HasChecksum() const { return fHasChecksum; }
396 void SetHasChecksum(bool hasChecksum) { fHasChecksum = hasChecksum; }
397 };
398
399 // clang-format off
400 /**
401 \class ROOT::RClusterDescriptor::RPageInfoExtended
402 \ingroup NTuple
403 \brief Additional information about a page in an in-memory RPageRange.
404
405 Used by RPageRange::Find() to return information relative to the RPageRange. This information is not stored on disk
406 and we don't need to keep it in memory because it can be easily recomputed.
407 */
408 // clang-format on
410 private:
411 /// Index (in cluster) of the first element in page.
412 ROOT::NTupleSize_t fFirstElementIndex = 0;
413 /// Page number in the corresponding RPageRange.
414 ROOT::NTupleSize_t fPageNumber = 0;
415
416 public:
417 RPageInfoExtended() = default;
422
423 ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
425
426 ROOT::NTupleSize_t GetPageNumber() const { return fPageNumber; }
428 };
429
430 // clang-format off
431 /**
432 \class ROOT::RClusterDescriptor::RPageRange
433 \ingroup NTuple
434 \brief Records the partition of data into pages for a particular column in a particular cluster
435 */
436 // clang-format on
439
440 private:
441 /// \brief Extend this RPageRange to fit the given RColumnRange.
442 ///
443 /// To do so, prepend as many synthetic RPageInfos as needed to cover the range in `columnRange`.
444 /// RPageInfos are constructed to contain as many elements of type `element` given a page size
445 /// limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
446 /// This function is used to make up RPageRanges for clusters that contain deferred columns.
447 /// \return The number of column elements covered by the synthesized RPageInfos
448 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange,
450
451 std::vector<RPageInfo> fPageInfos;
452
453 /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
454 /// up to and including a given index. Used for binary search in Find().
455 /// This vector is only created if fPageInfos has at least kLargeRangeThreshold elements.
456 std::unique_ptr<std::vector<ROOT::NTupleSize_t>> fCumulativeNElements;
457
459
460 public:
461 /// Create the fCumulativeNElements only when it's needed, i.e. when there are many pages to search through.
462 static constexpr std::size_t kLargeRangeThreshold = 10;
463
464 RPageRange() = default;
465 RPageRange(const RPageRange &other) = delete;
469
471 {
472 RPageRange clone;
473 clone.fPhysicalColumnId = fPhysicalColumnId;
474 clone.fPageInfos = fPageInfos;
475 if (fCumulativeNElements) {
476 clone.fCumulativeNElements = std::make_unique<std::vector<ROOT::NTupleSize_t>>(*fCumulativeNElements);
477 }
478 return clone;
479 }
480
481 /// Find the page in the RPageRange that contains the given element. The element must exist.
483
484 ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
485 void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
486
487 const std::vector<RPageInfo> &GetPageInfos() const { return fPageInfos; }
488 std::vector<RPageInfo> &GetPageInfos() { return fPageInfos; }
489
490 bool operator==(const RPageRange &other) const
491 {
492 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
493 }
494 };
495
496private:
498 /// Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges
501
502 std::unordered_map<ROOT::DescriptorId_t, RColumnRange> fColumnRanges;
503 std::unordered_map<ROOT::DescriptorId_t, RPageRange> fPageRanges;
504
505public:
507
513
514 RClusterDescriptor Clone() const;
515
516 bool operator==(const RClusterDescriptor &other) const;
517
518 ROOT::DescriptorId_t GetId() const { return fClusterId; }
519 ROOT::NTupleSize_t GetFirstEntryIndex() const { return fFirstEntryIndex; }
520 ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
521 const RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
522 const RPageRange &GetPageRange(ROOT::DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
523 /// Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
524 RColumnRangeIterable GetColumnRangeIterable() const;
526 {
527 return fColumnRanges.find(physicalId) != fColumnRanges.end();
528 }
529 std::uint64_t GetNBytesOnStorage() const;
530};
531
533private:
535
536public:
538 private:
539 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RColumnRange>::const_iterator;
540 /// The wrapped map iterator
542
543 public:
544 using iterator_category = std::forward_iterator_tag;
547 using difference_type = std::ptrdiff_t;
548 using pointer = const RColumnRange *;
549 using reference = const RColumnRange &;
550
551 RIterator(Iter_t iter) : fIter(iter) {}
552 iterator &operator++() /* prefix */
553 {
554 ++fIter;
555 return *this;
556 }
557 iterator operator++(int) /* postfix */
558 {
559 auto old = *this;
560 operator++();
561 return old;
562 }
563 reference operator*() const { return fIter->second; }
564 pointer operator->() const { return &fIter->second; }
565 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
566 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
567 };
568
569 explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
570
571 RIterator begin() { return RIterator{fDesc.fColumnRanges.cbegin()}; }
572 RIterator end() { return RIterator{fDesc.fColumnRanges.cend()}; }
573 size_t size() { return fDesc.fColumnRanges.size(); }
574};
575
576// clang-format off
577/**
578\class ROOT::RClusterGroupDescriptor
579\ingroup NTuple
580\brief Clusters are bundled in cluster groups.
581
582Very large RNTuples can contain multiple cluster groups to organize cluster metadata.
583Every RNTuple has at least one cluster group. The clusters in a cluster group are ordered
584corresponding to their first entry number.
585*/
586// clang-format on
589
590private:
592 /// The cluster IDs can be empty if the corresponding page list is not loaded.
593 /// Otherwise, cluster ids are sorted by first entry number.
594 std::vector<ROOT::DescriptorId_t> fClusterIds;
595 /// The page list that corresponds to the cluster group
597 /// Uncompressed size of the page list
598 std::uint64_t fPageListLength = 0;
599 /// The minimum first entry number of the clusters in the cluster group
600 std::uint64_t fMinEntry = 0;
601 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
602 std::uint64_t fEntrySpan = 0;
603 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
604 std::uint32_t fNClusters = 0;
605
606public:
612
613 RClusterGroupDescriptor Clone() const;
614 /// Creates a clone without the cluster IDs
615 RClusterGroupDescriptor CloneSummary() const;
616
617 bool operator==(const RClusterGroupDescriptor &other) const;
618
619 ROOT::DescriptorId_t GetId() const { return fClusterGroupId; }
620 std::uint32_t GetNClusters() const { return fNClusters; }
621 RNTupleLocator GetPageListLocator() const { return fPageListLocator; }
622 std::uint64_t GetPageListLength() const { return fPageListLength; }
623 const std::vector<ROOT::DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
624 std::uint64_t GetMinEntry() const { return fMinEntry; }
625 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
626 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
627 /// Stage two loads the list of cluster IDs.
628 bool HasClusterDetails() const { return !fClusterIds.empty(); }
629};
630
631/// Used in RExtraTypeInfoDescriptor
633 kInvalid,
635};
636
637// clang-format off
638/**
639\class ROOT::RExtraTypeInfoDescriptor
640\ingroup NTuple
641\brief Field specific extra type information from the header / extension header
642
643Currently only used by streamer fields to store RNTuple-wide list of streamer info records.
644*/
645// clang-format on
648
649private:
650 /// Specifies the meaning of the extra information
651 EExtraTypeInfoIds fContentId = EExtraTypeInfoIds::kInvalid;
652 /// Type version the extra type information is bound to
653 std::uint32_t fTypeVersion = 0;
654 /// The type name the extra information refers to; empty for RNTuple-wide extra information
655 std::string fTypeName;
656 /// The content format depends on the content ID and may be binary
657 std::string fContent;
658
659public:
665
666 bool operator==(const RExtraTypeInfoDescriptor &other) const;
667
668 RExtraTypeInfoDescriptor Clone() const;
669
670 EExtraTypeInfoIds GetContentId() const { return fContentId; }
671 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
672 const std::string &GetTypeName() const { return fTypeName; }
673 const std::string &GetContent() const { return fContent; }
674};
675
676namespace Internal {
677// Used by the RNTupleReader to activate/deactivate entries. Needs to adapt when we have sharded clusters.
679} // namespace Internal
680
681// clang-format off
682/**
683\class ROOT::RNTupleDescriptor
684\ingroup NTuple
685\brief The on-storage metadata of an RNTuple
686
687Represents the on-disk (on storage) information about an RNTuple. The metadata consists of a header, a footer, and
688potentially multiple page lists.
689The header carries the RNTuple schema, i.e. the fields and the associated columns and their relationships.
690The footer carries information about one or several cluster groups and links to their page lists.
691For every cluster group, a page list envelope stores cluster summaries and page locations.
692For every cluster, it stores for every column the range of element indexes as well as a list of pages and page
693locations.
694
695The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
696for RNTuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
697
698The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
699the concept of envelopes and frames: header, footer, and page list envelopes have a preamble with a type ID and length.
700Substructures are serialized in frames and have a size and number of items (for list frames). This allows for forward
701and backward compatibility when the metadata evolves.
702*/
703// clang-format on
706 friend RNTupleDescriptor Internal::CloneDescriptorSchema(const RNTupleDescriptor &desc);
707 friend DescriptorId_t Internal::CallFindClusterIdOn(const RNTupleDescriptor &desc, NTupleSize_t entryIdx);
708
709public:
710 class RHeaderExtension;
711
712private:
713 /// The RNTuple name needs to be unique in a given storage location (file)
714 std::string fName;
715 /// Free text from the user
716 std::string fDescription;
717
718 ROOT::DescriptorId_t fFieldZeroId = ROOT::kInvalidDescriptorId; ///< Set by the descriptor builder
719
720 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
721
722 std::set<unsigned int> fFeatureFlags;
723 std::unordered_map<ROOT::DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
724 std::unordered_map<ROOT::DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
725
726 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
727 std::unique_ptr<RHeaderExtension> fHeaderExtension;
728
729 //// All fields above are part of the schema and are cloned when creating a new descriptor from a given one
730 //// (see CloneSchema())
731
732 std::uint16_t fVersionEpoch = 0; ///< Set by the descriptor builder when deserialized
733 std::uint16_t fVersionMajor = 0; ///< Set by the descriptor builder when deserialized
734 std::uint16_t fVersionMinor = 0; ///< Set by the descriptor builder when deserialized
735 std::uint16_t fVersionPatch = 0; ///< Set by the descriptor builder when deserialized
736
737 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
738 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
739 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
740
741 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
742 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
743
744 /// \brief The generation of the descriptor
745 ///
746 /// Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
747 /// active page locations. During the lifetime of the descriptor, page location information for clusters
748 /// can be added or removed. When this happens, the generation should be increased, so that users of the
749 /// descriptor know that the information changed. The generation is increased, e.g., by the page source's
750 /// exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
751 std::uint64_t fGeneration = 0;
752
753 std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
754 /// References cluster groups sorted by entry range and thus allows for binary search.
755 /// Note that this list is empty during the descriptor building process and will only be
756 /// created when the final descriptor is extracted from the builder.
757 std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
758 /// Potentially a subset of all the available clusters
759 std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
760 /// List of AttributeSets linked to this RNTuple
761 std::vector<Experimental::RNTupleAttrSetDescriptor> fAttributeSets;
762
763 // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
765
766 /// Creates a descriptor containing only the schema information about this RNTuple, i.e. all the information needed
767 /// to create a new RNTuple with the same schema as this one but not necessarily the same clustering. This is used
768 /// when merging two RNTuples.
769 RNTupleDescriptor CloneSchema() const;
770
771public:
772 /// All known feature flags.
773 /// Note that the flag values represent the bit _index_, not the already-bitshifted integer.
775 /// Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended
776 /// (i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field
777 /// backed by columns with different encoding, e.g. a vector<float> whose elements are represented by SplitReal32
778 /// in the first ntuple and by Real32 in the second.
779 /// Added in version 1.1.0.0 of the binary format.
780 kFeatureFlag_NestedDeferredColumns = 0,
781 // Insert new feature flags here, with contiguous values. If at any point a "hole" appears in the valid feature
782 // flags values, the check in RNTupleSerialize must be updated.
783
784 // End of regular feature flags
786
787 /// Reserved for forward-compatibility testing
788 kFeatureFlag_Test = 137
789 };
790
791 class RColumnDescriptorIterable;
792 class RFieldDescriptorIterable;
793 class RClusterGroupDescriptorIterable;
794 class RClusterDescriptorIterable;
795 class RExtraTypeInfoDescriptorIterable;
797
798 /// Modifiers passed to CreateModel()
800 private:
801 /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being used
802 /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
803 bool fReconstructProjections = false;
804 /// By default, creating a model will fail if any of the reconstructed fields contains an unknown column type
805 /// or an unknown field structural role.
806 /// If this option is enabled, the model will be created and all fields containing unknown data (directly
807 /// or indirectly) will be skipped instead.
808 bool fForwardCompatible = false;
809 /// If true, the model will be created without a default entry (bare model).
810 bool fCreateBare = false;
811 /// If true, fields with a user defined type that have no available dictionaries will be reconstructed
812 /// as record fields from the on-disk information; otherwise, they will cause an error.
813 bool fEmulateUnknownTypes = false;
814
815 public:
816 RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
817
818 void SetReconstructProjections(bool v) { fReconstructProjections = v; }
819 bool GetReconstructProjections() const { return fReconstructProjections; }
820
821 void SetForwardCompatible(bool v) { fForwardCompatible = v; }
822 bool GetForwardCompatible() const { return fForwardCompatible; }
823
824 void SetCreateBare(bool v) { fCreateBare = v; }
825 bool GetCreateBare() const { return fCreateBare; }
826
827 void SetEmulateUnknownTypes(bool v) { fEmulateUnknownTypes = v; }
828 bool GetEmulateUnknownTypes() const { return fEmulateUnknownTypes; }
829 };
830
831 RNTupleDescriptor() = default;
836
837 RNTupleDescriptor Clone() const;
838
839 bool operator==(const RNTupleDescriptor &other) const;
840
841 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
842 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
843 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
/// Returns the RNTuple version packed into a single 64-bit number. The four components are combined with
/// bitwise OR after shifting: epoch by 48 bits, major by 32 bits, minor by 16 bits, and patch unshifted.
844 /// \see ROOT::RNTuple::GetCurrentVersion()
845 std::uint64_t GetVersion() const
846 {
847 return (static_cast<std::uint64_t>(fVersionEpoch) << 48) | (static_cast<std::uint64_t>(fVersionMajor) << 32) |
848 (static_cast<std::uint64_t>(fVersionMinor) << 16) | (static_cast<std::uint64_t>(fVersionPatch));
849 }
850
852 {
853 return fFieldDescriptors.at(fieldId);
854 }
856 {
857 return fColumnDescriptors.at(columnId);
858 }
860 {
861 return fClusterGroupDescriptors.at(clusterGroupId);
862 }
864 {
865 return fClusterDescriptors.at(clusterId);
866 }
867
868 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
869 RFieldDescriptorIterable
870 GetFieldIterable(const RFieldDescriptor &fieldDesc,
871 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
872 RFieldDescriptorIterable GetFieldIterable(ROOT::DescriptorId_t fieldId) const;
873 RFieldDescriptorIterable
874 GetFieldIterable(ROOT::DescriptorId_t fieldId,
875 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
876
877 RFieldDescriptorIterable GetTopLevelFields() const;
878 RFieldDescriptorIterable
879 GetTopLevelFields(const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
880
881 RColumnDescriptorIterable GetColumnIterable() const;
882 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
883 RColumnDescriptorIterable GetColumnIterable(ROOT::DescriptorId_t fieldId) const;
884
885 RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
886
887 RClusterDescriptorIterable GetClusterIterable() const;
888
889 RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
890
892
893 const std::string &GetName() const { return fName; }
894 const std::string &GetDescription() const { return fDescription; }
895
/// Number of field descriptors held by this descriptor (size of fFieldDescriptors).
896 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
/// Number of logical columns, i.e. the number of entries in fColumnDescriptors.
897 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
/// Number of physical columns; read from the stored counter fNPhysicalColumns, not from a container size.
898 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
/// Number of cluster group descriptors (size of fClusterGroupDescriptors).
899 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
/// Number of clusters as recorded in the counter fNClusters. This value does not depend on how many cluster
/// descriptors are currently held; compare GetNActiveClusters().
900 std::size_t GetNClusters() const { return fNClusters; }
/// Number of cluster descriptors currently held in fClusterDescriptors.
901 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
/// Number of extra type info descriptors (size of fExtraTypeInfoDescriptors).
902 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
/// Number of attribute set descriptors (size of fAttributeSets).
903 std::size_t GetNAttributeSets() const { return fAttributeSets.size(); }
904
905 /// We know the number of entries from adding the cluster summaries
906 ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
908
909 /// Returns the logical parent of all top-level RNTuple data fields.
910 ROOT::DescriptorId_t GetFieldZeroId() const { return fFieldZeroId; }
911 const RFieldDescriptor &GetFieldZero() const { return GetFieldDescriptor(GetFieldZeroId()); }
912 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const;
913 /// Searches for a top-level field
914 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName) const;
915 ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
916 std::uint16_t representationIndex) const;
917 ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
918 std::uint16_t representationIndex) const;
920 ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const;
921 ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const;
922
923 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
924 /// In case of invalid field ID, an empty string is returned.
925 std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const;
926
927 /// Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type name.
928 std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const;
929
/// Returns true if `flag` is present in fFeatureFlags, false otherwise.
930 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
931 std::vector<std::uint64_t> GetFeatureFlags() const;
932
933 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
934 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
935
936 /// Methods to load and drop cluster group details (cluster IDs and page locations)
938 AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
939 RResult<void> DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId);
940
941 std::uint64_t GetGeneration() const { return fGeneration; }
942 void IncGeneration() { fGeneration++; }
943
944 /// Re-create the C++ model from the stored metadata
945 std::unique_ptr<ROOT::RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
946 void PrintInfo(std::ostream &output) const;
947};
948
949// clang-format off
950/**
951\class ROOT::RNTupleDescriptor::RColumnDescriptorIterable
952\ingroup NTuple
953\brief Used to loop over a field's associated columns
954*/
955// clang-format on
957private:
958 /// The associated RNTuple for this range.
960 /// The descriptor ids of the columns ordered by field, representation, and column index
961 std::vector<ROOT::DescriptorId_t> fColumns = {};
962
963public:
965 private:
966 /// The enclosing range's RNTuple.
968 /// The enclosing range's descriptor id list.
969 const std::vector<ROOT::DescriptorId_t> &fColumns;
970 std::size_t fIndex = 0;
971
972 public:
973 using iterator_category = std::forward_iterator_tag;
976 using difference_type = std::ptrdiff_t;
977 using pointer = const RColumnDescriptor *;
979
980 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &columns, std::size_t index)
981 : fNTuple(ntuple), fColumns(columns), fIndex(index)
982 {
983 }
984 iterator &operator++() /* prefix */
985 {
986 ++fIndex;
987 return *this;
988 }
989 iterator operator++(int) /* postfix */
990 {
991 auto old = *this;
992 operator++();
993 return old;
994 }
/// Dereference: takes the column ID stored at the current index and returns the matching column descriptor
/// of the associated RNTuple. The ID list is read with `at()`, so dereferencing a past-the-end iterator
/// throws instead of reading out of bounds.
995 reference operator*() const { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
996 pointer operator->() const { return &fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
/// Comparison looks at the index only; the referenced RNTuple and ID list are not compared.
/// Comparing iterators that were obtained from different ranges is therefore not meaningful.
997 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
998 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
999 };
1000
1003
/// Iterator positioned at the first column ID of the range (index 0).
1004 RIterator begin() { return RIterator(fNTuple, fColumns, 0); }
/// Past-the-end iterator; its index equals the number of column IDs in the range.
1005 RIterator end() { return RIterator(fNTuple, fColumns, fColumns.size()); }
/// Number of column IDs in the range.
/// NOTE(review): begin(), end() and size() are not const-qualified, so they cannot be called through a const
/// reference to the iterable — confirm whether that is intended.
1006 size_t size() { return fColumns.size(); }
1007};
1008
1009// clang-format off
1010/**
1011\class ROOT::RNTupleDescriptor::RFieldDescriptorIterable
1012\ingroup NTuple
1013\brief Used to loop over a field's child fields
1014*/
1015// clang-format on
1017private:
1018 /// The associated RNTuple for this range.
1020 /// The descriptor IDs of the child fields. These may be sorted using
1021 /// a comparison function.
1022 std::vector<ROOT::DescriptorId_t> fFieldChildren = {};
1023
1024public:
1026 private:
1027 /// The enclosing range's RNTuple.
1029 /// The enclosing range's descriptor id list.
1030 const std::vector<ROOT::DescriptorId_t> &fFieldChildren;
1031 std::size_t fIndex = 0;
1032
1033 public:
1034 using iterator_category = std::forward_iterator_tag;
1037 using difference_type = std::ptrdiff_t;
1038 using pointer = const RFieldDescriptor *;
1040
1041 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &fieldChildren,
1042 std::size_t index)
1043 : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index)
1044 {
1045 }
1046 iterator &operator++() /* prefix */
1047 {
1048 ++fIndex;
1049 return *this;
1050 }
1051 iterator operator++(int) /* postfix */
1052 {
1053 auto old = *this;
1054 operator++();
1055 return old;
1056 }
/// Dereference: takes the child field ID stored at the current index and returns the matching field descriptor
/// of the associated RNTuple. The ID list is read with `at()`, so dereferencing a past-the-end iterator
/// throws instead of reading out of bounds.
1057 reference operator*() const { return fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
1058 pointer operator->() const { return &fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
/// Comparison looks at the index only; the referenced RNTuple and ID list are not compared.
/// Comparing iterators that were obtained from different ranges is therefore not meaningful.
1059 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1060 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1061 };
1063 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1064 {
1065 }
1066 /// Sort the range using an arbitrary comparison function.
1068 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
1069 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1070 {
1071 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
1072 }
1073 RIterator begin() { return RIterator(fNTuple, fFieldChildren, 0); }
1074 RIterator end() { return RIterator(fNTuple, fFieldChildren, fFieldChildren.size()); }
1075};
1076
1077// clang-format off
1078/**
1079\class ROOT::RNTupleDescriptor::RClusterGroupDescriptorIterable
1080\ingroup NTuple
1081\brief Used to loop over all the cluster groups of an RNTuple (in unspecified order)
1082
1083Enumerate all cluster group IDs from the descriptor. No specific order can be assumed.
1084*/
1085// clang-format on
1087private:
1088 /// The associated RNTuple for this range.
1090
1091public:
1093 private:
1094 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor>::const_iterator;
1095 /// The wrapped map iterator
1097
1098 public:
1099 using iterator_category = std::forward_iterator_tag;
1102 using difference_type = std::ptrdiff_t;
1105
1106 RIterator(Iter_t iter) : fIter(iter) {}
1107 iterator &operator++() /* prefix */
1108 {
1109 ++fIter;
1110 return *this;
1111 }
1112 iterator operator++(int) /* postfix */
1113 {
1114 auto old = *this;
1115 operator++();
1116 return old;
1117 }
1118 reference operator*() const { return fIter->second; }
1119 pointer operator->() const { return &fIter->second; }
1120 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1121 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1122 };
1123
1125 RIterator begin() { return RIterator(fNTuple.fClusterGroupDescriptors.cbegin()); }
1126 RIterator end() { return RIterator(fNTuple.fClusterGroupDescriptors.cend()); }
1127};
1128
1129// clang-format off
1130/**
1131\class ROOT::RNTupleDescriptor::RClusterDescriptorIterable
1132\ingroup NTuple
1133\brief Used to loop over all the clusters of an RNTuple (in unspecified order)
1134
1135Enumerate all cluster IDs from all cluster descriptors. No specific order can be assumed, use
1136RNTupleDescriptor::FindNextClusterId() and RNTupleDescriptor::FindPrevClusterId() to traverse
1137clusters by entry number.
1138*/
1139// clang-format on
1141private:
1142 /// The associated RNTuple for this range.
1144
1145public:
1147 private:
1148 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor>::const_iterator;
1149 /// The wrapped map iterator
1151
1152 public:
1153 using iterator_category = std::forward_iterator_tag;
1156 using difference_type = std::ptrdiff_t;
1159
1160 RIterator(Iter_t iter) : fIter(iter) {}
1161 iterator &operator++() /* prefix */
1162 {
1163 ++fIter;
1164 return *this;
1165 }
1166 iterator operator++(int) /* postfix */
1167 {
1168 auto old = *this;
1169 operator++();
1170 return old;
1171 }
1172 reference operator*() const { return fIter->second; }
1173 pointer operator->() const { return &fIter->second; }
1174 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1175 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1176 };
1177
1179 RIterator begin() { return RIterator(fNTuple.fClusterDescriptors.cbegin()); }
1180 RIterator end() { return RIterator(fNTuple.fClusterDescriptors.cend()); }
1181};
1182
1183// clang-format off
1184/**
1185\class ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
1186\ingroup NTuple
1187 \brief Used to loop over all the extra type info records of an RNTuple (in unspecified order)
1188*/
1189// clang-format on
1191private:
1192 /// The associated RNTuple for this range.
1194
1195public:
1197 private:
1198 using Iter_t = std::vector<RExtraTypeInfoDescriptor>::const_iterator;
1199 /// The wrapped vector iterator
1201
1202 public:
1203 using iterator_category = std::forward_iterator_tag;
1206 using difference_type = std::ptrdiff_t;
1209
1210 RIterator(Iter_t iter) : fIter(iter) {}
1211 iterator &operator++() /* prefix */
1212 {
1213 ++fIter;
1214 return *this;
1215 }
1216 iterator operator++(int) /* postfix */
1217 {
1218 auto old = *this;
1219 operator++();
1220 return old;
1221 }
1222 reference operator*() const { return *fIter; }
1223 pointer operator->() const { return &*fIter; }
1224 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1225 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1226 };
1227
1229 RIterator begin() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cbegin()); }
1230 RIterator end() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cend()); }
1231};
1232
1233namespace Experimental {
1234// clang-format off
1235/**
1236\class ROOT::Experimental::RNTupleAttrSetDescriptorIterable
1237\ingroup NTuple
1238\brief Used to loop over all the Attribute Sets linked to an RNTuple
1239*/
1240// clang-format on
1241// TODO: move this to RNTupleDescriptor::RNTupleAttrSetDescriptorIterable when it moves out of Experimental.
1243private:
1244 /// The associated RNTuple for this range.
1246
1247public:
1249 private:
1250 using Iter_t = std::vector<RNTupleAttrSetDescriptor>::const_iterator;
1251 /// The wrapped vector iterator
1253
1254 public:
1255 using iterator_category = std::forward_iterator_tag;
1258 using difference_type = std::ptrdiff_t;
1259 using pointer = const value_type *;
1260 using reference = const value_type &;
1261
1262 RIterator(Iter_t iter) : fIter(iter) {}
1263 iterator &operator++() /* prefix */
1264 {
1265 ++fIter;
1266 return *this;
1267 }
1268 iterator operator++(int) /* postfix */
1269 {
1270 auto old = *this;
1271 operator++();
1272 return old;
1273 }
1274 reference operator*() const { return *fIter; }
1275 pointer operator->() const { return &*fIter; }
1276 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1277 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1278 };
1279
1281 RIterator begin() { return RIterator(fNTuple.fAttributeSets.cbegin()); }
1282 RIterator end() { return RIterator(fNTuple.fAttributeSets.cend()); }
1283};
1284} // namespace Experimental
1285
1286// clang-format off
1287/**
1288\class ROOT::RNTupleDescriptor::RHeaderExtension
1289\ingroup NTuple
1290\brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
1291*/
1292// clang-format on
1295
1296private:
1297 /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize
1298 /// the fields in that order.
1299 std::vector<ROOT::DescriptorId_t> fFieldIdsOrder;
1300 /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
1301 /// header, this enables us to determine if the column belongs to a field of the header extension or if it
1302 /// belongs to a field of the regular header that gets extended by additional column representations.
1303 std::unordered_set<ROOT::DescriptorId_t> fFieldIdsLookup;
1304 /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
1305 /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
1306 /// these columns need to be serialized in the extension header without re-serializing the field.
1307 std::vector<ROOT::DescriptorId_t> fExtendedColumnRepresentations;
1308 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
1309 std::uint32_t fNLogicalColumns = 0;
1310 std::uint32_t fNPhysicalColumns = 0;
1311
1312 /// Marks `fieldDesc` as an extended field, i.e. a field that appears in the Header Extension (e.g. having been added
1313 /// through late model extension). Note that the field descriptor should also have been added to the RNTuple
1314 /// Descriptor alongside non-extended fields.
1316 {
1317 fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1318 fFieldIdsLookup.insert(fieldDesc.GetId());
1319 }
1320
1321 /// Marks `columnDesc` as an extended column, i.e. a column that appears in the Header Extension (e.g. having been
1322 /// added through late model extension as an additional representation of an existing column). Note that the column
1323 /// descriptor should also have been added to the RNTuple Descriptor alongside non-extended columns.
1325 {
1326 fNLogicalColumns++;
1327 if (!columnDesc.IsAliasColumn())
1328 fNPhysicalColumns++;
1329 if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1330 fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1331 }
1332 }
1333
1334public:
/// Number of fields recorded in the header extension (size of fFieldIdsOrder).
1335 std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
/// Number of logical columns counted for the header extension (value of fNLogicalColumns).
1336 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
/// Number of physical columns counted for the header extension (value of fNPhysicalColumns).
/// Alias columns do not increment this counter.
1337 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
/// Logical IDs of the extension columns whose field is not itself registered in the header extension.
/// The vector is returned by const reference and is owned by this object.
1338 const std::vector<ROOT::DescriptorId_t> &GetExtendedColumnRepresentations() const
1339 {
1340 return fExtendedColumnRepresentations;
1341 }
1342 /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1343 /// of their addition. Note that these fields are not necessarily top-level fields in the overall schema.
1344 /// If a nested field is extended, it will return the top-most field of the extended subtree.
1345 /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1346 /// the field is not yet linked into the schema tree.
1347 std::vector<ROOT::DescriptorId_t> GetTopMostFields(const RNTupleDescriptor &desc) const;
1348
1350 {
1351 return fFieldIdsLookup.find(fieldId) != fFieldIdsLookup.end();
1352 }
1354 {
1355 return std::find(fExtendedColumnRepresentations.begin(), fExtendedColumnRepresentations.end(), columnId) !=
1356 fExtendedColumnRepresentations.end();
1357 }
1358};
1359
1360namespace Experimental::Internal {
1363
1364public:
1366 {
1367 fDesc.fName = name;
1368 return *this;
1369 }
1371 {
1372 fDesc.fSchemaVersionMajor = major;
1373 fDesc.fSchemaVersionMinor = minor;
1374 return *this;
1375 }
1377 {
1378 fDesc.fAnchorLocator = loc;
1379 return *this;
1380 }
1382 {
1383 fDesc.fAnchorLength = length;
1384 return *this;
1385 }
1386
1387 /// Attempt to make an AttributeSet descriptor. This may fail if the builder
1388 /// was not given enough information to make a proper descriptor.
1390};
1391} // namespace Experimental::Internal
1392
1393namespace Internal {
1394
1395// clang-format off
1396/**
1397\class ROOT::Internal::RColumnDescriptorBuilder
1398\ingroup NTuple
1399\brief A helper class for piece-wise construction of an RColumnDescriptor
1400
1401Dangling column descriptors can become actual descriptors when added to an
1402RNTupleDescriptorBuilder instance and then linked to their fields.
1403*/
1404// clang-format on
1406private:
1408
1409public:
1410 /// Make an empty column descriptor builder.
1412
1424 {
1425 fColumn.fBitsOnStorage = bitsOnStorage;
1426 return *this;
1427 }
1429 {
1430 fColumn.fType = type;
1431 return *this;
1432 }
1434 {
1435 fColumn.fFieldId = fieldId;
1436 return *this;
1437 }
1439 {
1440 fColumn.fIndex = index;
1441 return *this;
1442 }
1444 {
1446 return *this;
1447 }
1449 {
1450 R__ASSERT(fColumn.fFirstElementIndex != 0);
1451 if (fColumn.fFirstElementIndex > 0)
1452 fColumn.fFirstElementIndex = -fColumn.fFirstElementIndex;
1453 return *this;
1454 }
1456 {
1458 return *this;
1459 }
/// Sets the value range of the column under construction from an explicit minimum and maximum.
/// The two values are stored as given; this method does not check that min <= max.
/// Returns the builder itself so that calls can be chained.
1460 RColumnDescriptorBuilder &ValueRange(double min, double max)
1461 {
1462 fColumn.fValueRange = {min, max};
1463 return *this;
1464 }
/// Sets the value range of the column under construction from an optional, which is assigned unchanged.
/// NOTE(review): presumably an empty optional means "no value range" — confirm against RColumnDescriptor.
/// Returns the builder itself so that calls can be chained.
1465 RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1466 {
1467 fColumn.fValueRange = valueRange;
1468 return *this;
1469 }
1470 ROOT::DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
1472 /// Attempt to make a column descriptor. This may fail if the column
1473 /// was not given enough information to make a proper descriptor.
1474 RResult<RColumnDescriptor> MakeDescriptor() const;
1475};
1476
1477// clang-format off
1478/**
1479\class ROOT::Internal::RFieldDescriptorBuilder
1480\ingroup NTuple
1481\brief A helper class for piece-wise construction of an RFieldDescriptor
1482
1483Dangling field descriptors describe a single field in isolation. They are
1484missing the necessary relationship information (parent field, any child fields)
1485required to describe a real RNTuple field.
1486
1487Dangling field descriptors can only become actual descriptors when added to an
1488RNTupleDescriptorBuilder instance and then linked to other fields.
1489*/
1490// clang-format on
1492private:
1494
1495public:
1496 /// Make an empty dangling field descriptor.
1498
1499 /// Make a new RFieldDescriptorBuilder based off a live RNTuple field.
1500 static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field);
1501
1503 {
1504 fField.fFieldId = fieldId;
1505 return *this;
1506 }
1508 {
1509 fField.fFieldVersion = fieldVersion;
1510 return *this;
1511 }
1513 {
1514 fField.fTypeVersion = typeVersion;
1515 return *this;
1516 }
1518 {
1519 fField.fParentId = id;
1520 return *this;
1521 }
1523 {
1524 fField.fProjectionSourceId = id;
1525 return *this;
1526 }
1528 {
1529 fField.fFieldName = fieldName;
1530 return *this;
1531 }
1533 {
1535 return *this;
1536 }
1537 RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1538 {
1539 fField.fTypeName = typeName;
1540 return *this;
1541 }
1543 {
1544 fField.fTypeAlias = typeAlias;
1545 return *this;
1546 }
1548 {
1549 fField.fNRepetitions = nRepetitions;
1550 return *this;
1551 }
1553 {
1554 fField.fStructure = structure;
1555 return *this;
1556 }
1557 RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1558 {
1559 fField.fTypeChecksum = typeChecksum;
1560 return *this;
1561 }
1563 {
1564 fField.fIsSoACollection = val;
1565 return *this;
1566 }
1567 ROOT::DescriptorId_t GetParentId() const { return fField.fParentId; }
1568 /// Attempt to make a field descriptor. This may fail if the dangling field
1569 /// was not given enough information to make a proper descriptor.
1570 RResult<RFieldDescriptor> MakeDescriptor() const;
1571};
1572
1573// clang-format off
1574/**
1575\class ROOT::Internal::RClusterDescriptorBuilder
1576\ingroup NTuple
1577\brief A helper class for piece-wise construction of an RClusterDescriptor
1578
1579The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1580piecewise addition of page locations.
1581*/
1582// clang-format on
1584private:
1586
1587public:
1589 {
1590 fCluster.fClusterId = clusterId;
1591 return *this;
1592 }
1593
1595 {
1597 return *this;
1598 }
1599
1601 {
1602 fCluster.fNEntries = nEntries;
1603 return *this;
1604 }
1605
1606 RResult<void> CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1608
1609 /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1610 /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1611 /// MarkSuppressedColumnRange() took place.
1612 RResult<void> MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId);
1613
1614 /// Sets the first element index and number of elements for all the suppressed column ranges.
1615 /// The information is taken from the corresponding columns from the primary representation.
1616 /// Needs to be called when all the columns (suppressed and regular) were added.
1617 RResult<void> CommitSuppressedColumnRanges(const RNTupleDescriptor &desc);
1618
1619 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1620 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1621 /// 'zero' page from such locator. Any call to CommitColumnRange() and CommitSuppressedColumnRanges()
1622 /// should happen before calling this function.
1623 RClusterDescriptorBuilder &AddExtendedColumnRanges(const RNTupleDescriptor &desc);
1624
1629
1630 /// Move out the full cluster descriptor including page locations
1631 RResult<RClusterDescriptor> MoveDescriptor();
1632};
1633
1634// clang-format off
1635/**
1636\class ROOT::Internal::RClusterGroupDescriptorBuilder
1637\ingroup NTuple
1638\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1639*/
1640// clang-format on
1642private:
1644
1645public:
1648
1660 {
1661 fClusterGroup.fPageListLength = pageListLength;
1662 return *this;
1663 }
1665 {
1666 fClusterGroup.fMinEntry = minEntry;
1667 return *this;
1668 }
1670 {
1671 fClusterGroup.fEntrySpan = entrySpan;
1672 return *this;
1673 }
1675 {
1676 fClusterGroup.fNClusters = nClusters;
1677 return *this;
1678 }
/// Stores the IDs of the clusters that belong to the cluster group under construction.
/// The number of IDs must equal the cluster group's GetNClusters(), so the expected count has to be set on the
/// cluster group before this call; otherwise an RException ("mismatch of number of clusters") is thrown.
/// The IDs are copied as given: despite the name, this method neither sorts them nor checks their order.
1679 void AddSortedClusters(const std::vector<ROOT::DescriptorId_t> &clusterIds)
1680 {
1681 if (clusterIds.size() != fClusterGroup.GetNClusters())
1682 throw RException(R__FAIL("mismatch of number of clusters"));
1683 fClusterGroup.fClusterIds = clusterIds;
1684 }
1685
1686 RResult<RClusterGroupDescriptor> MoveDescriptor();
1687};
1688
1689// clang-format off
1690/**
1691\class ROOT::Internal::RExtraTypeInfoDescriptorBuilder
1692\ingroup NTuple
1693\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1694*/
1695// clang-format on
1697private:
1699
1700public:
1702
1704 {
1705 fExtraTypeInfo.fContentId = contentId;
1706 return *this;
1707 }
1709 {
1710 fExtraTypeInfo.fTypeVersion = typeVersion;
1711 return *this;
1712 }
1713 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1714 {
1715 fExtraTypeInfo.fTypeName = typeName;
1716 return *this;
1717 }
1719 {
1720 fExtraTypeInfo.fContent = content;
1721 return *this;
1722 }
1723
1724 RResult<RExtraTypeInfoDescriptor> MoveDescriptor();
1725};
1726
1727// clang-format off
1728/**
1729\class ROOT::Internal::RNTupleDescriptorBuilder
1730\ingroup NTuple
1731\brief A helper class for piece-wise construction of an RNTupleDescriptor
1732
1733Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1734*/
1735// clang-format on
1737private:
1739 RResult<void> EnsureFieldExists(ROOT::DescriptorId_t fieldId) const;
1740
1741public:
1742 /// Checks whether invariants hold:
1743 /// * RNTuple epoch is valid
1744 /// * RNTuple name is valid
1745 /// * Fields have valid parents
1746 /// * Number of columns is constant across column representations
1747 RResult<void> EnsureValidDescriptor() const;
1748 const RNTupleDescriptor &GetDescriptor() const { return fDescriptor; }
1749 RNTupleDescriptor MoveDescriptor();
1750
1751 /// Copies the "schema" part of `descriptor` into the builder's descriptor.
1752 /// This resets the builder's descriptor.
1753 void SetSchemaFromExisting(const RNTupleDescriptor &descriptor);
1754
1755 void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor,
1756 std::uint16_t versionPatch);
1757 void SetVersionForWriting();
1758
1759 void SetNTuple(const std::string_view name, const std::string_view description);
1760 void SetFeature(unsigned int flag);
1761
/// Records the xxhash3 value of the on-disk header in the descriptor under construction.
1762 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
/// Records the size of the on-disk header in the descriptor under construction.
1763 void SetOnDiskHeaderSize(std::uint64_t size) { fDescriptor.fOnDiskHeaderSize = size; }
1764 /// The real footer size also includes the page list envelopes.
/// `size` is added to the footer size accumulated so far (not assigned), so repeated calls add up.
1765 void AddToOnDiskFooterSize(std::uint64_t size) { fDescriptor.fOnDiskFooterSize += size; }
1766
1767 void AddField(const RFieldDescriptor &fieldDesc);
1770
1771 // The field that the column belongs to has to be already available. For fields with multiple columns,
1772 // the columns need to be added in order of the column index
1774
1777
1779 void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1780
1782
1783 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1784 /// annotated as being part of the header extension.
1785 void BeginHeaderExtension();
1786
1787 /// \brief Shift column IDs of alias columns by `offset`
1788 ///
1789 /// If the descriptor is constructed in pieces consisting of physical and alias columns
1790 /// (regular and projected fields), the natural column order would be
1791 /// - Physical and alias columns of piece one
1792 /// - Physical and alias columns of piece two
1793 /// - etc.
1794 /// What we want, however, are first all physical column IDs and then all alias column IDs.
1795 /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1796 /// column IDs in the projected field descriptors. In this way, a new piece of physical and alias columns can
1797 /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1798 /// - Physical columns of piece one
1799 /// - Physical columns of piece two
1800 /// - ...
1801 /// - Alias columns of piece one
1802 /// - Alias columns of piece two
1803 /// - ...
1804 void ShiftAliasColumns(std::uint32_t offset);
1805};
1806
1808{
1809 return desc.CloneSchema();
1810}
1811
1812/// Tells if the field describes a user-defined enum type.
1813/// The dictionary does not need to be available for this method.
1814/// Needs the full descriptor to look up sub fields.
1816
1817/// Tells if the field describes a std::atomic<T> type
1819
1820} // namespace Internal
1821
1822} // namespace ROOT
1823
1824#endif // ROOT_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:300
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Bool_t operator==(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:102
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:145
The available trivial, native content types of a column.
RNTupleAttrSetDescriptorBuilder & AnchorLocator(const RNTupleLocator &loc)
RNTupleAttrSetDescriptorBuilder & SchemaVersion(std::uint16_t major, std::uint16_t minor)
RNTupleAttrSetDescriptorBuilder & Name(std::string_view name)
RNTupleAttrSetDescriptorBuilder & AnchorLength(std::uint32_t length)
std::vector< RNTupleAttrSetDescriptor >::const_iterator Iter_t
Used to loop over all the Attribute Sets linked to an RNTuple.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RNTupleAttrSetDescriptorIterable(const RNTupleDescriptor &ntuple)
Metadata stored for every Attribute Set linked to an RNTuple.
RNTupleAttrSetDescriptor & operator=(const RNTupleAttrSetDescriptor &other)=delete
bool operator==(const RNTupleAttrSetDescriptor &other) const
std::uint32_t fAnchorLength
uncompressed size of the linked anchor
RNTupleAttrSetDescriptor(const RNTupleAttrSetDescriptor &other)=delete
RNTupleAttrSetDescriptor & operator=(RNTupleAttrSetDescriptor &&other)=default
const RNTupleLocator & GetAnchorLocator() const
bool operator!=(const RNTupleAttrSetDescriptor &other) const
RNTupleAttrSetDescriptor(RNTupleAttrSetDescriptor &&other)=default
A helper class for piece-wise construction of an RClusterDescriptor.
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
const RClusterDescriptor::RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId)
RClusterDescriptorBuilder & ClusterId(ROOT::DescriptorId_t clusterId)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
void AddSortedClusters(const std::vector< ROOT::DescriptorId_t > &clusterIds)
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
A helper class for piece-wise construction of an RColumnDescriptor.
ROOT::DescriptorId_t GetRepresentationIndex() const
RColumnDescriptorBuilder & SetSuppressedDeferred()
RColumnDescriptorBuilder & LogicalColumnId(ROOT::DescriptorId_t logicalColumnId)
RColumnDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder & ValueRange(double min, double max)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & ValueRange(std::optional< RColumnDescriptor::RValueRange > valueRange)
RColumnDescriptorBuilder & Type(ROOT::ENTupleColumnType type)
RColumnDescriptorBuilder & PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & Structure(const ROOT::ENTupleStructure &structure)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & ProjectionSourceId(ROOT::DescriptorId_t id)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & IsSoACollection(bool val)
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RFieldDescriptorBuilder & ParentId(ROOT::DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
A helper class for piece-wise construction of an RNTupleDescriptor.
const RNTupleDescriptor & GetDescriptor() const
void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
std::unordered_map< ROOT::DescriptorId_t, RColumnRange >::const_iterator Iter_t
RColumnRangeIterable(const RClusterDescriptor &desc)
The window of element indexes of a particular column in a particular cluster.
void SetCompressionSettings(std::optional< std::uint32_t > comp)
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
ROOT::DescriptorId_t GetPhysicalColumnId() const
bool operator==(const RColumnRange &other) const
void SetFirstElementIndex(ROOT::NTupleSize_t idx)
std::optional< std::uint32_t > GetCompressionSettings() const
std::optional< std::uint32_t > fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ROOT::NTupleSize_t GetFirstElementIndex() const
RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t nElements, std::optional< std::uint32_t > compressionSettings, bool suppressed=false)
void IncrementFirstElementIndex(ROOT::NTupleSize_t by)
bool Contains(ROOT::NTupleSize_t index) const
void IncrementNElements(ROOT::NTupleSize_t by)
Records the partition of data into pages for a particular column in a particular cluster.
RPageRange & operator=(const RPageRange &other)=delete
std::unique_ptr< std::vector< ROOT::NTupleSize_t > > fCumulativeNElements
Has the same length as fPageInfos and stores the sum of the number of elements of all the pages up ...
RPageRange(RPageRange &&other)=default
RPageRange(const RPageRange &other)=delete
const std::vector< RPageInfo > & GetPageInfos() const
bool operator==(const RPageRange &other) const
ROOT::DescriptorId_t GetPhysicalColumnId() const
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
RPageRange & operator=(RPageRange &&other)=default
std::vector< RPageInfo > & GetPageInfos()
Metadata for RNTuple clusters.
ROOT::NTupleSize_t GetNEntries() const
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
ROOT::DescriptorId_t GetId() const
const RPageRange & GetPageRange(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor(RClusterDescriptor &&other)=default
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::DescriptorId_t fClusterId
bool ContainsColumn(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor & operator=(RClusterDescriptor &&other)=default
const RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId) const
ROOT::NTupleSize_t GetFirstEntryIndex() const
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(const RClusterDescriptor &other)=delete
Clusters are bundled in cluster groups.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
ROOT::DescriptorId_t GetId() const
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetClusterIds() const
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t GetPageListLength() const
RNTupleLocator GetPageListLocator() const
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
Metadata stored for every column of an RNTuple.
std::optional< RValueRange > GetValueRange() const
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor(const RColumnDescriptor &other)=delete
std::uint64_t GetFirstElementIndex() const
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
ROOT::DescriptorId_t GetFieldId() const
RColumnDescriptor(RColumnDescriptor &&other)=default
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
RColumnDescriptor & operator=(RColumnDescriptor &&other)=default
std::uint32_t GetIndex() const
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::ENTupleColumnType fType
The on-disk column type.
ROOT::ENTupleColumnType GetType() const
ROOT::DescriptorId_t GetPhysicalId() const
std::uint16_t GetRepresentationIndex() const
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t GetBitsOnStorage() const
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
ROOT::DescriptorId_t GetLogicalId() const
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Field-specific extra type information from the header / extension header.
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
const std::string & GetContent() const
const std::string & GetTypeName() const
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
EExtraTypeInfoIds GetContentId() const
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
A field translates read and write calls from/to underlying columns to/from tree values.
Metadata stored for every field of an RNTuple.
const std::string & GetTypeAlias() const
std::unique_ptr< ROOT::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fFieldId
RFieldDescriptor Clone() const
Get a copy of the descriptor.
ROOT::DescriptorId_t GetId() const
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::uint32_t GetFieldVersion() const
const std::vector< ROOT::DescriptorId_t > & GetLogicalColumnIds() const
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
bool IsCustomEnum(const RNTupleDescriptor &desc) const R__DEPRECATED(6
ROOT::ENTupleStructure GetStructure() const
bool operator==(const RFieldDescriptor &other) const
RFieldDescriptor(const RFieldDescriptor &other)=delete
bool IsCustomClass() const R__DEPRECATED(6
std::uint32_t GetColumnCardinality() const
bool IsStdAtomic() const R__DEPRECATED(6
std::string fFieldDescription
Free text set by the user.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor & operator=(RFieldDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetLinkIds() const
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
ROOT::DescriptorId_t GetParentId() const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::uint64_t GetNRepetitions() const
RFieldDescriptor(RFieldDescriptor &&other)=default
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fFieldName
The leaf name, not including parent fields.
const std::string & GetFieldDescription() const
std::optional< std::uint32_t > GetTypeChecksum() const
ROOT::DescriptorId_t GetProjectionSourceId() const
bool fIsSoACollection
Indicates if this is a collection that should be represented in memory by a SoA layout.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint32_t GetTypeVersion() const
const std::string & GetFieldName() const
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
const std::string & GetTypeName() const
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor >::const_iterator Iter_t
Used to loop over all the clusters of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RClusterDescriptorIterable(const RNTupleDescriptor &ntuple)
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor >::const_iterator Iter_t
Used to loop over all the cluster groups of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
const RNTupleDescriptor & fNTuple
The enclosing range's RNTuple.
const std::vector< ROOT::DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &columns, std::size_t index)
Used to loop over a field's associated columns.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
std::vector< RExtraTypeInfoDescriptor >::const_iterator Iter_t
Used to loop over all the extra type info records of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &fieldChildren, std::size_t index)
const std::vector< ROOT::DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's RNTuple.
Used to loop over a field's child fields.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
Summarizes information about fields and the corresponding columns that were added after the header ha...
const std::vector< ROOT::DescriptorId_t > & GetExtendedColumnRepresentations() const
std::unordered_set< ROOT::DescriptorId_t > fFieldIdsLookup
All field IDs of late model extensions for efficient lookup.
std::vector< ROOT::DescriptorId_t > fExtendedColumnRepresentations
All logical column IDs of columns that extend, with additional column representations,...
bool ContainsExtendedColumnRepresentation(ROOT::DescriptorId_t columnId) const
void MarkExtendedField(const RFieldDescriptor &fieldDesc)
Marks fieldDesc as an extended field, i.e.
std::vector< ROOT::DescriptorId_t > fFieldIdsOrder
All field IDs of late model extensions, in the order of field addition.
bool ContainsField(ROOT::DescriptorId_t fieldId) const
void MarkExtendedColumn(const RColumnDescriptor &columnDesc)
Marks columnDesc as an extended column, i.e.
The on-storage metadata of an RNTuple.
std::uint64_t GetGeneration() const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
const RClusterGroupDescriptor & GetClusterGroupDescriptor(ROOT::DescriptorId_t clusterGroupId) const
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
std::set< unsigned int > fFeatureFlags
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::vector< Experimental::RNTupleAttrSetDescriptor > fAttributeSets
List of AttributeSets linked to this RNTuple.
std::size_t GetNExtraTypeInfos() const
std::uint64_t GetOnDiskFooterSize() const
std::size_t GetNActiveClusters() const
const std::string & GetName() const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
ROOT::NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
std::size_t GetNAttributeSets() const
std::size_t GetNClusters() const
std::size_t GetNPhysicalColumns() const
EFeatureFlags
All known feature flags.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t GetVersion() const
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
const RClusterDescriptor & GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
std::string fName
The RNTuple name needs to be unique in a given storage location (file)
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
std::uint64_t GetOnDiskHeaderXxHash3() const
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
std::size_t GetNFields() const
bool HasFeature(unsigned int flag) const
std::uint64_t GetOnDiskHeaderSize() const
std::string fDescription
Free text from the user.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
std::size_t GetNLogicalColumns() const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::size_t GetNClusterGroups() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
const std::string & GetDescription() const
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RFieldDescriptor & GetFieldZero() const
Generic information about the physical location of data.
const Int_t n
Definition legend1.C:16
RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
bool IsCustomEnumFieldDesc(const RNTupleDescriptor &desc, const RFieldDescriptor &fieldDesc)
Tells if the field describes a user-defined enum type.
std::vector< ROOT::Internal::RNTupleClusterBoundaries > GetClusterBoundaries(const RNTupleDescriptor &desc)
Return the cluster boundaries for each cluster in this RNTuple.
bool IsStdAtomicFieldDesc(const RFieldDescriptor &fieldDesc)
Tells if the field describes a std::atomic<T> type.
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleStructure
The fields in the RNTuple data model tree can carry different structural information about the type s...
Additional information about a page in an in-memory RPageRange.
RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage)
void SetPageNumber(ROOT::NTupleSize_t pageNumber)
Information about a single page in the context of a cluster's page range.
void SetLocator(const RNTupleLocator &locator)
bool operator==(const RPageInfo &other) const
const RNTupleLocator & GetLocator() const
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
bool operator==(RValueRange other) const
RValueRange(std::pair< double, double > range)
bool operator!=(RValueRange other) const
static void output()