Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6
7/*************************************************************************
8 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_RNTupleDescriptor
16#define ROOT_RNTupleDescriptor
17
19#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleTypes.hxx>
22#include <ROOT/RSpan.hxx>
23
24#include <TError.h>
25
26#include <algorithm>
27#include <chrono>
28#include <cmath>
29#include <functional>
30#include <iterator>
31#include <map>
32#include <memory>
33#include <optional>
34#include <ostream>
35#include <vector>
36#include <set>
37#include <string>
38#include <string_view>
39#include <unordered_map>
40#include <unordered_set>
41
42namespace ROOT {
43
44class RFieldBase;
45class RNTupleModel;
46
47namespace Internal {
48class RColumnElementBase;
49}
50
51class RNTupleDescriptor;
52
53namespace Internal {
54class RColumnDescriptorBuilder;
55class RClusterDescriptorBuilder;
56class RClusterGroupDescriptorBuilder;
57class RExtraTypeInfoDescriptorBuilder;
58class RFieldDescriptorBuilder;
59class RNTupleDescriptorBuilder;
60
61RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc);
66
67std::vector<ROOT::Internal::RNTupleClusterBoundaries> GetClusterBoundaries(const RNTupleDescriptor &desc);
68} // namespace Internal
69
70namespace Experimental {
71
72// clang-format off
73/**
74\class ROOT::Experimental::RNTupleAttrSetDescriptor
75\ingroup NTuple
76\brief Metadata stored for every Attribute Set linked to an RNTuple.
77*/
78// clang-format on
81
82 std::uint16_t fSchemaVersionMajor = 0;
83 std::uint16_t fSchemaVersionMinor = 0;
84 std::uint32_t fAnchorLength = 0; ///< uncompressed size of the linked anchor
85 // The locator of the AttributeSet anchor.
86 // In case of kTypeFile, it points to the beginning of the Anchor's payload.
87 // NOTE: Only kTypeFile is supported at the moment.
89 std::string fName;
90
91public:
97
98 bool operator==(const RNTupleAttrSetDescriptor &other) const;
99 bool operator!=(const RNTupleAttrSetDescriptor &other) const { return !(*this == other); }
100
101 const std::string &GetName() const { return fName; }
102 std::uint16_t GetSchemaVersionMajor() const { return fSchemaVersionMajor; }
103 std::uint16_t GetSchemaVersionMinor() const { return fSchemaVersionMinor; }
104 std::uint32_t GetAnchorLength() const { return fAnchorLength; }
106
108};
109
110class RNTupleAttrSetDescriptorIterable;
111
112} // namespace Experimental
113
114// clang-format off
115/**
116\class ROOT::RFieldDescriptor
117\ingroup NTuple
118\brief Metadata stored for every field of an RNTuple
119*/
120// clang-format on
124
125private:
127 /// The version of the C++-type-to-column translation mechanics
128 std::uint32_t fFieldVersion = 0;
129 /// The version of the C++ type itself
130 std::uint32_t fTypeVersion = 0;
131 /// The leaf name, not including parent fields
132 std::string fFieldName;
133 /// Free text set by the user
134 std::string fFieldDescription;
135 /// The C++ type that was used when writing the field
136 std::string fTypeName;
137 /// A typedef or using directive that resolved to the type name during field creation
138 std::string fTypeAlias;
139 /// The number of elements per entry for fixed-size arrays
140 std::uint64_t fNRepetitions = 0;
141 /// The structural information carried by this field in the data model tree
143 /// Establishes sub field relationships, such as classes and collections
145 /// For projected fields, the source field ID
147 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
148 /// order of sub fields.
149 std::vector<ROOT::DescriptorId_t> fLinkIds;
150 /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
151 /// list of logical column ids. For example, the second column of the third column representation is
152 /// fLogicalColumnIds[2 * fColumnCardinality + 1]
153 std::uint32_t fColumnCardinality = 0;
154 /// The ordered list of columns attached to this field: first by representation index then by column index.
155 std::vector<ROOT::DescriptorId_t> fLogicalColumnIds;
156 /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
157 /// identify types by their checksum
158 std::optional<std::uint32_t> fTypeChecksum;
159
160public:
161 RFieldDescriptor() = default;
166
167 bool operator==(const RFieldDescriptor &other) const;
168 /// Get a copy of the descriptor
169 RFieldDescriptor Clone() const;
170
171 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
172 /// access to sub fields, which is provided by the RNTupleDescriptor argument.
173 std::unique_ptr<ROOT::RFieldBase>
174 CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options = {}) const;
175
177 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
178 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
179 const std::string &GetFieldName() const { return fFieldName; }
180 const std::string &GetFieldDescription() const { return fFieldDescription; }
181 const std::string &GetTypeName() const { return fTypeName; }
182 const std::string &GetTypeAlias() const { return fTypeAlias; }
183 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
187 const std::vector<ROOT::DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
188 const std::vector<ROOT::DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
189 std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
190 std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
192 /// Tells if the field describes a user-defined class rather than a fundamental type, a collection, or one of the
193 /// natively supported stdlib classes.
194 /// The dictionary does not need to be available for this method.
195 bool IsCustomClass() const;
196 /// Tells if the field describes a user-defined enum type.
197 /// The dictionary does not need to be available for this method.
198 /// Needs the full descriptor to look up sub fields.
199 bool IsCustomEnum(const RNTupleDescriptor &desc) const;
200 bool IsStdAtomic() const;
201};
202
203// clang-format off
204/**
205\class ROOT::RColumnDescriptor
206\ingroup NTuple
207\brief Metadata stored for every column of an RNTuple
208*/
209// clang-format on
213
214public:
/// A (minimum, maximum) pair of double-precision bounds; both bounds default to zero.
struct RValueRange {
   double fMin = 0;
   double fMax = 0;

   RValueRange() = default;
   /// Builds the range from its two bounds.
   RValueRange(double min, double max) : fMin(min), fMax(max) {}
   /// Implicit conversion from a pair: `first` becomes the minimum, `second` the maximum.
   RValueRange(std::pair<double, double> range) : RValueRange(range.first, range.second) {}

   /// Two ranges are equal when both bounds compare equal.
   bool operator==(RValueRange other) const
   {
      if (!(fMin == other.fMin))
         return false;
      return fMax == other.fMax;
   }
   bool operator!=(RValueRange other) const { return !operator==(other); }
};
225
226private:
227 /// The actual column identifier, which is the link to the corresponding field
229 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
231 /// Every column belongs to one and only one field
/// The absolute value specifies the index for the first stored element for this column.
/// For deferred columns the absolute value is larger than zero.
/// Negative values specify a suppressed and deferred column.
/// The member is signed, so it is initialized with a signed zero literal.
std::int64_t fFirstElementIndex = 0;
237 /// A field can be serialized into several columns, which are numbered from zero to $n$
238 std::uint32_t fIndex = 0;
239 /// A field may use multiple column representations, which are numbered from zero to $m$.
240 /// Every representation has the same number of columns.
241 std::uint16_t fRepresentationIndex = 0;
242 /// The size in bits of elements of this column. Most columns have the size fixed by their type
243 /// but low-precision float columns have variable bit widths.
244 std::uint16_t fBitsOnStorage = 0;
245 /// The on-disk column type
247 /// Optional value range (used e.g. by quantized real fields)
248 std::optional<RValueRange> fValueRange;
249
250public:
251 RColumnDescriptor() = default;
256
257 bool operator==(const RColumnDescriptor &other) const;
258 /// Get a copy of the descriptor
259 RColumnDescriptor Clone() const;
260
264 std::uint32_t GetIndex() const { return fIndex; }
265 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
266 std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
267 std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
269 std::optional<RValueRange> GetValueRange() const { return fValueRange; }
271 bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
273};
274
275// clang-format off
276/**
277\class ROOT::RClusterDescriptor
278\ingroup NTuple
279\brief Metadata for RNTuple clusters
280
281The cluster descriptor is built in two phases. In the first phase, the descriptor has only an ID.
282In the second phase, the event range, column group, page locations and column ranges are added.
283Both phases are populated by the RClusterDescriptorBuilder.
284Clusters span across all available columns in the RNTuple.
285*/
286// clang-format on
289
290public:
291 // clang-format off
292 /**
293 \class ROOT::RClusterDescriptor::RColumnRange
294 \ingroup NTuple
295 \brief The window of element indexes of a particular column in a particular cluster
296 */
297 // clang-format on
300 /// The global index of the first column element in the cluster
302 /// The number of column elements in the cluster
304 /// The usual format for ROOT compression settings (see Compression.h).
305 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
306 /// If unset, the compression settings are undefined (deferred columns, suppressed columns).
307 std::optional<std::uint32_t> fCompressionSettings;
308 /// Suppressed columns have an empty page range and unknown compression settings.
309 /// Their element index range, however, is aligned with the corresponding column of the
310 /// primary column representation (see Section "Suppressed Columns" in the specification)
311 bool fIsSuppressed = false;
312
313 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
314 // Should this be done on the field level?
315
316 public:
317 RColumnRange() = default;
318
329
332
336
340
341 std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
342 void SetCompressionSettings(std::optional<std::uint32_t> comp) { fCompressionSettings = comp; }
343
344 bool IsSuppressed() const { return fIsSuppressed; }
346
347 bool operator==(const RColumnRange &other) const
348 {
349 return fPhysicalColumnId == other.fPhysicalColumnId && fFirstElementIndex == other.fFirstElementIndex &&
350 fNElements == other.fNElements && fCompressionSettings == other.fCompressionSettings &&
351 fIsSuppressed == other.fIsSuppressed;
352 }
353
358 };
359
360 // clang-format off
361 /**
362 \class ROOT::RClusterDescriptor::RPageInfo
363 \ingroup NTuple
364 \brief Information about a single page in the context of a cluster's page range.
365 */
366 // clang-format on
367 // NOTE: We do not need to store the element size / uncompressed page size because we know to which column
368 // the page belongs
369 struct RPageInfo {
370 private:
371 /// The sum of the elements of all the pages must match the corresponding `fNElements` field in `fColumnRanges`
372 std::uint32_t fNElements = std::uint32_t(-1);
373 /// The meaning of `fLocator` depends on the storage backend.
375 /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
376 bool fHasChecksum = false;
377
378 public:
379 RPageInfo() = default;
384
385 bool operator==(const RPageInfo &other) const
386 {
387 return fNElements == other.fNElements && fLocator == other.fLocator;
388 }
389
390 std::uint32_t GetNElements() const { return fNElements; }
391 void SetNElements(std::uint32_t n) { fNElements = n; }
392
393 const RNTupleLocator &GetLocator() const { return fLocator; }
396
397 bool HasChecksum() const { return fHasChecksum; }
399 };
400
401 // clang-format off
402 /**
403 \class ROOT::RClusterDescriptor::RPageInfoExtended
404 \ingroup NTuple
405 \brief Additional information about a page in an in-memory RPageRange.
406
407 Used by RPageRange::Find() to return information relative to the RPageRange. This information is not stored on disk
408 and we don't need to keep it in memory because it can be easily recomputed.
409 */
410 // clang-format on
431
432 // clang-format off
433 /**
434 \class ROOT::RClusterDescriptor::RPageRange
435 \ingroup NTuple
436 \brief Records the partition of data into pages for a particular column in a particular cluster
437 */
438 // clang-format on
441
442 private:
443 /// \brief Extend this RPageRange to fit the given RColumnRange.
444 ///
445 /// To do so, prepend as many synthetic RPageInfos as needed to cover the range in `columnRange`.
446 /// RPageInfos are constructed to contain as many elements of type `element` as fit within a page size
447 /// limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
448 /// This function is used to make up RPageRanges for clusters that contain deferred columns.
449 /// \return The number of column elements covered by the synthesized RPageInfos
452
453 /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
454 /// up to and including a given index. Used for binary search in Find().
455 std::vector<ROOT::NTupleSize_t> fCumulativeNElements;
456
458 std::vector<RPageInfo> fPageInfos;
459
460 public:
461 RPageRange() = default;
462 RPageRange(const RPageRange &other) = delete;
466
468 {
469 RPageRange clone;
471 clone.fPageInfos = fPageInfos;
473 return clone;
474 }
475
476 /// Find the page in the RPageRange that contains the given element. The element must exist.
478
481
482 const std::vector<RPageInfo> &GetPageInfos() const { return fPageInfos; }
483 std::vector<RPageInfo> &GetPageInfos() { return fPageInfos; }
484
485 bool operator==(const RPageRange &other) const
486 {
487 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
488 }
489 };
490
491private:
493 /// Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges
496
497 std::unordered_map<ROOT::DescriptorId_t, RColumnRange> fColumnRanges;
498 std::unordered_map<ROOT::DescriptorId_t, RPageRange> fPageRanges;
499
500public:
502
508
510
511 bool operator==(const RClusterDescriptor &other) const;
512
518 /// Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
519 RColumnRangeIterable GetColumnRangeIterable() const;
521 {
522 return fColumnRanges.find(physicalId) != fColumnRanges.end();
523 }
524 std::uint64_t GetNBytesOnStorage() const;
525};
526
528private:
530
531public:
533 private:
534 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RColumnRange>::const_iterator;
535 /// The wrapped map iterator
537
538 public:
539 using iterator_category = std::forward_iterator_tag;
542 using difference_type = std::ptrdiff_t;
543 using pointer = const RColumnRange *;
544 using reference = const RColumnRange &;
545
546 RIterator(Iter_t iter) : fIter(iter) {}
547 iterator &operator++() /* prefix */
548 {
549 ++fIter;
550 return *this;
551 }
552 iterator operator++(int) /* postfix */
553 {
554 auto old = *this;
555 operator++();
556 return old;
557 }
558 reference operator*() const { return fIter->second; }
559 pointer operator->() const { return &fIter->second; }
560 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
561 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
562 };
563
564 explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
565
566 RIterator begin() { return RIterator{fDesc.fColumnRanges.cbegin()}; }
567 RIterator end() { return RIterator{fDesc.fColumnRanges.cend()}; }
568 size_t size() { return fDesc.fColumnRanges.size(); }
569};
570
571// clang-format off
572/**
573\class ROOT::RClusterGroupDescriptor
574\ingroup NTuple
575\brief Clusters are bundled in cluster groups.
576
577Very large RNTuples can contain multiple cluster groups to organize cluster metadata.
578Every RNTuple has at least one cluster group. The clusters in a cluster group are ordered
579corresponding to their first entry number.
580*/
581// clang-format on
584
585private:
587 /// The cluster IDs can be empty if the corresponding page list is not loaded.
588 /// Otherwise, cluster ids are sorted by first entry number.
589 std::vector<ROOT::DescriptorId_t> fClusterIds;
590 /// The page list that corresponds to the cluster group
592 /// Uncompressed size of the page list
593 std::uint64_t fPageListLength = 0;
594 /// The minimum first entry number of the clusters in the cluster group
595 std::uint64_t fMinEntry = 0;
596 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
597 std::uint64_t fEntrySpan = 0;
598 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
599 std::uint32_t fNClusters = 0;
600
601public:
607
609 /// Creates a clone without the cluster IDs
611
612 bool operator==(const RClusterGroupDescriptor &other) const;
613
615 std::uint32_t GetNClusters() const { return fNClusters; }
617 std::uint64_t GetPageListLength() const { return fPageListLength; }
618 const std::vector<ROOT::DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
619 std::uint64_t GetMinEntry() const { return fMinEntry; }
620 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
621 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
622 /// Stage two loads the list of cluster IDs.
623 bool HasClusterDetails() const { return !fClusterIds.empty(); }
624};
625
626/// Used in RExtraTypeInfoDescriptor
628 kInvalid,
630};
631
632// clang-format off
633/**
634\class ROOT::RExtraTypeInfoDescriptor
635\ingroup NTuple
636\brief Field-specific extra type information from the header / extension header
637
638Currently only used by streamer fields to store RNTuple-wide list of streamer info records.
639*/
640// clang-format on
643
644private:
645 /// Specifies the meaning of the extra information
647 /// Type version the extra type information is bound to
648 std::uint32_t fTypeVersion = 0;
649 /// The type name the extra information refers to; empty for RNTuple-wide extra information
650 std::string fTypeName;
651 /// The content format depends on the content ID and may be binary
652 std::string fContent;
653
654public:
660
661 bool operator==(const RExtraTypeInfoDescriptor &other) const;
662
664
666 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
667 const std::string &GetTypeName() const { return fTypeName; }
668 const std::string &GetContent() const { return fContent; }
669};
670
671// clang-format off
672/**
673\class ROOT::RNTupleDescriptor
674\ingroup NTuple
675\brief The on-storage metadata of an RNTuple
676
677Represents the on-disk (on storage) information about an RNTuple. The metadata consists of a header, a footer, and
678potentially multiple page lists.
679The header carries the RNTuple schema, i.e. the fields and the associated columns and their relationships.
680The footer carries information about one or several cluster groups and links to their page lists.
681For every cluster group, a page list envelope stores cluster summaries and page locations.
682For every cluster, it stores for every column the range of element indexes as well as a list of pages and page
683locations.
684
685The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
686for RNTuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
687
688The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
689the concept of envelopes and frames: header, footer, and page list envelopes have a preamble with a type ID and length.
690Substructures are serialized in frames and have a size and number of items (for list frames). This allows for forward
691and backward compatibility when the metadata evolves.
692*/
693// clang-format on
697
698public:
699 class RHeaderExtension;
700
701private:
702 /// The RNTuple name needs to be unique in a given storage location (file)
703 std::string fName;
704 /// Free text from the user
705 std::string fDescription;
706
708
709 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
710
711 std::set<unsigned int> fFeatureFlags;
712 std::unordered_map<ROOT::DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
713 std::unordered_map<ROOT::DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
714
715 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
716 std::unique_ptr<RHeaderExtension> fHeaderExtension;
717
718 //// All fields above are part of the schema and are cloned when creating a new descriptor from a given one
719 //// (see CloneSchema())
720
721 std::uint16_t fVersionEpoch = 0; ///< Set by the descriptor builder when deserialized
722 std::uint16_t fVersionMajor = 0; ///< Set by the descriptor builder when deserialized
723 std::uint16_t fVersionMinor = 0; ///< Set by the descriptor builder when deserialized
724 std::uint16_t fVersionPatch = 0; ///< Set by the descriptor builder when deserialized
725
726 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
727 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
728 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
729
730 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
731 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
732
733 /// \brief The generation of the descriptor
734 ///
735 /// Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
736 /// active page locations. During the lifetime of the descriptor, page location information for clusters
737 /// can be added or removed. When this happens, the generation should be increased, so that users of the
738 /// descriptor know that the information changed. The generation is increased, e.g., by the page source's
739 /// exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
740 std::uint64_t fGeneration = 0;
741
742 std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
743 /// References cluster groups sorted by entry range and thus allows for binary search.
744 /// Note that this list is empty during the descriptor building process and will only be
745 /// created when the final descriptor is extracted from the builder.
746 std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
747 /// Potentially a subset of all the available clusters
748 std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
749 /// List of AttributeSets linked to this RNTuple
750 std::vector<Experimental::RNTupleAttrSetDescriptor> fAttributeSets;
751
752 // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
754
755 /// Creates a descriptor containing only the schema information about this RNTuple, i.e. all the information needed
756 /// to create a new RNTuple with the same schema as this one but not necessarily the same clustering. This is used
757 /// when merging two RNTuples.
759
760public:
761 static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
762
769
770 /// Modifiers passed to CreateModel()
772 private:
773 /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being used
774 /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
776 /// By default, creating a model will fail if any of the reconstructed fields contains an unknown column type
777 /// or an unknown field structural role.
778 /// If this option is enabled, the model will be created and all fields containing unknown data (directly
779 /// or indirectly) will be skipped instead.
780 bool fForwardCompatible = false;
781 /// If true, the model will be created without a default entry (bare model).
782 bool fCreateBare = false;
783 /// If true, fields with a user defined type that have no available dictionaries will be reconstructed
784 /// as record fields from the on-disk information; otherwise, they will cause an error.
786
787 public:
788 RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
789
792
795
796 void SetCreateBare(bool v) { fCreateBare = v; }
797 bool GetCreateBare() const { return fCreateBare; }
798
801 };
802
803 RNTupleDescriptor() = default;
808
809 RNTupleDescriptor Clone() const;
810
811 bool operator==(const RNTupleDescriptor &other) const;
812
813 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
814 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
815 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
816
833
834 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
835 RFieldDescriptorIterable
837 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
838 RFieldDescriptorIterable GetFieldIterable(ROOT::DescriptorId_t fieldId) const;
839 RFieldDescriptorIterable
841 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
842
843 RFieldDescriptorIterable GetTopLevelFields() const;
844 RFieldDescriptorIterable
845 GetTopLevelFields(const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
846
847 RColumnDescriptorIterable GetColumnIterable() const;
848 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
849 RColumnDescriptorIterable GetColumnIterable(ROOT::DescriptorId_t fieldId) const;
850
851 RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
852
853 RClusterDescriptorIterable GetClusterIterable() const;
854
855 RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
856
858
859 const std::string &GetName() const { return fName; }
860 const std::string &GetDescription() const { return fDescription; }
861
862 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
863 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
864 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
865 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
866 std::size_t GetNClusters() const { return fNClusters; }
867 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
868 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
869 std::size_t GetNAttributeSets() const { return fAttributeSets.size(); }
870
871 /// We know the number of entries from adding the cluster summaries
874
875 /// Returns the logical parent of all top-level RNTuple data fields.
879 /// Searches for a top-level field
880 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName) const;
882 std::uint16_t representationIndex) const;
884 std::uint16_t representationIndex) const;
888
889 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
890 /// In case of invalid field ID, an empty string is returned.
892
893 /// Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type name.
894 std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const;
895
896 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
897 std::vector<std::uint64_t> GetFeatureFlags() const;
898
899 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
900 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
901
902 /// Methods to load and drop cluster group details (cluster IDs and page locations)
906
907 std::uint64_t GetGeneration() const { return fGeneration; }
909
910 /// Re-create the C++ model from the stored metadata
911 std::unique_ptr<ROOT::RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
912 void PrintInfo(std::ostream &output) const;
913};
914
915// clang-format off
916/**
917\class ROOT::RNTupleDescriptor::RColumnDescriptorIterable
918\ingroup NTuple
919\brief Used to loop over a field's associated columns
920*/
921// clang-format on
923private:
924 /// The associated RNTuple for this range.
926 /// The descriptor ids of the columns ordered by field, representation, and column index
927 std::vector<ROOT::DescriptorId_t> fColumns = {};
928
929public:
931 private:
932 /// The enclosing range's RNTuple.
934 /// The enclosing range's descriptor id list.
935 const std::vector<ROOT::DescriptorId_t> &fColumns;
936 std::size_t fIndex = 0;
937
938 public:
939 using iterator_category = std::forward_iterator_tag;
942 using difference_type = std::ptrdiff_t;
943 using pointer = const RColumnDescriptor *;
945
946 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &columns, std::size_t index)
948 {
949 }
950 iterator &operator++() /* prefix */
951 {
952 ++fIndex;
953 return *this;
954 }
955 iterator operator++(int) /* postfix */
956 {
957 auto old = *this;
958 operator++();
959 return old;
960 }
961 reference operator*() const { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
962 pointer operator->() const { return &fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
963 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
964 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
965 };
966
969
972 size_t size() { return fColumns.size(); }
973};
974
975// clang-format off
976/**
977\class ROOT::RNTupleDescriptor::RFieldDescriptorIterable
978\ingroup NTuple
979\brief Used to loop over a field's child fields
980*/
981// clang-format on
983private:
984 /// The associated RNTuple for this range.
986 /// The descriptor IDs of the child fields. These may be sorted using
987 /// a comparison function.
988 std::vector<ROOT::DescriptorId_t> fFieldChildren = {};
989
990public:
992 private:
993 /// The enclosing range's RNTuple.
995 /// The enclosing range's descriptor id list.
996 const std::vector<ROOT::DescriptorId_t> &fFieldChildren;
997 std::size_t fIndex = 0;
998
999 public:
1000 using iterator_category = std::forward_iterator_tag;
1003 using difference_type = std::ptrdiff_t;
1004 using pointer = const RFieldDescriptor *;
1006
1007 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &fieldChildren,
1008 std::size_t index)
1010 {
1011 }
1012 iterator &operator++() /* prefix */
1013 {
1014 ++fIndex;
1015 return *this;
1016 }
1017 iterator operator++(int) /* postfix */
1018 {
1019 auto old = *this;
1020 operator++();
1021 return old;
1022 }
1023 reference operator*() const { return fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
1024 pointer operator->() const { return &fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
1025 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1026 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1027 };
1032 /// Sort the range using an arbitrary comparison function.
1034 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
1035 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1036 {
1037 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
1038 }
1041};
1042
1043// clang-format off
1044/**
1045\class ROOT::RNTupleDescriptor::RClusterGroupDescriptorIterable
1046\ingroup NTuple
1047\brief Used to loop over all the cluster groups of an RNTuple (in unspecified order)
1048
1049Enumerate all cluster group IDs from the descriptor. No specific order can be assumed.
1050*/
1051// clang-format on
1053private:
1054 /// The associated RNTuple for this range.
1056
1057public:
1059 private:
1060 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor>::const_iterator;
1061 /// The wrapped map iterator
1063
1064 public:
1065 using iterator_category = std::forward_iterator_tag;
1068 using difference_type = std::ptrdiff_t;
1071
1072 RIterator(Iter_t iter) : fIter(iter) {}
1073 iterator &operator++() /* prefix */
1074 {
1075 ++fIter;
1076 return *this;
1077 }
1078 iterator operator++(int) /* postfix */
1079 {
1080 auto old = *this;
1081 operator++();
1082 return old;
1083 }
1084 reference operator*() const { return fIter->second; }
1085 pointer operator->() const { return &fIter->second; }
1086 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1087 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1088 };
1089
1091 RIterator begin() { return RIterator(fNTuple.fClusterGroupDescriptors.cbegin()); }
1092 RIterator end() { return RIterator(fNTuple.fClusterGroupDescriptors.cend()); }
1093};
1094
1095// clang-format off
1096/**
1097\class ROOT::RNTupleDescriptor::RClusterDescriptorIterable
1098\ingroup NTuple
1099\brief Used to loop over all the clusters of an RNTuple (in unspecified order)
1100
1101 Enumerate all cluster IDs from all cluster descriptors. No specific order can be assumed; use
1102RNTupleDescriptor::FindNextClusterId() and RNTupleDescriptor::FindPrevClusterId() to traverse
1103clusters by entry number.
1104*/
1105// clang-format on
1107private:
1108 /// The associated RNTuple for this range.
1110
1111public:
1113 private:
1114 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor>::const_iterator;
1115 /// The wrapped map iterator
1117
1118 public:
1119 using iterator_category = std::forward_iterator_tag;
1122 using difference_type = std::ptrdiff_t;
1125
1126 RIterator(Iter_t iter) : fIter(iter) {}
1127 iterator &operator++() /* prefix */
1128 {
1129 ++fIter;
1130 return *this;
1131 }
1132 iterator operator++(int) /* postfix */
1133 {
1134 auto old = *this;
1135 operator++();
1136 return old;
1137 }
1138 reference operator*() const { return fIter->second; }
1139 pointer operator->() const { return &fIter->second; }
1140 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1141 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1142 };
1143
1145 RIterator begin() { return RIterator(fNTuple.fClusterDescriptors.cbegin()); }
1146 RIterator end() { return RIterator(fNTuple.fClusterDescriptors.cend()); }
1147};
1148
1149// clang-format off
1150/**
1151\class ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
1152\ingroup NTuple
1153 \brief Used to loop over all the extra type info records of an RNTuple (in unspecified order)
1154*/
1155// clang-format on
1157private:
1158 /// The associated RNTuple for this range.
1160
1161public:
1163 private:
1164 using Iter_t = std::vector<RExtraTypeInfoDescriptor>::const_iterator;
1165 /// The wrapped vector iterator
1167
1168 public:
1169 using iterator_category = std::forward_iterator_tag;
1172 using difference_type = std::ptrdiff_t;
1175
1176 RIterator(Iter_t iter) : fIter(iter) {}
1177 iterator &operator++() /* prefix */
1178 {
1179 ++fIter;
1180 return *this;
1181 }
1182 iterator operator++(int) /* postfix */
1183 {
1184 auto old = *this;
1185 operator++();
1186 return old;
1187 }
1188 reference operator*() const { return *fIter; }
1189 pointer operator->() const { return &*fIter; }
1190 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1191 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1192 };
1193
1195 RIterator begin() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cbegin()); }
1196 RIterator end() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cend()); }
1197};
1198
1199namespace Experimental {
1200// clang-format off
1201/**
1202\class ROOT::Experimental::RNTupleAttrSetDescriptorIterable
1203\ingroup NTuple
1204\brief Used to loop over all the Attribute Sets linked to an RNTuple
1205*/
1206// clang-format on
1207// TODO: move this to RNTupleDescriptor::RNTupleAttrSetDescriptorIterable when it moves out of Experimental.
1209private:
1210 /// The associated RNTuple for this range.
1212
1213public:
1215 private:
1216 using Iter_t = std::vector<RNTupleAttrSetDescriptor>::const_iterator;
1217 /// The wrapped vector iterator
1219
1220 public:
1221 using iterator_category = std::forward_iterator_tag;
1224 using difference_type = std::ptrdiff_t;
1225 using pointer = const value_type *;
1226 using reference = const value_type &;
1227
1228 RIterator(Iter_t iter) : fIter(iter) {}
1229 iterator &operator++() /* prefix */
1230 {
1231 ++fIter;
1232 return *this;
1233 }
1234 iterator operator++(int) /* postfix */
1235 {
1236 auto old = *this;
1237 operator++();
1238 return old;
1239 }
1240 reference operator*() const { return *fIter; }
1241 pointer operator->() const { return &*fIter; }
1242 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1243 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1244 };
1245
1247 RIterator begin() { return RIterator(fNTuple.fAttributeSets.cbegin()); }
1248 RIterator end() { return RIterator(fNTuple.fAttributeSets.cend()); }
1249};
1250} // namespace Experimental
1251
1252// clang-format off
1253/**
1254\class ROOT::RNTupleDescriptor::RHeaderExtension
1255\ingroup NTuple
1256\brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
1257*/
1258// clang-format on
1261
1262private:
1263 /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize
1264 /// the fields in that order.
1265 std::vector<ROOT::DescriptorId_t> fFieldIdsOrder;
1266 /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
1267 /// header, this enables us to determine if the column belongs to a field of the header extension or if it
1268 /// belongs to a field of the regular header that gets extended by additional column representations.
1269 std::unordered_set<ROOT::DescriptorId_t> fFieldIdsLookup;
1270 /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
1271 /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
1272 /// these columns need to be serialized in the extension header without re-serializing the field.
1273 std::vector<ROOT::DescriptorId_t> fExtendedColumnRepresentations;
1274 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
1275 std::uint32_t fNLogicalColumns = 0;
1276 std::uint32_t fNPhysicalColumns = 0;
1277
1278 /// Marks `fieldDesc` as an extended field, i.e. a field that appears in the Header Extension (e.g. having been added
1279 /// through late model extension). Note that the field descriptor should also have been added to the RNTuple
1280 /// Descriptor alongside non-extended fields.
1282 {
1283 fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1284 fFieldIdsLookup.insert(fieldDesc.GetId());
1285 }
1286
1287 /// Marks `columnDesc` as an extended column, i.e. a column that appears in the Header Extension (e.g. having been
1288 /// added through late model extension as an additional representation of an existing column). Note that the column
1289 /// descriptor should also have been added to the RNTuple Descriptor alongside non-extended columns.
1291 {
1293 if (!columnDesc.IsAliasColumn())
1295 if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1296 fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1297 }
1298 }
1299
1300public:
1301 std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
1302 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
1303 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1304 const std::vector<ROOT::DescriptorId_t> &GetExtendedColumnRepresentations() const
1305 {
1307 }
1308 /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1309 /// of their addition.
1310 /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1311 /// the field is not yet linked into the schema tree.
1312 std::vector<ROOT::DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
1313
1315 {
1316 return fFieldIdsLookup.find(fieldId) != fFieldIdsLookup.end();
1317 }
1323};
1324
1325namespace Experimental::Internal {
1328
1329public:
1331 {
1332 fDesc.fName = name;
1333 return *this;
1334 }
1336 {
1339 return *this;
1340 }
1347 {
1349 return *this;
1350 }
1351
1352 /// Attempt to make an AttributeSet descriptor. This may fail if the builder
1353 /// was not given enough information to make a proper descriptor.
1355};
1356} // namespace Experimental::Internal
1357
1358namespace Internal {
1359
1360// clang-format off
1361/**
1362\class ROOT::Internal::RColumnDescriptorBuilder
1363\ingroup NTuple
1364\brief A helper class for piece-wise construction of an RColumnDescriptor
1365
1366Dangling column descriptors can become actual descriptors when added to an
1367RNTupleDescriptorBuilder instance and then linked to their fields.
1368*/
1369// clang-format on
1371private:
1373
1374public:
1375 /// Make an empty column descriptor builder.
1377
1389 {
1391 return *this;
1392 }
1394 {
1395 fColumn.fType = type;
1396 return *this;
1397 }
1404 {
1406 return *this;
1407 }
1425 RColumnDescriptorBuilder &ValueRange(double min, double max)
1426 {
1427 fColumn.fValueRange = {min, max};
1428 return *this;
1429 }
1430 RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1431 {
1433 return *this;
1434 }
1437 /// Attempt to make a column descriptor. This may fail if the column
1438 /// was not given enough information to make a proper descriptor.
1440};
1441
1442// clang-format off
1443/**
1444\class ROOT::Internal::RFieldDescriptorBuilder
1445\ingroup NTuple
1446\brief A helper class for piece-wise construction of an RFieldDescriptor
1447
1448Dangling field descriptors describe a single field in isolation. They are
1449missing the necessary relationship information (parent field, any child fields)
1450required to describe a real RNTuple field.
1451
1452Dangling field descriptors can only become actual descriptors when added to an
1453RNTupleDescriptorBuilder instance and then linked to other fields.
1454*/
1455// clang-format on
1457private:
1459
1460public:
1461 /// Make an empty dangling field descriptor.
1463
1464 /// Make a new RFieldDescriptorBuilder based off a live RNTuple field.
1466
1473 {
1475 return *this;
1476 }
1478 {
1480 return *this;
1481 }
1483 {
1485 return *this;
1486 }
1493 {
1495 return *this;
1496 }
1498 {
1500 return *this;
1501 }
1502 RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1503 {
1504 fField.fTypeName = typeName;
1505 return *this;
1506 }
1508 {
1510 return *this;
1511 }
1513 {
1515 return *this;
1516 }
1518 {
1519 fField.fStructure = structure;
1520 return *this;
1521 }
1522 RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1523 {
1525 return *this;
1526 }
1528 /// Attempt to make a field descriptor. This may fail if the dangling field
1529 /// was not given enough information to make a proper descriptor.
1531};
1532
1533// clang-format off
1534/**
1535\class ROOT::Internal::RClusterDescriptorBuilder
1536\ingroup NTuple
1537\brief A helper class for piece-wise construction of an RClusterDescriptor
1538
1539The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1540piecewise addition of page locations.
1541*/
1542// clang-format on
1544private:
1546
1547public:
1553
1559
1561 {
1563 return *this;
1564 }
1565
1568
1569 /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1570 /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1571 /// MarkSuppressedColumnRange() took place.
1573
1574 /// Sets the first element index and number of elements for all the suppressed column ranges.
1575 /// The information is taken from the corresponding columns from the primary representation.
1576 /// Needs to be called when all the columns (suppressed and regular) were added.
1578
1579 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1580 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1581 /// 'zero' page from such locator. Any call to CommitColumnRange() and CommitSuppressedColumnRanges()
1582 /// should happen before calling this function.
1584
1589
1590 /// Move out the full cluster descriptor including page locations
1592};
1593
1594// clang-format off
1595/**
1596\class ROOT::Internal::RClusterGroupDescriptorBuilder
1597\ingroup NTuple
1598\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1599*/
1600// clang-format on
1602private:
1604
1605public:
1608
1625 {
1627 return *this;
1628 }
1630 {
1632 return *this;
1633 }
1635 {
1637 return *this;
1638 }
1639 void AddSortedClusters(const std::vector<ROOT::DescriptorId_t> &clusterIds)
1640 {
1641 if (clusterIds.size() != fClusterGroup.GetNClusters())
1642 throw RException(R__FAIL("mismatch of number of clusters"));
1644 }
1645
1647};
1648
1649// clang-format off
1650/**
1651\class ROOT::Internal::RExtraTypeInfoDescriptorBuilder
1652\ingroup NTuple
1653\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1654*/
1655// clang-format on
1657private:
1659
1660public:
1662
1669 {
1671 return *this;
1672 }
1673 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1674 {
1675 fExtraTypeInfo.fTypeName = typeName;
1676 return *this;
1677 }
1679 {
1681 return *this;
1682 }
1683
1685};
1686
1687// clang-format off
1688/**
1689\class ROOT::Internal::RNTupleDescriptorBuilder
1690\ingroup NTuple
1691\brief A helper class for piece-wise construction of an RNTupleDescriptor
1692
1693Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1694*/
1695// clang-format on
1697private:
1700
1701public:
1702 /// Checks whether invariants hold:
1703 /// * RNTuple epoch is valid
1704 /// * RNTuple name is valid
1705 /// * Fields have valid parents
1706 /// * Number of columns is constant across column representations
1710
1711 /// Copies the "schema" part of `descriptor` into the builder's descriptor.
1712 /// This resets the builder's descriptor.
1714
1715 void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor,
1716 std::uint16_t versionPatch);
1717 void SetVersionForWriting();
1718
1719 void SetNTuple(const std::string_view name, const std::string_view description);
1720 void SetFeature(unsigned int flag);
1721
1724 /// The real footer size also includes the page list envelopes
1726
1727 void AddField(const RFieldDescriptor &fieldDesc);
1730
1731 // The field that the column belongs to has to be already available. For fields with multiple columns,
1732 // the columns need to be added in order of the column index
1734
1737
1740
1742
1743 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1744 /// annotated as being part of the header extension.
1745 void BeginHeaderExtension();
1746
1747 /// \brief Shift column IDs of alias columns by `offset`
1748 ///
1749 /// If the descriptor is constructed in pieces consisting of physical and alias columns
1750 /// (regular and projected fields), the natural column order would be
1751 /// - Physical and alias columns of piece one
1752 /// - Physical and alias columns of piece two
1753 /// - etc.
1754 /// What we want, however, are first all physical column IDs and then all alias column IDs.
1755 /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1756 /// column IDs in the projected field descriptors. In this way, a new piece of physical and alias columns can
1757 /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1758 /// - Physical columns of piece one
1759 /// - Physical columns of piece two
1760 /// - ...
1761 /// - Alias columns of piece one
1762 /// - Alias columns of piece two
1763 /// - ...
1764 void ShiftAliasColumns(std::uint32_t offset);
1765
1766 /// Get the streamer info records for custom classes. Currently requires the corresponding dictionaries to be loaded.
1768};
1769
1771{
1772 return desc.CloneSchema();
1773}
1774
1775} // namespace Internal
1776
1777} // namespace ROOT
1778
1779#endif // ROOT_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:300
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
The available trivial, native content types of a column.
RNTupleAttrSetDescriptorBuilder & AnchorLocator(const RNTupleLocator &loc)
RNTupleAttrSetDescriptorBuilder & SchemaVersion(std::uint16_t major, std::uint16_t minor)
RResult< ROOT::Experimental::RNTupleAttrSetDescriptor > MoveDescriptor()
Attempt to make an AttributeSet descriptor.
RNTupleAttrSetDescriptorBuilder & Name(std::string_view name)
RNTupleAttrSetDescriptorBuilder & AnchorLength(std::uint32_t length)
std::vector< RNTupleAttrSetDescriptor >::const_iterator Iter_t
Used to loop over all the Attribute Sets linked to an RNTuple.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RNTupleAttrSetDescriptorIterable(const RNTupleDescriptor &ntuple)
Metadata stored for every Attribute Set linked to an RNTuple.
RNTupleAttrSetDescriptor & operator=(const RNTupleAttrSetDescriptor &other)=delete
bool operator==(const RNTupleAttrSetDescriptor &other) const
std::uint32_t fAnchorLength
uncompressed size of the linked anchor
RNTupleAttrSetDescriptor(const RNTupleAttrSetDescriptor &other)=delete
RNTupleAttrSetDescriptor & operator=(RNTupleAttrSetDescriptor &&other)=default
const RNTupleLocator & GetAnchorLocator() const
bool operator!=(const RNTupleAttrSetDescriptor &other) const
RNTupleAttrSetDescriptor(RNTupleAttrSetDescriptor &&other)=default
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
const RClusterDescriptor::RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId)
RClusterDescriptorBuilder & ClusterId(ROOT::DescriptorId_t clusterId)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
void AddSortedClusters(const std::vector< ROOT::DescriptorId_t > &clusterIds)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
A helper class for piece-wise construction of an RColumnDescriptor.
ROOT::DescriptorId_t GetRepresentationIndex() const
RColumnDescriptorBuilder & SetSuppressedDeferred()
RColumnDescriptorBuilder & LogicalColumnId(ROOT::DescriptorId_t logicalColumnId)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder & ValueRange(double min, double max)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & ValueRange(std::optional< RColumnDescriptor::RValueRange > valueRange)
RColumnDescriptorBuilder & Type(ROOT::ENTupleColumnType type)
RColumnDescriptorBuilder & PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RResult< RExtraTypeInfoDescriptor > MoveDescriptor()
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & Structure(const ROOT::ENTupleStructure &structure)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & ProjectionSourceId(ROOT::DescriptorId_t id)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & ParentId(ROOT::DescriptorId_t id)
static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live RNTuple field.
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
A helper class for piece-wise construction of an RNTupleDescriptor.
void SetNTuple(const std::string_view name, const std::string_view description)
void SetSchemaFromExisting(const RNTupleDescriptor &descriptor)
Copies the "schema" part of descriptor into the builder's descriptor.
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
RResult< void > AddAttributeSet(Experimental::RNTupleAttrSetDescriptor &&attrSetDesc)
RResult< void > AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId)
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
void ShiftAliasColumns(std::uint32_t offset)
Shift column IDs of alias columns by offset
void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch)
const RNTupleDescriptor & GetDescriptor() const
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId)
void AddField(const RFieldDescriptor &fieldDesc)
ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3)
RResult< void > EnsureFieldExists(ROOT::DescriptorId_t fieldId) const
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
std::unordered_map< ROOT::DescriptorId_t, RColumnRange >::const_iterator Iter_t
RColumnRangeIterable(const RClusterDescriptor &desc)
The window of element indexes of a particular column in a particular cluster.
void SetCompressionSettings(std::optional< std::uint32_t > comp)
bool fIsSuppressed
Suppressed columns have an empty page range and unknown compression settings.
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
ROOT::DescriptorId_t GetPhysicalColumnId() const
bool operator==(const RColumnRange &other) const
void SetFirstElementIndex(ROOT::NTupleSize_t idx)
ROOT::NTupleSize_t fFirstElementIndex
The global index of the first column element in the cluster.
std::optional< std::uint32_t > GetCompressionSettings() const
std::optional< std::uint32_t > fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ROOT::NTupleSize_t GetFirstElementIndex() const
RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t nElements, std::optional< std::uint32_t > compressionSettings, bool suppressed=false)
void IncrementFirstElementIndex(ROOT::NTupleSize_t by)
bool Contains(ROOT::NTupleSize_t index) const
ROOT::NTupleSize_t fNElements
The number of column elements in the cluster.
void IncrementNElements(ROOT::NTupleSize_t by)
Records the partition of data into pages for a particular column in a particular cluster.
RPageRange & operator=(const RPageRange &other)=delete
RPageRange(RPageRange &&other)=default
RPageRange(const RPageRange &other)=delete
const std::vector< RPageInfo > & GetPageInfos() const
RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
std::vector< ROOT::NTupleSize_t > fCumulativeNElements
Has the same length as fPageInfos and stores the sum of the number of elements of all the pages up ...
bool operator==(const RPageRange &other) const
ROOT::DescriptorId_t GetPhysicalColumnId() const
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange.
RPageRange & operator=(RPageRange &&other)=default
std::vector< RPageInfo > & GetPageInfos()
Metadata for RNTuple clusters.
ROOT::NTupleSize_t GetNEntries() const
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
ROOT::DescriptorId_t GetId() const
const RPageRange & GetPageRange(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor(RClusterDescriptor &&other)=default
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::DescriptorId_t fClusterId
bool ContainsColumn(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor & operator=(RClusterDescriptor &&other)=default
RClusterDescriptor Clone() const
bool operator==(const RClusterDescriptor &other) const
const RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
ROOT::NTupleSize_t GetFirstEntryIndex() const
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(const RClusterDescriptor &other)=delete
std::uint64_t GetNBytesOnStorage() const
Clusters are bundled in cluster groups.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor Clone() const
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
ROOT::DescriptorId_t GetId() const
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetClusterIds() const
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t GetPageListLength() const
RNTupleLocator GetPageListLocator() const
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor CloneSummary() const
Creates a clone without the cluster IDs.
Metadata stored for every column of an RNTuple.
std::optional< RValueRange > GetValueRange() const
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
bool operator==(const RColumnDescriptor &other) const
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor(const RColumnDescriptor &other)=delete
std::uint64_t GetFirstElementIndex() const
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
ROOT::DescriptorId_t GetFieldId() const
RColumnDescriptor(RColumnDescriptor &&other)=default
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
RColumnDescriptor & operator=(RColumnDescriptor &&other)=default
std::uint32_t GetIndex() const
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::ENTupleColumnType fType
The on-disk column type.
ROOT::ENTupleColumnType GetType() const
ROOT::DescriptorId_t GetPhysicalId() const
std::uint16_t GetRepresentationIndex() const
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t GetBitsOnStorage() const
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
ROOT::DescriptorId_t GetLogicalId() const
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Field-specific extra type information from the header / extension header.
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor Clone() const
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
const std::string & GetContent() const
const std::string & GetTypeName() const
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
EExtraTypeInfoIds GetContentId() const
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
A field translates read and write calls from/to underlying columns to/from tree values.
Metadata stored for every field of an RNTuple.
const std::string & GetTypeAlias() const
std::unique_ptr< ROOT::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fFieldId
RFieldDescriptor Clone() const
Get a copy of the descriptor.
ROOT::DescriptorId_t GetId() const
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::uint32_t GetFieldVersion() const
const std::vector< ROOT::DescriptorId_t > & GetLogicalColumnIds() const
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
ROOT::ENTupleStructure GetStructure() const
bool IsCustomEnum(const RNTupleDescriptor &desc) const
Tells if the field describes a user-defined enum type.
bool operator==(const RFieldDescriptor &other) const
RFieldDescriptor(const RFieldDescriptor &other)=delete
std::uint32_t GetColumnCardinality() const
std::string fFieldDescription
Free text set by the user.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor & operator=(RFieldDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetLinkIds() const
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
ROOT::DescriptorId_t GetParentId() const
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::uint64_t GetNRepetitions() const
RFieldDescriptor(RFieldDescriptor &&other)=default
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fFieldName
The leaf name, not including parent fields.
const std::string & GetFieldDescription() const
std::optional< std::uint32_t > GetTypeChecksum() const
ROOT::DescriptorId_t GetProjectionSourceId() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint32_t GetTypeVersion() const
const std::string & GetFieldName() const
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
const std::string & GetTypeName() const
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor >::const_iterator Iter_t
Used to loop over all the clusters of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RClusterDescriptorIterable(const RNTupleDescriptor &ntuple)
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor >::const_iterator Iter_t
Used to loop over all the cluster groups of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
const RNTupleDescriptor & fNTuple
The enclosing range's RNTuple.
const std::vector< ROOT::DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &columns, std::size_t index)
Used to loop over a field's associated columns.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
std::vector< ROOT::DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
std::vector< RExtraTypeInfoDescriptor >::const_iterator Iter_t
Used to loop over all the extra type info records of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &fieldChildren, std::size_t index)
const std::vector< ROOT::DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's RNTuple.
Used to loop over a field's child fields.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
std::vector< ROOT::DescriptorId_t > fFieldChildren
The descriptor IDs of the child fields.
Summarizes information about fields and the corresponding columns that were added after the header ha...
const std::vector< ROOT::DescriptorId_t > & GetExtendedColumnRepresentations() const
std::unordered_set< ROOT::DescriptorId_t > fFieldIdsLookup
All field IDs of late model extensions for efficient lookup.
std::uint32_t fNLogicalColumns
Number of logical and physical columns; updated by the descriptor builder when columns are added.
std::vector< ROOT::DescriptorId_t > fExtendedColumnRepresentations
All logical column IDs of columns that extend, with additional column representations,...
bool ContainsExtendedColumnRepresentation(ROOT::DescriptorId_t columnId) const
void MarkExtendedField(const RFieldDescriptor &fieldDesc)
Marks fieldDesc as an extended field, i.e.
std::vector< ROOT::DescriptorId_t > fFieldIdsOrder
All field IDs of late model extensions, in the order of field addition.
bool ContainsField(ROOT::DescriptorId_t fieldId) const
void MarkExtendedColumn(const RColumnDescriptor &columnDesc)
Marks columnDesc as an extended column, i.e.
The on-storage metadata of an RNTuple.
std::uint64_t GetGeneration() const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
const RClusterGroupDescriptor & GetClusterGroupDescriptor(ROOT::DescriptorId_t clusterGroupId) const
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::set< unsigned int > fFeatureFlags
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::vector< Experimental::RNTupleAttrSetDescriptor > fAttributeSets
List of AttributeSets linked to this RNTuple.
ROOT::DescriptorId_t fFieldZeroId
Set by the descriptor builder.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
std::size_t GetNExtraTypeInfos() const
std::uint64_t GetOnDiskFooterSize() const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::size_t GetNActiveClusters() const
RColumnDescriptorIterable GetColumnIterable() const
bool operator==(const RNTupleDescriptor &other) const
const std::string & GetName() const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint16_t fVersionMinor
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
std::vector< std::uint64_t > GetFeatureFlags() const
ROOT::NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
std::unique_ptr< ROOT::RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored metadata.
std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const
Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type nam...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
std::size_t GetNAttributeSets() const
std::size_t GetNClusters() const
ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
std::size_t GetNPhysicalColumns() const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
void PrintInfo(std::ostream &output) const
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
const RClusterDescriptor & GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
std::string fName
The RNTuple name needs to be unique in a given storage location (file)
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
std::uint64_t GetOnDiskHeaderXxHash3() const
RResult< void > DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId)
std::uint16_t fVersionMajor
Set by the descriptor builder when deserialized.
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
std::size_t GetNFields() const
static constexpr unsigned int kFeatureFlagTest
ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const
RResult< void > AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
bool HasFeature(unsigned int flag) const
std::uint16_t fVersionPatch
Set by the descriptor builder when deserialized.
std::uint64_t GetOnDiskHeaderSize() const
std::string fDescription
Free text from the user.
ROOT::Experimental::RNTupleAttrSetDescriptorIterable GetAttrSetIterable() const
RFieldDescriptorIterable GetTopLevelFields() const
std::uint16_t fVersionEpoch
Set by the descriptor builder when deserialized.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
RNTupleDescriptor Clone() const
std::size_t GetNLogicalColumns() const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::size_t GetNClusterGroups() const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
RClusterDescriptorIterable GetClusterIterable() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
const std::string & GetDescription() const
std::uint64_t fGeneration
The generation of the descriptor.
ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RFieldDescriptor & GetFieldZero() const
Generic information about the physical location of data.
const Int_t n
Definition legend1.C:16
RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
std::vector< ROOT::Internal::RNTupleClusterBoundaries > GetClusterBoundaries(const RNTupleDescriptor &desc)
Return the cluster boundaries for each cluster in this RNTuple.
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleStructure
The fields in the RNTuple data model tree can carry different structural information about the type s...
Additional information about a page in an in-memory RPageRange.
RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage)
ROOT::NTupleSize_t fPageNumber
Page number in the corresponding RPageRange.
ROOT::NTupleSize_t fFirstElementIndex
Index (in cluster) of the first element in page.
void SetPageNumber(ROOT::NTupleSize_t pageNumber)
Information about a single page in the context of a cluster's page range.
bool fHasChecksum
If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data.
void SetLocator(const RNTupleLocator &locator)
bool operator==(const RPageInfo &other) const
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
const RNTupleLocator & GetLocator() const
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
bool operator==(RValueRange other) const
RValueRange(std::pair< double, double > range)
bool operator!=(RValueRange other) const
bool fForwardCompatible
By default, creating a model will fail if any of the reconstructed fields contains an unknown column ...
bool fCreateBare
If true, the model will be created without a default entry (bare model).
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.
bool fEmulateUnknownTypes
If true, fields with a user defined type that have no available dictionaries will be reconstructed as...
static void output()