Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6
7/*************************************************************************
8 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_RNTupleDescriptor
16#define ROOT_RNTupleDescriptor
17
19#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleTypes.hxx>
22#include <ROOT/RSpan.hxx>
23
24#include <TError.h>
25
26#include <algorithm>
27#include <chrono>
28#include <cmath>
29#include <functional>
30#include <iterator>
31#include <map>
32#include <memory>
33#include <optional>
34#include <ostream>
35#include <vector>
36#include <set>
37#include <string>
38#include <string_view>
39#include <unordered_map>
40#include <unordered_set>
41
42namespace ROOT {
43
44class RFieldBase;
45class RNTupleModel;
46
47namespace Internal {
49}
50
52
53namespace Internal {
60
61RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc);
66
67std::vector<ROOT::Internal::RNTupleClusterBoundaries> GetClusterBoundaries(const RNTupleDescriptor &desc);
68} // namespace Internal
69
70namespace Experimental {
71
72// clang-format off
73/**
74\class ROOT::Experimental::RNTupleAttrSetDescriptor
75\ingroup NTuple
76\brief Metadata stored for every Attribute Set linked to an RNTuple.
77*/
78// clang-format on
81
82 std::uint16_t fSchemaVersionMajor = 0;
83 std::uint16_t fSchemaVersionMinor = 0;
84 std::uint32_t fAnchorLength = 0; ///< uncompressed size of the linked anchor
85 // The locator of the AttributeSet anchor.
86 // In case of kTypeFile, it points to the beginning of the Anchor's payload.
87 // NOTE: Only kTypeFile is supported at the moment.
89 std::string fName;
90
91public:
97
98 bool operator==(const RNTupleAttrSetDescriptor &other) const;
99 bool operator!=(const RNTupleAttrSetDescriptor &other) const { return !(*this == other); }
100
101 const std::string &GetName() const { return fName; }
102 std::uint16_t GetSchemaVersionMajor() const { return fSchemaVersionMajor; }
103 std::uint16_t GetSchemaVersionMinor() const { return fSchemaVersionMinor; }
104 std::uint32_t GetAnchorLength() const { return fAnchorLength; }
106
108};
109
110class RNTupleAttrSetDescriptorIterable;
111
112} // namespace Experimental
113
114// clang-format off
115/**
116\class ROOT::RFieldDescriptor
117\ingroup NTuple
118\brief Metadata stored for every field of an RNTuple
119*/
120// clang-format on
121class RFieldDescriptor final {
124
125private:
127 /// The version of the C++-type-to-column translation mechanics
128 std::uint32_t fFieldVersion = 0;
129 /// The version of the C++ type itself
130 std::uint32_t fTypeVersion = 0;
131 /// The leaf name, not including parent fields
132 std::string fFieldName;
133 /// Free text set by the user
134 std::string fFieldDescription;
135 /// The C++ type that was used when writing the field
136 std::string fTypeName;
137 /// A typedef or using directive that resolved to the type name during field creation
138 std::string fTypeAlias;
139 /// The number of elements per entry for fixed-size arrays
140 std::uint64_t fNRepetitions = 0;
141 /// The structural information carried by this field in the data model tree
143 /// Establishes sub field relationships, such as classes and collections
145 /// For projected fields, the source field ID
147 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
148 /// order of sub fields.
149 std::vector<ROOT::DescriptorId_t> fLinkIds;
150 /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
151 /// list of logical column ids. For example, the second column of the third column representation is
152 /// fLogicalColumnIds[2 * fColumnCardinality + 1]
153 std::uint32_t fColumnCardinality = 0;
154 /// The ordered list of columns attached to this field: first by representation index then by column index.
155 std::vector<ROOT::DescriptorId_t> fLogicalColumnIds;
156 /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
157 /// identify types by their checksum
158 std::optional<std::uint32_t> fTypeChecksum;
159 /// Indicates if this is a collection that should be represented in memory by a SoA layout.
160 bool fIsSoACollection = false;
161
162public:
163 RFieldDescriptor() = default;
164 RFieldDescriptor(const RFieldDescriptor &other) = delete;
168
169 bool operator==(const RFieldDescriptor &other) const;
170 /// Get a copy of the descriptor
171 RFieldDescriptor Clone() const;
172
173 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
174 /// access to sub fields, which is provided by the RNTupleDescriptor argument.
175 std::unique_ptr<ROOT::RFieldBase>
176 CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options = {}) const;
177
179 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
180 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
181 const std::string &GetFieldName() const { return fFieldName; }
182 const std::string &GetFieldDescription() const { return fFieldDescription; }
183 const std::string &GetTypeName() const { return fTypeName; }
184 const std::string &GetTypeAlias() const { return fTypeAlias; }
185 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
189 const std::vector<ROOT::DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
190 const std::vector<ROOT::DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
191 std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
192 std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
194 bool IsSoACollection() const { return fIsSoACollection; }
195};
196
197// clang-format off
198/**
199\class ROOT::RColumnDescriptor
200\ingroup NTuple
201\brief Metadata stored for every column of an RNTuple
202*/
203// clang-format on
204class RColumnDescriptor final {
207
208public:
/// A pair of bounds (minimum and maximum) attached to a column, used e.g. by quantized real fields.
struct RValueRange {
   /// Lower bound; zero for a default-constructed range.
   double fMin = 0;
   /// Upper bound; zero for a default-constructed range.
   double fMax = 0;

   RValueRange() = default;
   RValueRange(double min, double max) : fMin(min), fMax(max) {}
   /// Non-explicit: a std::pair {min, max} converts implicitly to a range.
   RValueRange(std::pair<double, double> range) : fMin(range.first), fMax(range.second) {}

   /// Exact comparison of both bounds with `==`; no tolerance is applied.
   bool operator==(RValueRange other) const { return fMin == other.fMin && fMax == other.fMax; }
   bool operator!=(RValueRange other) const { return !(*this == other); }
};
219
220private:
221 /// The actual column identifier, which is the link to the corresponding field
223 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
225 /// Every column belongs to one and only one field
/// The absolute value specifies the index for the first stored element for this column.
/// For deferred columns the absolute value is larger than zero.
/// Negative values specify a suppressed and deferred column.
/// The member is signed because the sign carries information, hence the signed zero initializer.
std::int64_t fFirstElementIndex = 0;
231 /// A field can be serialized into several columns, which are numbered from zero to $n$
232 std::uint32_t fIndex = 0;
233 /// A field may use multiple column representations, which are numbered from zero to $m$.
234 /// Every representation has the same number of columns.
235 std::uint16_t fRepresentationIndex = 0;
236 /// The size in bits of elements of this column. Most columns have the size fixed by their type
237 /// but low-precision float columns have variable bit widths.
238 std::uint16_t fBitsOnStorage = 0;
239 /// The on-disk column type
241 /// Optional value range (used e.g. by quantized real fields)
242 std::optional<RValueRange> fValueRange;
243
244public:
245 RColumnDescriptor() = default;
246 RColumnDescriptor(const RColumnDescriptor &other) = delete;
250
251 bool operator==(const RColumnDescriptor &other) const;
252 /// Get a copy of the descriptor
253 RColumnDescriptor Clone() const;
254
258 std::uint32_t GetIndex() const { return fIndex; }
259 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
260 std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
261 std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
263 std::optional<RValueRange> GetValueRange() const { return fValueRange; }
265 bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
267};
268
269// clang-format off
270/**
271\class ROOT::RClusterDescriptor
272\ingroup NTuple
273\brief Metadata for RNTuple clusters
274
275The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
276In a second phase, the event range, column group, page locations and column ranges are added.
277Both phases are populated by the RClusterDescriptorBuilder.
278Clusters span across all available columns in the RNTuple.
279*/
280// clang-format on
283
284public:
285 // clang-format off
286 /**
287 \class ROOT::RClusterDescriptor::RColumnRange
288 \ingroup NTuple
289 \brief The window of element indexes of a particular column in a particular cluster
290 */
291 // clang-format on
292 class RColumnRange final {
294 /// The global index of the first column element in the cluster
296 /// The number of column elements in the cluster
298 /// The usual format for ROOT compression settings (see Compression.h).
299 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
300 /// If unset, the compression settings are undefined (deferred columns, suppressed columns).
301 std::optional<std::uint32_t> fCompressionSettings;
302 /// Suppressed columns have an empty page range and unknown compression settings.
303 /// Their element index range, however, is aligned with the corresponding column of the
304 /// primary column representation (see Section "Suppressed Columns" in the specification)
305 bool fIsSuppressed = false;
306
307 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
308 // Should this be done on the field level?
309
310 public:
311 RColumnRange() = default;
312
313 RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex,
314 ROOT::NTupleSize_t nElements, std::optional<std::uint32_t> compressionSettings,
315 bool suppressed = false)
316 : fPhysicalColumnId(physicalColumnId),
317 fFirstElementIndex(firstElementIndex),
318 fNElements(nElements),
319 fCompressionSettings(compressionSettings),
320 fIsSuppressed(suppressed)
321 {
322 }
323
326
330
334
335 std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
336 void SetCompressionSettings(std::optional<std::uint32_t> comp) { fCompressionSettings = comp; }
337
338 bool IsSuppressed() const { return fIsSuppressed; }
339 void SetIsSuppressed(bool suppressed) { fIsSuppressed = suppressed; }
340
341 bool operator==(const RColumnRange &other) const
342 {
346 }
347
352 };
353
354 // clang-format off
355 /**
356 \class ROOT::RClusterDescriptor::RPageInfo
357 \ingroup NTuple
358 \brief Information about a single page in the context of a cluster's page range.
359 */
360 // clang-format on
361 // NOTE: We do not need to store the element size / uncompressed page size because we know to which column
362 // the page belongs
363 struct RPageInfo {
364 private:
365 /// The meaning of `fLocator` depends on the storage backend.
367 /// The sum of the elements of all the pages must match the corresponding `fNElements` field in `fColumnRanges`
368 std::uint32_t fNElements = std::uint32_t(-1);
369 /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
370 bool fHasChecksum = false;
371
372 public:
373 RPageInfo() = default;
374 RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
375 : fLocator(locator), fNElements(nElements), fHasChecksum(hasChecksum)
376 {
377 }
378
379 bool operator==(const RPageInfo &other) const
380 {
381 return fLocator == other.fLocator && fNElements == other.fNElements;
382 }
383
384 const RNTupleLocator &GetLocator() const { return fLocator; }
386 void SetLocator(const RNTupleLocator &locator) { fLocator = locator; }
387
388 std::uint32_t GetNElements() const { return fNElements; }
389 void SetNElements(std::uint32_t n) { fNElements = n; }
390
391 bool HasChecksum() const { return fHasChecksum; }
392 void SetHasChecksum(bool hasChecksum) { fHasChecksum = hasChecksum; }
393 };
394
395 // clang-format off
396 /**
397 \class ROOT::RClusterDescriptor::RPageInfoExtended
398 \ingroup NTuple
399 \brief Additional information about a page in an in-memory RPageRange.
400
401 Used by RPageRange::Find() to return information relative to the RPageRange. This information is not stored on disk
402 and we don't need to keep it in memory because it can be easily recomputed.
403 */
404 // clang-format on
406 private:
407 /// Index (in cluster) of the first element in page.
409 /// Page number in the corresponding RPageRange.
411
412 public:
413 RPageInfoExtended() = default;
414 RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
415 : RPageInfo(pageInfo), fFirstElementIndex(firstElementIndex), fPageNumber(pageNumber)
416 {
417 }
418
420 void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage) { fFirstElementIndex = firstInPage; }
421
423 void SetPageNumber(ROOT::NTupleSize_t pageNumber) { fPageNumber = pageNumber; }
424 };
425
426 // clang-format off
427 /**
428 \class ROOT::RClusterDescriptor::RPageRange
429 \ingroup NTuple
430 \brief Records the partition of data into pages for a particular column in a particular cluster
431 */
432 // clang-format on
433 class RPageRange final {
435
436 private:
437 /// \brief Extend this RPageRange to fit the given RColumnRange.
438 ///
439 /// To do so, prepend as many synthetic RPageInfos as needed to cover the range in `columnRange`.
440 /// RPageInfos are constructed to contain as many elements of type `element` given a page size
441 /// limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
442 /// This function is used to make up RPageRanges for clusters that contain deferred columns.
443 /// \return The number of column elements covered by the synthesized RPageInfos
444 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange,
445 const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize);
446
447 std::vector<RPageInfo> fPageInfos;
448
449 /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
450 /// up to and including a given index. Used for binary search in Find().
451 /// This vector is only created if fPageInfos has at least kLargeRangeThreshold elements.
452 std::unique_ptr<std::vector<ROOT::NTupleSize_t>> fCumulativeNElements;
453
455
456 public:
457 /// Create the fCumulativeNElements only when it's needed, i.e. when there are many pages to search through.
458 static constexpr std::size_t kLargeRangeThreshold = 10;
459
460 RPageRange() = default;
461 RPageRange(const RPageRange &other) = delete;
462 RPageRange &operator=(const RPageRange &other) = delete;
463 RPageRange(RPageRange &&other) = default;
464 RPageRange &operator=(RPageRange &&other) = default;
465
467 {
468 RPageRange clone;
470 clone.fPageInfos = fPageInfos;
472 clone.fCumulativeNElements = std::make_unique<std::vector<ROOT::NTupleSize_t>>(*fCumulativeNElements);
473 }
474 return clone;
475 }
476
477 /// Find the page in the RPageRange that contains the given element. The element must exist.
478 RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const;
479
482
483 const std::vector<RPageInfo> &GetPageInfos() const { return fPageInfos; }
484 std::vector<RPageInfo> &GetPageInfos() { return fPageInfos; }
485
486 bool operator==(const RPageRange &other) const
487 {
488 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
489 }
490 };
491
492private:
494 /// Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges
497
498 std::unordered_map<ROOT::DescriptorId_t, RColumnRange> fColumnRanges;
499 std::unordered_map<ROOT::DescriptorId_t, RPageRange> fPageRanges;
500
501public:
503
509
511
512 bool operator==(const RClusterDescriptor &other) const;
513
517 const RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
518 const RPageRange &GetPageRange(ROOT::DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
519 /// Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
520 RColumnRangeIterable GetColumnRangeIterable() const;
522 {
523 return fColumnRanges.find(physicalId) != fColumnRanges.end();
524 }
525 std::uint64_t GetNBytesOnStorage() const;
526};
527
529private:
531
532public:
533 class RIterator final {
534 private:
535 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RColumnRange>::const_iterator;
536 /// The wrapped map iterator
538
539 public:
540 using iterator_category = std::forward_iterator_tag;
543 using difference_type = std::ptrdiff_t;
544 using pointer = const RColumnRange *;
545 using reference = const RColumnRange &;
546
547 RIterator() = default;
548 explicit RIterator(Iter_t iter) : fIter(iter) {}
549 iterator &operator++() /* prefix */
550 {
551 ++fIter;
552 return *this;
553 }
554 iterator operator++(int) /* postfix */
555 {
556 auto old = *this;
557 operator++();
558 return old;
559 }
560 reference operator*() const { return fIter->second; }
561 pointer operator->() const { return &fIter->second; }
562 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
563 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
564 };
565
566 explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
567
568 RIterator begin() { return RIterator{fDesc.fColumnRanges.cbegin()}; }
569 RIterator end() { return RIterator{fDesc.fColumnRanges.cend()}; }
570 size_t size() { return fDesc.fColumnRanges.size(); }
571};
572
573// clang-format off
574/**
575\class ROOT::RClusterGroupDescriptor
576\ingroup NTuple
577\brief Clusters are bundled in cluster groups.
578
579Very large RNTuples can contain multiple cluster groups to organize cluster metadata.
580Every RNTuple has at least one cluster group. The clusters in a cluster group are ordered
581corresponding to their first entry number.
582*/
583// clang-format on
586
587private:
589 /// The cluster IDs can be empty if the corresponding page list is not loaded.
590 /// Otherwise, cluster ids are sorted by first entry number.
591 std::vector<ROOT::DescriptorId_t> fClusterIds;
592 /// The page list that corresponds to the cluster group
594 /// Uncompressed size of the page list
595 std::uint64_t fPageListLength = 0;
596 /// The minimum first entry number of the clusters in the cluster group
597 std::uint64_t fMinEntry = 0;
598 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
599 std::uint64_t fEntrySpan = 0;
600 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
601 std::uint32_t fNClusters = 0;
602
603public:
609
611 /// Creates a clone without the cluster IDs
613
614 bool operator==(const RClusterGroupDescriptor &other) const;
615
617 std::uint32_t GetNClusters() const { return fNClusters; }
619 std::uint64_t GetPageListLength() const { return fPageListLength; }
620 const std::vector<ROOT::DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
621 std::uint64_t GetMinEntry() const { return fMinEntry; }
622 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
623 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
624 /// Stage two loads the list of cluster IDs.
625 bool HasClusterDetails() const { return !fClusterIds.empty(); }
626};
627
628/// Used in RExtraTypeInfoDescriptor
633
634// clang-format off
635/**
636\class ROOT::RExtraTypeInfoDescriptor
637\ingroup NTuple
638\brief Field-specific extra type information from the header / extension header
639
640Currently only used by streamer fields to store the RNTuple-wide list of streamer info records.
641*/
642// clang-format on
645
646private:
647 /// Specifies the meaning of the extra information
649 /// Type version the extra type information is bound to
650 std::uint32_t fTypeVersion = 0;
651 /// The type name the extra information refers to; empty for RNTuple-wide extra information
652 std::string fTypeName;
653 /// The content format depends on the content ID and may be binary
654 std::string fContent;
655
656public:
662
663 bool operator==(const RExtraTypeInfoDescriptor &other) const;
664
666
668 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
669 const std::string &GetTypeName() const { return fTypeName; }
670 const std::string &GetContent() const { return fContent; }
671};
672
673namespace Internal {
674// Used by the RNTupleReader to activate/deactivate entries. Needs to adapt when we have sharded clusters.
676} // namespace Internal
677
678// clang-format off
679/**
680\class ROOT::RNTupleDescriptor
681\ingroup NTuple
682\brief The on-storage metadata of an RNTuple
683
684Represents the on-disk (on storage) information about an RNTuple. The metadata consists of a header, a footer, and
685potentially multiple page lists.
686The header carries the RNTuple schema, i.e. the fields and the associated columns and their relationships.
687The footer carries information about one or several cluster groups and links to their page lists.
688For every cluster group, a page list envelope stores cluster summaries and page locations.
689For every cluster, it stores for every column the range of element indexes as well as a list of pages and page
690locations.
691
692The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
693for RNTuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
694
695The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
696the concept of envelopes and frames: header, footer, and page list envelopes have a preamble with a type ID and length.
697Substructures are serialized in frames and have a size and number of items (for list frames). This allows for forward
698and backward compatibility when the metadata evolves.
699*/
700// clang-format on
701class RNTupleDescriptor final {
705
706public:
707 class RHeaderExtension;
708
709private:
710 /// The RNTuple name needs to be unique in a given storage location (file)
711 std::string fName;
712 /// Free text from the user
713 std::string fDescription;
714
716
717 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
718
719 std::set<unsigned int> fFeatureFlags;
720 std::unordered_map<ROOT::DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
721 std::unordered_map<ROOT::DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
722
723 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
724 std::unique_ptr<RHeaderExtension> fHeaderExtension;
725
726 //// All fields above are part of the schema and are cloned when creating a new descriptor from a given one
727 //// (see CloneSchema())
728
729 std::uint16_t fVersionEpoch = 0; ///< Set by the descriptor builder when deserialized
730 std::uint16_t fVersionMajor = 0; ///< Set by the descriptor builder when deserialized
731 std::uint16_t fVersionMinor = 0; ///< Set by the descriptor builder when deserialized
732 std::uint16_t fVersionPatch = 0; ///< Set by the descriptor builder when deserialized
733
734 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
735 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
736 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
737
738 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
739 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
740
741 /// \brief The generation of the descriptor
742 ///
743 /// Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
744 /// active page locations. During the lifetime of the descriptor, page location information for clusters
745 /// can be added or removed. When this happens, the generation should be increased, so that users of the
746 /// descriptor know that the information changed. The generation is increased, e.g., by the page source's
747 /// exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
748 std::uint64_t fGeneration = 0;
749
750 std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
751 /// References cluster groups sorted by entry range and thus allows for binary search.
752 /// Note that this list is empty during the descriptor building process and will only be
753 /// created when the final descriptor is extracted from the builder.
754 std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
755 /// Potentially a subset of all the available clusters
756 std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
757 /// List of AttributeSets linked to this RNTuple
758 std::vector<Experimental::RNTupleAttrSetDescriptor> fAttributeSets;
759
760 // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
762
763 /// Creates a descriptor containing only the schema information about this RNTuple, i.e. all the information needed
764 /// to create a new RNTuple with the same schema as this one but not necessarily the same clustering. This is used
765 /// when merging two RNTuples.
767
768public:
769 /// All known feature flags.
770 /// Note that the flag values represent the bit _index_, not the already-bitshifted integer.
772 // Insert new feature flags here, with contiguous values. If at any point a "hole" appears in the valid feature
773 // flags values, the check in RNTupleSerialize must be updated.
774
775 // End of regular feature flags
777
778 /// Reserved for forward-compatibility testing
780 };
781
782 class RColumnDescriptorIterable;
783 class RFieldDescriptorIterable;
784 class RClusterGroupDescriptorIterable;
785 class RClusterDescriptorIterable;
786 class RExtraTypeInfoDescriptorIterable;
788
789 /// Modifiers passed to CreateModel()
791 private:
792 /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being used
793 /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
795 /// By default, creating a model will fail if any of the reconstructed fields contains an unknown column type
796 /// or an unknown field structural role.
797 /// If this option is enabled, the model will be created and all fields containing unknown data (directly
798 /// or indirectly) will be skipped instead.
799 bool fForwardCompatible = false;
800 /// If true, the model will be created without a default entry (bare model).
801 bool fCreateBare = false;
802 /// If true, fields with a user defined type that have no available dictionaries will be reconstructed
803 /// as record fields from the on-disk information; otherwise, they will cause an error.
805
806 public:
807 RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
808
811
814
815 void SetCreateBare(bool v) { fCreateBare = v; }
816 bool GetCreateBare() const { return fCreateBare; }
817
820 };
821
822 RNTupleDescriptor() = default;
823 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
827
828 RNTupleDescriptor Clone() const;
829
830 bool operator==(const RNTupleDescriptor &other) const;
831
832 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
833 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
834 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
835 /// Combines epoch, major, minor and patch (16 bits each) into one 64-bit value. \see ROOT::RNTuple::GetCurrentVersion()
836 std::uint64_t GetVersion() const
837 {
838 return (static_cast<std::uint64_t>(fVersionEpoch) << 48) | (static_cast<std::uint64_t>(fVersionMajor) << 32) | // epoch: bits 48-63, major: bits 32-47
839 (static_cast<std::uint64_t>(fVersionMinor) << 16) | (static_cast<std::uint64_t>(fVersionPatch)); // minor: bits 16-31, patch: bits 0-15
840 }
841
843 {
844 return fFieldDescriptors.at(fieldId);
845 }
847 {
848 return fColumnDescriptors.at(columnId);
849 }
851 {
852 return fClusterGroupDescriptors.at(clusterGroupId);
853 }
855 {
856 return fClusterDescriptors.at(clusterId);
857 }
858
859 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
860 RFieldDescriptorIterable
861 GetFieldIterable(const RFieldDescriptor &fieldDesc,
862 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
863 RFieldDescriptorIterable GetFieldIterable(ROOT::DescriptorId_t fieldId) const;
864 RFieldDescriptorIterable
866 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
867
868 RFieldDescriptorIterable GetTopLevelFields() const;
869 RFieldDescriptorIterable
870 GetTopLevelFields(const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
871
872 RColumnDescriptorIterable GetColumnIterable() const;
873 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
874 RColumnDescriptorIterable GetColumnIterable(ROOT::DescriptorId_t fieldId) const;
875
876 RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
877
878 RClusterDescriptorIterable GetClusterIterable() const;
879
880 RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
881
883
884 const std::string &GetName() const { return fName; }
885 const std::string &GetDescription() const { return fDescription; }
886
887 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
888 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
889 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
890 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
891 std::size_t GetNClusters() const { return fNClusters; }
892 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
893 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
894 std::size_t GetNAttributeSets() const { return fAttributeSets.size(); }
895
896 /// We know the number of entries from adding the cluster summaries
899
900 /// Returns the logical parent of all top-level RNTuple data fields.
903 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const;
904 /// Searches for a top-level field
905 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName) const;
906 ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
907 std::uint16_t representationIndex) const;
908 ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
909 std::uint16_t representationIndex) const;
913
914 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
915 /// In case of invalid field ID, an empty string is returned.
916 std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const;
917
918 /// Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type name.
919 std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const;
920
/// Tells whether `flag` is contained in `fFeatureFlags`.
921 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
922 std::vector<std::uint64_t> GetFeatureFlags() const;
923
924 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
925 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
926
927 /// Methods to load and drop cluster group details (cluster IDs and page locations)
929 AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
931
/// Returns the descriptor's stored generation value (`fGeneration`).
932 std::uint64_t GetGeneration() const { return fGeneration; }
934
935 /// Re-create the C++ model from the stored metadata
936 std::unique_ptr<ROOT::RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
937 void PrintInfo(std::ostream &output) const;
938};
939
940// clang-format off
941/**
942\class ROOT::RNTupleDescriptor::RColumnDescriptorIterable
943\ingroup NTuple
944\brief Used to loop over a field's associated columns
945*/
946// clang-format on
948private:
949 /// The associated RNTuple for this range.
951 /// The descriptor ids of the columns ordered by field, representation, and column index
952 std::vector<ROOT::DescriptorId_t> fColumns = {};
953
954public:
/// Forward iterator over the column IDs stored in the enclosing iterable's `fColumns`. The iterator state is a
/// pointer to the iterable plus an index into that vector; two iterators compare equal only if both agree.
955 class RIterator final {
956 private:
957 /// The enclosing RColumnDescriptorIterable.
     /// Current position, used as an index into the iterable's `fColumns`.
959 std::size_t fIndex;
960
961 public:
962 using iterator_category = std::forward_iterator_tag;
965 using difference_type = std::ptrdiff_t;
966 using pointer = const RColumnDescriptor *;
968
     /// Binds the iterator to `iterable` (may be null) at position `index`.
969 explicit RIterator(const RColumnDescriptorIterable *iterable = nullptr, std::size_t index = 0)
970 : fIterable(iterable), fIndex(index)
971 {
972 }
     /// Advances to the next index; no bounds check is performed here.
973 iterator &operator++() /* prefix */
974 {
975 ++fIndex;
976 return *this;
977 }
     /// Advances to the next index and returns a copy of the iterator as it was before the increment.
978 iterator operator++(int) /* postfix */
979 {
980 auto old = *this;
981 operator++();
982 return old;
983 }
985 {
     // Resolve the column ID at the current index through the descriptor; at() bounds-checks the index.
     // An iterator that was constructed without an iterable cannot be dereferenced and throws instead.
986 if (fIterable)
987 return fIterable->fNTuple.GetColumnDescriptor(fIterable->fColumns.at(fIndex));
988 throw RException(R__FAIL("dereference of RNTupleDescriptor::RColumnDescriptorIterable::RIterator"
989 " constructed without RNTupleDescriptor::RColumnDescriptorIterable"));
990 }
992 {
     // Same lookup as above, but yields nullptr rather than throwing when there is no enclosing iterable.
993 return fIterable ? &fIterable->fNTuple.GetColumnDescriptor(fIterable->fColumns.at(fIndex)) : nullptr;
994 }
     /// Iterators differ if either the index or the enclosing iterable differs.
995 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex || fIterable != rh.fIterable; }
     /// Iterators are equal if both the index and the enclosing iterable match.
996 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex && fIterable == rh.fIterable; }
997 };
998
999 RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc);
1001
/// Returns an iterator bound to this iterable at index 0.
1002 RIterator begin() { return RIterator(this, 0); }
/// Returns the past-the-end iterator: bound to this iterable at index `fColumns.size()`.
1003 RIterator end() { return RIterator(this, fColumns.size()); }
/// Returns the number of column IDs in this range (`fColumns.size()`).
1004 size_t size() { return fColumns.size(); }
1005};
1006
1007// clang-format off
1008/**
1009\class ROOT::RNTupleDescriptor::RFieldDescriptorIterable
1010\ingroup NTuple
1011\brief Used to loop over a field's child fields
1012*/
1013// clang-format on
1015private:
1016 /// The associated RNTuple for this range.
1018 /// The descriptor IDs of the child fields. These may be sorted using
1019 /// a comparison function.
1020 std::vector<ROOT::DescriptorId_t> fFieldChildren = {};
1021
1022public:
/// Forward iterator over the child field IDs stored in the enclosing iterable's `fFieldChildren`. The iterator
/// state is a pointer to the iterable plus an index into that vector; two iterators compare equal only if both agree.
1023 class RIterator final {
1024 private:
1025 /// The enclosing RFieldDescriptorIterable.
     /// Current position, used as an index into the iterable's `fFieldChildren`.
1027 std::size_t fIndex;
1028
1029 public:
1030 using iterator_category = std::forward_iterator_tag;
1033 using difference_type = std::ptrdiff_t;
1034 using pointer = const RFieldDescriptor *;
1036
     /// Binds the iterator to `iterable` (may be null) at position `index`.
1037 explicit RIterator(const RFieldDescriptorIterable *iterable = nullptr, std::size_t index = 0)
1038 : fIterable(iterable), fIndex(index)
1039 {
1040 }
     /// Advances to the next index; no bounds check is performed here.
1041 iterator &operator++() /* prefix */
1042 {
1043 ++fIndex;
1044 return *this;
1045 }
     /// Advances to the next index and returns a copy of the iterator as it was before the increment.
1046 iterator operator++(int) /* postfix */
1047 {
1048 auto old = *this;
1049 operator++();
1050 return old;
1051 }
1053 {
     // Resolve the field ID at the current index through the descriptor; at() bounds-checks the index.
     // An iterator that was constructed without an iterable cannot be dereferenced and throws instead.
1054 if (fIterable)
1055 return fIterable->fNTuple.GetFieldDescriptor(fIterable->fFieldChildren.at(fIndex));
1056 throw RException(R__FAIL("dereference of RNTupleDescriptor::RFieldDescriptorIterable::RIterator"
1057 " constructed without RNTupleDescriptor::RFieldDescriptorIterable"));
1058 }
1060 {
     // Same lookup as above, but yields nullptr rather than throwing when there is no enclosing iterable.
1061 return fIterable ? &fIterable->fNTuple.GetFieldDescriptor(fIterable->fFieldChildren.at(fIndex)) : nullptr;
1062 }
     /// Iterators differ if either the index or the enclosing iterable differs.
1063 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex || fIterable != rh.fIterable; }
     /// Iterators are equal if both the index and the enclosing iterable match.
1064 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex && fIterable == rh.fIterable; }
1065 };
1067 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1068 {
1069 }
1070 /// Sort the range using an arbitrary comparison function.
1072 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
1073 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1074 {
1075 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
1076 }
/// Returns an iterator bound to this iterable at index 0.
1077 RIterator begin() { return RIterator(this, 0); }
/// Returns the past-the-end iterator: bound to this iterable at index `fFieldChildren.size()`.
1078 RIterator end() { return RIterator(this, fFieldChildren.size()); }
1079};
1080
1081// clang-format off
1082/**
1083\class ROOT::RNTupleDescriptor::RClusterGroupDescriptorIterable
1084\ingroup NTuple
1085\brief Used to loop over all the cluster groups of an RNTuple (in unspecified order)
1086
1087Enumerate all cluster group IDs from the descriptor. No specific order can be assumed.
1088*/
1089// clang-format on
1091private:
1092 /// The associated RNTuple for this range.
1094
1095public:
/// Forward iterator that wraps a const iterator of an unordered_map from descriptor ID to RClusterGroupDescriptor
/// and exposes only the mapped descriptor (`->second`) on dereference.
1096 class RIterator final {
1097 private:
1098 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor>::const_iterator;
1099 /// The wrapped map iterator
1101
1102 public:
1103 using iterator_category = std::forward_iterator_tag;
1106 using difference_type = std::ptrdiff_t;
1109
1110 RIterator() = default;
     /// Wraps the given map iterator.
1111 explicit RIterator(Iter_t iter) : fIter(iter) {}
     /// Advances the wrapped map iterator.
1112 iterator &operator++() /* prefix */
1113 {
1114 ++fIter;
1115 return *this;
1116 }
     /// Advances the wrapped map iterator and returns a copy of this iterator as it was before the increment.
1117 iterator operator++(int) /* postfix */
1118 {
1119 auto old = *this;
1120 operator++();
1121 return old;
1122 }
     /// Yields the mapped cluster group descriptor; the map key is not exposed.
1123 reference operator*() const { return fIter->second; }
     /// Yields the address of the mapped cluster group descriptor.
1124 pointer operator->() const { return &fIter->second; }
     /// Comparison is delegated to the wrapped map iterators.
1125 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1126 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1127 };
1128
/// Returns an iterator wrapping `cbegin()` of the descriptor's `fClusterGroupDescriptors`.
1130 RIterator begin() { return RIterator(fNTuple.fClusterGroupDescriptors.cbegin()); }
/// Returns an iterator wrapping `cend()` of the descriptor's `fClusterGroupDescriptors`.
1131 RIterator end() { return RIterator(fNTuple.fClusterGroupDescriptors.cend()); }
1132};
1133
1134// clang-format off
1135/**
1136\class ROOT::RNTupleDescriptor::RClusterDescriptorIterable
1137\ingroup NTuple
1138\brief Used to loop over all the clusters of an RNTuple (in unspecified order)
1139
1140Enumerate all cluster IDs from all cluster descriptors. No specific order can be assumed, use
1141RNTupleDescriptor::FindNextClusterId() and RNTupleDescriptor::FindPrevClusterId() to traverse
1142clusters by entry number.
1143*/
1144// clang-format on
1146private:
1147 /// The associated RNTuple for this range.
1149
1150public:
/// Forward iterator that wraps a const iterator of an unordered_map from descriptor ID to RClusterDescriptor
/// and exposes only the mapped descriptor (`->second`) on dereference.
1151 class RIterator final {
1152 private:
1153 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor>::const_iterator;
1154 /// The wrapped map iterator
1156
1157 public:
1158 using iterator_category = std::forward_iterator_tag;
1161 using difference_type = std::ptrdiff_t;
1164
1165 RIterator() = default;
     /// Wraps the given map iterator.
1166 explicit RIterator(Iter_t iter) : fIter(iter) {}
     /// Advances the wrapped map iterator.
1167 iterator &operator++() /* prefix */
1168 {
1169 ++fIter;
1170 return *this;
1171 }
     /// Advances the wrapped map iterator and returns a copy of this iterator as it was before the increment.
1172 iterator operator++(int) /* postfix */
1173 {
1174 auto old = *this;
1175 operator++();
1176 return old;
1177 }
     /// Yields the mapped cluster descriptor; the map key is not exposed.
1178 reference operator*() const { return fIter->second; }
     /// Yields the address of the mapped cluster descriptor.
1179 pointer operator->() const { return &fIter->second; }
     /// Comparison is delegated to the wrapped map iterators.
1180 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1181 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1182 };
1183
/// Returns an iterator wrapping `cbegin()` of the descriptor's `fClusterDescriptors`.
1185 RIterator begin() { return RIterator(fNTuple.fClusterDescriptors.cbegin()); }
/// Returns an iterator wrapping `cend()` of the descriptor's `fClusterDescriptors`.
1186 RIterator end() { return RIterator(fNTuple.fClusterDescriptors.cend()); }
1187};
1188
1189// clang-format off
1190/**
1191\class ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
1192\ingroup NTuple
1193 \brief Used to loop over all the extra type info records of an RNTuple (in unspecified order)
1194*/
1195// clang-format on
1197private:
1198 /// The associated RNTuple for this range.
1200
1201public:
/// Forward iterator that wraps a const iterator of a vector of RExtraTypeInfoDescriptor and forwards all
/// operations to it.
1202 class RIterator final {
1203 private:
1204 using Iter_t = std::vector<RExtraTypeInfoDescriptor>::const_iterator;
1205 /// The wrapped vector iterator
1207
1208 public:
1209 using iterator_category = std::forward_iterator_tag;
1212 using difference_type = std::ptrdiff_t;
1215
1216 RIterator() = default;
     /// Wraps the given vector iterator.
1217 explicit RIterator(Iter_t iter) : fIter(iter) {}
     /// Advances the wrapped vector iterator.
1218 iterator &operator++() /* prefix */
1219 {
1220 ++fIter;
1221 return *this;
1222 }
     /// Advances the wrapped vector iterator and returns a copy of this iterator as it was before the increment.
1223 iterator operator++(int) /* postfix */
1224 {
1225 auto old = *this;
1226 operator++();
1227 return old;
1228 }
     /// Dereference and member access are forwarded to the wrapped vector iterator.
1229 reference operator*() const { return *fIter; }
1230 pointer operator->() const { return fIter.operator->(); }
     /// Comparison is delegated to the wrapped vector iterators.
1231 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1232 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1233 };
1234
/// Returns an iterator wrapping `cbegin()` of the descriptor's `fExtraTypeInfoDescriptors`.
1236 RIterator begin() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cbegin()); }
/// Returns an iterator wrapping `cend()` of the descriptor's `fExtraTypeInfoDescriptors`.
1237 RIterator end() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cend()); }
1238};
1239
1240namespace Experimental {
1241// clang-format off
1242/**
1243\class ROOT::Experimental::RNTupleAttrSetDescriptorIterable
1244\ingroup NTuple
1245\brief Used to loop over all the Attribute Sets linked to an RNTuple
1246*/
1247// clang-format on
1248// TODO: move this to RNTupleDescriptor::RNTupleAttrSetDescriptorIterable when it moves out of Experimental.
1250private:
1251 /// The associated RNTuple for this range.
1253
1254public:
/// Forward iterator that wraps a const iterator of a vector of RNTupleAttrSetDescriptor and forwards all
/// operations to it.
1255 class RIterator final {
1256 private:
1257 using Iter_t = std::vector<RNTupleAttrSetDescriptor>::const_iterator;
1258 /// The wrapped vector iterator
1260
1261 public:
1262 using iterator_category = std::forward_iterator_tag;
1265 using difference_type = std::ptrdiff_t;
1266 using pointer = const value_type *;
1267 using reference = const value_type &;
1268
1269 RIterator() = default;
     /// Wraps the given vector iterator.
1270 explicit RIterator(Iter_t iter) : fIter(iter) {}
     /// Advances the wrapped vector iterator.
1271 iterator &operator++() /* prefix */
1272 {
1273 ++fIter;
1274 return *this;
1275 }
     /// Advances the wrapped vector iterator and returns a copy of this iterator as it was before the increment.
1276 iterator operator++(int) /* postfix */
1277 {
1278 auto old = *this;
1279 operator++();
1280 return old;
1281 }
     /// Dereference and member access are forwarded to the wrapped vector iterator.
1282 reference operator*() const { return *fIter; }
1283 pointer operator->() const { return fIter.operator->(); }
     /// Comparison is delegated to the wrapped vector iterators.
1284 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1285 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1286 };
1287
/// Returns an iterator wrapping `cbegin()` of the descriptor's `fAttributeSets`.
1289 RIterator begin() { return RIterator(fNTuple.fAttributeSets.cbegin()); }
/// Returns an iterator wrapping `cend()` of the descriptor's `fAttributeSets`.
1290 RIterator end() { return RIterator(fNTuple.fAttributeSets.cend()); }
1291};
1292} // namespace Experimental
1293
1294// clang-format off
1295/**
1296\class ROOT::RNTupleDescriptor::RHeaderExtension
1297\ingroup NTuple
1298\brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
1299*/
1300// clang-format on
1303
1304private:
1305 /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize
1306 /// the fields in that order.
1307 std::vector<ROOT::DescriptorId_t> fFieldIdsOrder;
1308 /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
1309 /// header, this enables us to determine if the column belongs to a field of the header extension or if it
1310 /// belongs to a field of the regular header that gets extended by additional column representations.
1311 std::unordered_set<ROOT::DescriptorId_t> fFieldIdsLookup;
1312 /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
1313 /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
1314 /// these columns need to be serialized in the extension header without re-serializing the field.
1315 std::vector<ROOT::DescriptorId_t> fExtendedColumnRepresentations;
1316 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
1317 std::uint32_t fNLogicalColumns = 0;
1318 std::uint32_t fNPhysicalColumns = 0;
1319
1320 /// Marks `fieldDesc` as an extended field, i.e. a field that appears in the Header Extension (e.g. having been added
1321 /// through late model extension). Note that the field descriptor should also have been added to the RNTuple
1322 /// Descriptor alongside non-extended fields.
1324 {
1325 fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1326 fFieldIdsLookup.insert(fieldDesc.GetId());
1327 }
1328
1329 /// Marks `columnDesc` as an extended column, i.e. a column that appears in the Header Extension (e.g. having been
1330 /// added through late model extension as an additional representation of an existing column). Note that the column
1331 /// descriptor should also have been added to the RNTuple Descriptor alongside non-extended columns.
1333 {
1335 if (!columnDesc.IsAliasColumn())
1337 if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1338 fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1339 }
1340 }
1341
1342public:
/// Returns the number of field IDs recorded in `fFieldIdsOrder`.
1343 std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
/// Returns the stored logical column counter (`fNLogicalColumns`, a std::uint32_t) as std::size_t.
1344 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
/// Returns the stored physical column counter (`fNPhysicalColumns`, a std::uint32_t) as std::size_t.
1345 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1346 const std::vector<ROOT::DescriptorId_t> &GetExtendedColumnRepresentations() const
1347 {
1349 }
1350 /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1351 /// of their addition. Note that these fields are not necessarily top-level fields in the overall schema.
1352 /// If a nested field is extended, it will return the top-most field of the extended subtree.
1353 /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1354 /// the field is not yet linked into the schema tree.
1355 std::vector<ROOT::DescriptorId_t> GetTopMostFields(const RNTupleDescriptor &desc) const;
1356
1358 {
1359 return fFieldIdsLookup.find(fieldId) != fFieldIdsLookup.end();
1360 }
1362 {
1363 return std::find(fExtendedColumnRepresentations.begin(), fExtendedColumnRepresentations.end(), columnId) !=
1365 }
1366};
1367
1368namespace Experimental::Internal {
1371
1372public:
1374 {
1375 fDesc.fName = name;
1376 return *this;
1377 }
/// Sets the major and minor schema version of the descriptor under construction.
/// \return the builder itself, so that calls can be chained
1378 RNTupleAttrSetDescriptorBuilder &SchemaVersion(std::uint16_t major, std::uint16_t minor)
1379 {
1380 fDesc.fSchemaVersionMajor = major;
1381 fDesc.fSchemaVersionMinor = minor;
1382 return *this;
1383 }
1385 {
1386 fDesc.fAnchorLocator = loc;
1387 return *this;
1388 }
1390 {
1391 fDesc.fAnchorLength = length;
1392 return *this;
1393 }
1394
1395 /// Attempt to make an AttributeSet descriptor. This may fail if the builder
1396 /// was not given enough information to make a proper descriptor.
1398};
1399} // namespace Experimental::Internal
1400
1401namespace Internal {
1402
1403// clang-format off
1404/**
1405\class ROOT::Internal::RColumnDescriptorBuilder
1406\ingroup NTuple
1407\brief A helper class for piece-wise construction of an RColumnDescriptor
1408
1409Dangling column descriptors can become actual descriptors when added to an
1410RNTupleDescriptorBuilder instance and then linked to their fields.
1411*/
1412// clang-format on
1414private:
1416
1417public:
1418 /// Make an empty column descriptor builder.
1420
1422 {
1423 fColumn.fLogicalColumnId = logicalColumnId;
1424 return *this;
1425 }
1427 {
1428 fColumn.fPhysicalColumnId = physicalColumnId;
1429 return *this;
1430 }
/// Sets `fBitsOnStorage` of the column under construction.
/// \return the builder itself, so that calls can be chained
1431 RColumnDescriptorBuilder &BitsOnStorage(std::uint16_t bitsOnStorage)
1432 {
1433 fColumn.fBitsOnStorage = bitsOnStorage;
1434 return *this;
1435 }
1437 {
1438 fColumn.fType = type;
1439 return *this;
1440 }
1442 {
1443 fColumn.fFieldId = fieldId;
1444 return *this;
1445 }
1447 {
1448 fColumn.fIndex = index;
1449 return *this;
1450 }
/// Sets `fFirstElementIndex` of the column under construction.
/// \return the builder itself, so that calls can be chained
1451 RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
1452 {
1453 fColumn.fFirstElementIndex = firstElementIdx;
1454 return *this;
1455 }
1457 {
1458 R__ASSERT(fColumn.fFirstElementIndex != 0);
1459 if (fColumn.fFirstElementIndex > 0)
1460 fColumn.fFirstElementIndex = -fColumn.fFirstElementIndex;
1461 return *this;
1462 }
/// Sets `fRepresentationIndex` of the column under construction.
/// \return the builder itself, so that calls can be chained
1463 RColumnDescriptorBuilder &RepresentationIndex(std::uint16_t representationIndex)
1464 {
1465 fColumn.fRepresentationIndex = representationIndex;
1466 return *this;
1467 }
/// Sets the value range of the column under construction from the pair `{min, max}`.
/// \return the builder itself, so that calls can be chained
1468 RColumnDescriptorBuilder &ValueRange(double min, double max)
1469 {
1470 fColumn.fValueRange = {min, max};
1471 return *this;
1472 }
/// Stores `valueRange` as the value range of the column under construction; an empty optional is stored as-is.
/// \return the builder itself, so that calls can be chained
1473 RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1474 {
1475 fColumn.fValueRange = valueRange;
1476 return *this;
1477 }
/// Returns the field ID currently set on the column under construction.
1478 ROOT::DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
/// Returns the representation index currently set on the column under construction.
/// NOTE(review): the return type is ROOT::DescriptorId_t although RepresentationIndex() takes a std::uint16_t —
/// confirm whether the wider descriptor-ID type is intended here.
1479 ROOT::DescriptorId_t GetRepresentationIndex() const { return fColumn.fRepresentationIndex; }
1480 /// Attempt to make a column descriptor. This may fail if the column
1481 /// was not given enough information to make a proper descriptor.
1483};
1484
1485// clang-format off
1486/**
1487\class ROOT::Internal::RFieldDescriptorBuilder
1488\ingroup NTuple
1489\brief A helper class for piece-wise construction of an RFieldDescriptor
1490
1491Dangling field descriptors describe a single field in isolation. They are
1492missing the necessary relationship information (parent field, any child fields)
1493required to describe a real RNTuple field.
1494
1495Dangling field descriptors can only become actual descriptors when added to an
1496RNTupleDescriptorBuilder instance and then linked to other fields.
1497*/
1498// clang-format on
1500private:
1502
1503public:
1504 /// Make an empty dangling field descriptor.
1506
1507 /// Make a new RFieldDescriptorBuilder based off a live RNTuple field.
1509
1511 {
1512 fField.fFieldId = fieldId;
1513 return *this;
1514 }
/// Sets `fFieldVersion` of the field under construction.
/// \return the builder itself, so that calls can be chained
1515 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
1516 {
1517 fField.fFieldVersion = fieldVersion;
1518 return *this;
1519 }
/// Sets `fTypeVersion` of the field under construction.
/// \return the builder itself, so that calls can be chained
1520 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1521 {
1522 fField.fTypeVersion = typeVersion;
1523 return *this;
1524 }
1526 {
1527 fField.fParentId = id;
1528 return *this;
1529 }
1531 {
1532 fField.fProjectionSourceId = id;
1533 return *this;
1534 }
/// Copies `fieldName` into the field under construction.
/// \return the builder itself, so that calls can be chained
1535 RFieldDescriptorBuilder &FieldName(const std::string &fieldName)
1536 {
1537 fField.fFieldName = fieldName;
1538 return *this;
1539 }
/// Copies `fieldDescription` into the field under construction.
/// \return the builder itself, so that calls can be chained
1540 RFieldDescriptorBuilder &FieldDescription(const std::string &fieldDescription)
1541 {
1542 fField.fFieldDescription = fieldDescription;
1543 return *this;
1544 }
/// Copies `typeName` into the field under construction.
/// \return the builder itself, so that calls can be chained
1545 RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1546 {
1547 fField.fTypeName = typeName;
1548 return *this;
1549 }
/// Copies `typeAlias` into the field under construction.
/// \return the builder itself, so that calls can be chained
1550 RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
1551 {
1552 fField.fTypeAlias = typeAlias;
1553 return *this;
1554 }
/// Sets `fNRepetitions` of the field under construction.
/// \return the builder itself, so that calls can be chained
1555 RFieldDescriptorBuilder &NRepetitions(std::uint64_t nRepetitions)
1556 {
1557 fField.fNRepetitions = nRepetitions;
1558 return *this;
1559 }
1561 {
1562 fField.fStructure = structure;
1563 return *this;
1564 }
/// Stores the optional type checksum in the field under construction; an empty optional is stored as-is.
/// \return the builder itself, so that calls can be chained
1565 RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1566 {
1567 fField.fTypeChecksum = typeChecksum;
1568 return *this;
1569 }
1571 {
1572 fField.fIsSoACollection = val;
1573 return *this;
1574 }
/// Returns the parent ID currently set on the field under construction.
1575 ROOT::DescriptorId_t GetParentId() const { return fField.fParentId; }
1576 /// Attempt to make a field descriptor. This may fail if the dangling field
1577 /// was not given enough information to make a proper descriptor.
1579};
1580
1581// clang-format off
1582/**
1583\class ROOT::Internal::RClusterDescriptorBuilder
1584\ingroup NTuple
1585\brief A helper class for piece-wise construction of an RClusterDescriptor
1586
1587The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1588piecewise addition of page locations.
1589*/
1590// clang-format on
1592private:
1594
1595public:
1597 {
1598 fCluster.fClusterId = clusterId;
1599 return *this;
1600 }
1601
/// Sets `fFirstEntryIndex` of the cluster under construction.
/// \return the builder itself, so that calls can be chained
1602 RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
1603 {
1604 fCluster.fFirstEntryIndex = firstEntryIndex;
1605 return *this;
1606 }
1607
/// Sets `fNEntries` of the cluster under construction.
/// \return the builder itself, so that calls can be chained
1608 RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
1609 {
1610 fCluster.fNEntries = nEntries;
1611 return *this;
1612 }
1613
1614 RResult<void> CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1615 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
1616
1617 /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1618 /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1619 /// MarkSuppressedColumnRange() took place.
1621
1622 /// Sets the first element index and number of elements for all the suppressed column ranges.
1623 /// The information is taken from the corresponding columns from the primary representation.
1624 /// Needs to be called when all the columns (suppressed and regular) were added.
1626
1627 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1628 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1629 /// 'zero' page from such locator. Any call to CommitColumnRange() and CommitSuppressedColumnRanges()
1630 /// should happen before calling this function.
1632
1634 {
1635 return fCluster.GetColumnRange(physicalId);
1636 }
1637
1638 /// Move out the full cluster descriptor including page locations
1640};
1641
1642// clang-format off
1643/**
1644\class ROOT::Internal::RClusterGroupDescriptorBuilder
1645\ingroup NTuple
1646\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1647*/
1648// clang-format on
1650private:
1652
1653public:
1656
1658 {
1659 fClusterGroup.fClusterGroupId = clusterGroupId;
1660 return *this;
1661 }
1663 {
1664 fClusterGroup.fPageListLocator = pageListLocator;
1665 return *this;
1666 }
/// Sets `fPageListLength` of the cluster group under construction.
/// \return the builder itself, so that calls can be chained
1667 RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1668 {
1669 fClusterGroup.fPageListLength = pageListLength;
1670 return *this;
1671 }
1673 {
1674 fClusterGroup.fMinEntry = minEntry;
1675 return *this;
1676 }
1678 {
1679 fClusterGroup.fEntrySpan = entrySpan;
1680 return *this;
1681 }
1683 {
1684 fClusterGroup.fNClusters = nClusters;
1685 return *this;
1686 }
/// Copies `clusterIds` into the cluster group under construction.
/// \throws RException if the number of IDs differs from `fClusterGroup.GetNClusters()`
/// NOTE(review): the name suggests that the caller passes the IDs already sorted; no sorting happens here — confirm.
1687 void AddSortedClusters(const std::vector<ROOT::DescriptorId_t> &clusterIds)
1688 {
1689 if (clusterIds.size() != fClusterGroup.GetNClusters())
1690 throw RException(R__FAIL("mismatch of number of clusters"));
1691 fClusterGroup.fClusterIds = clusterIds;
1692 }
1693
1695};
1696
1697// clang-format off
1698/**
1699\class ROOT::Internal::RExtraTypeInfoDescriptorBuilder
1700\ingroup NTuple
1701\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1702*/
1703// clang-format on
1705private:
1707
1708public:
1710
1712 {
1713 fExtraTypeInfo.fContentId = contentId;
1714 return *this;
1715 }
1717 {
1718 fExtraTypeInfo.fTypeVersion = typeVersion;
1719 return *this;
1720 }
/// Copies `typeName` into the extra type info record under construction.
/// \return the builder itself, so that calls can be chained
1721 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1722 {
1723 fExtraTypeInfo.fTypeName = typeName;
1724 return *this;
1725 }
/// Copies `content` into the extra type info record under construction.
/// \return the builder itself, so that calls can be chained
1726 RExtraTypeInfoDescriptorBuilder &Content(const std::string &content)
1727 {
1728 fExtraTypeInfo.fContent = content;
1729 return *this;
1730 }
1731
1733};
1734
1735// clang-format off
1736/**
1737\class ROOT::Internal::RNTupleDescriptorBuilder
1738\ingroup NTuple
1739\brief A helper class for piece-wise construction of an RNTupleDescriptor
1740
1741Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1742*/
1743// clang-format on
1745private:
1748
1749public:
1750 /// Checks whether invariants hold:
1751 /// * RNTuple epoch is valid
1752 /// * RNTuple name is valid
1753 /// * Fields have valid parents
1754 /// * Number of columns is constant across column representations
1758
1759 /// Copies the "schema" part of `descriptor` into the builder's descriptor.
1760 /// This resets the builder's descriptor.
1761 void SetSchemaFromExisting(const RNTupleDescriptor &descriptor);
1762
1763 void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor,
1764 std::uint16_t versionPatch);
1765 void SetVersionForWriting();
1766
1767 void SetNTuple(const std::string_view name, const std::string_view description);
1768 /// Sets the `flag`-th bit of the feature flag to 1.
1769 /// Note that `flag` itself is not a bitmask, just the bit index of the flag to enable.
1770 void SetFeature(unsigned int flag);
1771
/// Stores `xxhash3` in the descriptor's `fOnDiskHeaderXxHash3`.
1772 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
/// Stores `size` in the descriptor's `fOnDiskHeaderSize`, replacing any previous value.
1773 void SetOnDiskHeaderSize(std::uint64_t size) { fDescriptor.fOnDiskHeaderSize = size; }
1774 /// The real footer size also includes the page list envelopes
1775 void AddToOnDiskFooterSize(std::uint64_t size) { fDescriptor.fOnDiskFooterSize += size; }
1776
1777 void AddField(const RFieldDescriptor &fieldDesc);
1780
1781 // The field that the column belongs to has to be already available. For fields with multiple columns,
1782 // the columns need to be added in order of the column index
1784
1787
1789 void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1790
1792
1793 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1794 /// annotated as being part of the header extension.
1795 void BeginHeaderExtension();
1796
1797 /// \brief Shift column IDs of alias columns by `offset`
1798 ///
1799 /// If the descriptor is constructed in pieces consisting of physical and alias columns
1800 /// (regular and projected fields), the natural column order would be
1801 /// - Physical and alias columns of piece one
1802 /// - Physical and alias columns of piece two
1803 /// - etc.
1804 /// What we want, however, are first all physical column IDs and then all alias column IDs.
1805 /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1806 /// column IDs in the projected field descriptors. In this way, a new piece of physical and alias columns can
1807 /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1808 /// - Physical columns of piece one
1809 /// - Physical columns of piece two
1810 /// - ...
1811 /// - Logical columns of piece one
1812 /// - Logical columns of piece two
1813 /// - ...
1814 void ShiftAliasColumns(std::uint32_t offset);
1815};
1816
1818{
1819 return desc.CloneSchema();
1820}
1821
1822/// Tells if the field describes a user-defined enum type.
1823/// The dictionary does not need to be available for this method.
1824/// Needs the full descriptor to look up sub fields.
1825bool IsCustomEnumFieldDesc(const RNTupleDescriptor &desc, const RFieldDescriptor &fieldDesc);
1826
1827/// Tells if the field describes a std::atomic<T> type
1828bool IsStdAtomicFieldDesc(const RFieldDescriptor &fieldDesc);
1829
1830} // namespace Internal
1831
1832} // namespace ROOT
1833
1834#endif // ROOT_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:300
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:148
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RNTupleAttrSetDescriptorBuilder & AnchorLocator(const RNTupleLocator &loc)
RNTupleAttrSetDescriptorBuilder & SchemaVersion(std::uint16_t major, std::uint16_t minor)
RResult< ROOT::Experimental::RNTupleAttrSetDescriptor > MoveDescriptor()
Attempt to make an AttributeSet descriptor.
RNTupleAttrSetDescriptorBuilder & Name(std::string_view name)
RNTupleAttrSetDescriptorBuilder & AnchorLength(std::uint32_t length)
std::vector< RNTupleAttrSetDescriptor >::const_iterator Iter_t
Used to loop over all the Attribute Sets linked to an RNTuple.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RNTupleAttrSetDescriptorIterable(const RNTupleDescriptor &ntuple)
Metadata stored for every Attribute Set linked to an RNTuple.
RNTupleAttrSetDescriptor & operator=(const RNTupleAttrSetDescriptor &other)=delete
bool operator==(const RNTupleAttrSetDescriptor &other) const
std::uint32_t fAnchorLength
uncompressed size of the linked anchor
RNTupleAttrSetDescriptor(const RNTupleAttrSetDescriptor &other)=delete
RNTupleAttrSetDescriptor & operator=(RNTupleAttrSetDescriptor &&other)=default
const RNTupleLocator & GetAnchorLocator() const
bool operator!=(const RNTupleAttrSetDescriptor &other) const
RNTupleAttrSetDescriptor(RNTupleAttrSetDescriptor &&other)=default
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
const RClusterDescriptor::RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId)
RClusterDescriptorBuilder & ClusterId(ROOT::DescriptorId_t clusterId)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
void AddSortedClusters(const std::vector< ROOT::DescriptorId_t > &clusterIds)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
A helper class for piece-wise construction of an RColumnDescriptor.
ROOT::DescriptorId_t GetRepresentationIndex() const
RColumnDescriptorBuilder & SetSuppressedDeferred()
RColumnDescriptorBuilder & LogicalColumnId(ROOT::DescriptorId_t logicalColumnId)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder & ValueRange(double min, double max)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & ValueRange(std::optional< RColumnDescriptor::RValueRange > valueRange)
RColumnDescriptorBuilder & Type(ROOT::ENTupleColumnType type)
RColumnDescriptorBuilder & PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RResult< RExtraTypeInfoDescriptor > MoveDescriptor()
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & Structure(const ROOT::ENTupleStructure &structure)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & ProjectionSourceId(ROOT::DescriptorId_t id)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & IsSoACollection(bool val)
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & ParentId(ROOT::DescriptorId_t id)
static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live RNTuple field.
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
A helper class for piece-wise construction of an RNTupleDescriptor.
void SetNTuple(const std::string_view name, const std::string_view description)
void SetSchemaFromExisting(const RNTupleDescriptor &descriptor)
Copies the "schema" part of descriptor into the builder's descriptor.
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
RResult< void > AddAttributeSet(Experimental::RNTupleAttrSetDescriptor &&attrSetDesc)
RResult< void > AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId)
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
void ShiftAliasColumns(std::uint32_t offset)
Shift column IDs of alias columns by offset
void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch)
const RNTupleDescriptor & GetDescriptor() const
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId)
void AddField(const RFieldDescriptor &fieldDesc)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3)
RResult< void > EnsureFieldExists(ROOT::DescriptorId_t fieldId) const
void SetFeature(unsigned int flag)
Sets the flag-th bit of the feature flag to 1.
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
std::unordered_map< ROOT::DescriptorId_t, RColumnRange >::const_iterator Iter_t
RColumnRangeIterable(const RClusterDescriptor &desc)
The window of element indexes of a particular column in a particular cluster.
void SetCompressionSettings(std::optional< std::uint32_t > comp)
bool fIsSuppressed
Suppressed columns have an empty page range and unknown compression settings.
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
ROOT::DescriptorId_t GetPhysicalColumnId() const
bool operator==(const RColumnRange &other) const
void SetFirstElementIndex(ROOT::NTupleSize_t idx)
ROOT::NTupleSize_t fFirstElementIndex
The global index of the first column element in the cluster.
std::optional< std::uint32_t > GetCompressionSettings() const
std::optional< std::uint32_t > fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ROOT::NTupleSize_t GetFirstElementIndex() const
RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t nElements, std::optional< std::uint32_t > compressionSettings, bool suppressed=false)
void IncrementFirstElementIndex(ROOT::NTupleSize_t by)
bool Contains(ROOT::NTupleSize_t index) const
ROOT::NTupleSize_t fNElements
The number of column elements in the cluster.
void IncrementNElements(ROOT::NTupleSize_t by)
Records the partition of data into pages for a particular column in a particular cluster.
RPageRange & operator=(const RPageRange &other)=delete
std::unique_ptr< std::vector< ROOT::NTupleSize_t > > fCumulativeNElements
Has the same length as fPageInfos and stores the sum of the number of elements of all the pages up ...
RPageRange(RPageRange &&other)=default
static constexpr std::size_t kLargeRangeThreshold
Create the fCumulativeNElements only when it is needed, i.e. when there are many pages to search throug...
RPageRange(const RPageRange &other)=delete
const std::vector< RPageInfo > & GetPageInfos() const
RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
bool operator==(const RPageRange &other) const
ROOT::DescriptorId_t GetPhysicalColumnId() const
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange.
RPageRange & operator=(RPageRange &&other)=default
std::vector< RPageInfo > & GetPageInfos()
Metadata for RNTuple clusters.
ROOT::NTupleSize_t GetNEntries() const
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
ROOT::DescriptorId_t GetId() const
const RPageRange & GetPageRange(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor(RClusterDescriptor &&other)=default
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::DescriptorId_t fClusterId
bool ContainsColumn(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor & operator=(RClusterDescriptor &&other)=default
RClusterDescriptor Clone() const
bool operator==(const RClusterDescriptor &other) const
const RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
ROOT::NTupleSize_t GetFirstEntryIndex() const
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(const RClusterDescriptor &other)=delete
std::uint64_t GetNBytesOnStorage() const
Clusters are bundled in cluster groups.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor Clone() const
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
ROOT::DescriptorId_t GetId() const
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetClusterIds() const
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t GetPageListLength() const
RNTupleLocator GetPageListLocator() const
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor CloneSummary() const
Creates a clone without the cluster IDs.
Metadata stored for every column of an RNTuple.
std::optional< RValueRange > GetValueRange() const
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
bool operator==(const RColumnDescriptor &other) const
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor(const RColumnDescriptor &other)=delete
std::uint64_t GetFirstElementIndex() const
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
ROOT::DescriptorId_t GetFieldId() const
RColumnDescriptor(RColumnDescriptor &&other)=default
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
RColumnDescriptor & operator=(RColumnDescriptor &&other)=default
std::uint32_t GetIndex() const
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::ENTupleColumnType fType
The on-disk column type.
ROOT::ENTupleColumnType GetType() const
ROOT::DescriptorId_t GetPhysicalId() const
std::uint16_t GetRepresentationIndex() const
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t GetBitsOnStorage() const
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
ROOT::DescriptorId_t GetLogicalId() const
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Field-specific extra type information from the header / extension header.
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor Clone() const
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
const std::string & GetContent() const
const std::string & GetTypeName() const
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
EExtraTypeInfoIds GetContentId() const
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
A field translates read and write calls from/to underlying columns to/from tree values.
Metadata stored for every field of an RNTuple.
const std::string & GetTypeAlias() const
std::unique_ptr< ROOT::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fFieldId
RFieldDescriptor Clone() const
Get a copy of the descriptor.
ROOT::DescriptorId_t GetId() const
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::uint32_t GetFieldVersion() const
const std::vector< ROOT::DescriptorId_t > & GetLogicalColumnIds() const
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
ROOT::ENTupleStructure GetStructure() const
bool operator==(const RFieldDescriptor &other) const
RFieldDescriptor(const RFieldDescriptor &other)=delete
std::uint32_t GetColumnCardinality() const
std::string fFieldDescription
Free text set by the user.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor & operator=(RFieldDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetLinkIds() const
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
ROOT::DescriptorId_t GetParentId() const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::uint64_t GetNRepetitions() const
RFieldDescriptor(RFieldDescriptor &&other)=default
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fFieldName
The leaf name, not including parent fields.
const std::string & GetFieldDescription() const
std::optional< std::uint32_t > GetTypeChecksum() const
ROOT::DescriptorId_t GetProjectionSourceId() const
bool fIsSoACollection
Indicates if this is a collection that should be represented in memory by a SoA layout.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint32_t GetTypeVersion() const
const std::string & GetFieldName() const
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
const std::string & GetTypeName() const
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor >::const_iterator Iter_t
Used to loop over all the clusters of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RClusterDescriptorIterable(const RNTupleDescriptor &ntuple)
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor >::const_iterator Iter_t
Used to loop over all the cluster groups of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RIterator(const RColumnDescriptorIterable *iterable=nullptr, std::size_t index=0)
const RColumnDescriptorIterable * fIterable
The enclosing RColumnDescriptorIterable.
Used to loop over a field's associated columns.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
std::vector< ROOT::DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
std::vector< RExtraTypeInfoDescriptor >::const_iterator Iter_t
Used to loop over all the extra type info records of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RIterator(const RFieldDescriptorIterable *iterable=nullptr, std::size_t index=0)
const RFieldDescriptorIterable * fIterable
The enclosing RFieldDescriptorIterable.
Used to loop over a field's child fields.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
std::vector< ROOT::DescriptorId_t > fFieldChildren
The descriptor IDs of the child fields.
Summarizes information about fields and the corresponding columns that were added after the header ha...
const std::vector< ROOT::DescriptorId_t > & GetExtendedColumnRepresentations() const
std::unordered_set< ROOT::DescriptorId_t > fFieldIdsLookup
All field IDs of late model extensions for efficient lookup.
std::vector< ROOT::DescriptorId_t > GetTopMostFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
std::uint32_t fNLogicalColumns
Number of logical and physical columns; updated by the descriptor builder when columns are added.
std::vector< ROOT::DescriptorId_t > fExtendedColumnRepresentations
All logical column IDs of columns that extend, with additional column representations,...
bool ContainsExtendedColumnRepresentation(ROOT::DescriptorId_t columnId) const
void MarkExtendedField(const RFieldDescriptor &fieldDesc)
Marks fieldDesc as an extended field, i.e.
std::vector< ROOT::DescriptorId_t > fFieldIdsOrder
All field IDs of late model extensions, in the order of field addition.
bool ContainsField(ROOT::DescriptorId_t fieldId) const
void MarkExtendedColumn(const RColumnDescriptor &columnDesc)
Marks columnDesc as an extended column, i.e.
The on-storage metadata of an RNTuple.
std::uint64_t GetGeneration() const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
const RClusterGroupDescriptor & GetClusterGroupDescriptor(ROOT::DescriptorId_t clusterGroupId) const
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::set< unsigned int > fFeatureFlags
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::vector< Experimental::RNTupleAttrSetDescriptor > fAttributeSets
List of AttributeSets linked to this RNTuple.
ROOT::DescriptorId_t fFieldZeroId
Set by the descriptor builder.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
std::size_t GetNExtraTypeInfos() const
std::uint64_t GetOnDiskFooterSize() const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::size_t GetNActiveClusters() const
RColumnDescriptorIterable GetColumnIterable() const
bool operator==(const RNTupleDescriptor &other) const
const std::string & GetName() const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint16_t fVersionMinor
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
std::vector< std::uint64_t > GetFeatureFlags() const
ROOT::NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
std::unique_ptr< ROOT::RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored metadata.
std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const
Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type nam...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
std::size_t GetNAttributeSets() const
std::size_t GetNClusters() const
ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
std::size_t GetNPhysicalColumns() const
EFeatureFlags
All known feature flags.
@ kFeatureFlag_Test
Reserved for forward-compatibility testing.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t GetVersion() const
void PrintInfo(std::ostream &output) const
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
const RClusterDescriptor & GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
std::string fName
The RNTuple name needs to be unique in a given storage location (file)
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
std::uint64_t GetOnDiskHeaderXxHash3() const
RResult< void > DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId)
std::uint16_t fVersionMajor
Set by the descriptor builder when deserialized.
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
std::size_t GetNFields() const
ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const
RResult< void > AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
bool HasFeature(unsigned int flag) const
std::uint16_t fVersionPatch
Set by the descriptor builder when deserialized.
std::uint64_t GetOnDiskHeaderSize() const
std::string fDescription
Free text from the user.
ROOT::Experimental::RNTupleAttrSetDescriptorIterable GetAttrSetIterable() const
RFieldDescriptorIterable GetTopLevelFields() const
std::uint16_t fVersionEpoch
Set by the descriptor builder when deserialized.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
RNTupleDescriptor Clone() const
std::size_t GetNLogicalColumns() const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::size_t GetNClusterGroups() const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RClusterDescriptorIterable GetClusterIterable() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
const std::string & GetDescription() const
std::uint64_t fGeneration
The generation of the descriptor.
ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RFieldDescriptor & GetFieldZero() const
Generic information about the physical location of data.
The RNTupleModel encapsulates the schema of an RNTuple.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:198
const Int_t n
Definition legend1.C:16
Namespace for ROOT features in testing.
Definition TROOT.h:100
ROOT::DescriptorId_t CallFindClusterIdOn(const ROOT::RNTupleDescriptor &desc, ROOT::NTupleSize_t entryIdx)
RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
bool IsCustomEnumFieldDesc(const RNTupleDescriptor &desc, const RFieldDescriptor &fieldDesc)
Tells if the field describes a user-defined enum type.
std::vector< ROOT::Internal::RNTupleClusterBoundaries > GetClusterBoundaries(const RNTupleDescriptor &desc)
Return the cluster boundaries for each cluster in this RNTuple.
bool IsStdAtomicFieldDesc(const RFieldDescriptor &fieldDesc)
Tells if the field describes a std::atomic<T> type.
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleStructure
The fields in the RNTuple data model tree can carry different structural information about the type s...
ENTupleColumnType
Additional information about a page in an in-memory RPageRange.
RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage)
ROOT::NTupleSize_t fPageNumber
Page number in the corresponding RPageRange.
ROOT::NTupleSize_t fFirstElementIndex
Index (in cluster) of the first element in page.
void SetPageNumber(ROOT::NTupleSize_t pageNumber)
bool fHasChecksum
If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data.
void SetLocator(const RNTupleLocator &locator)
bool operator==(const RPageInfo &other) const
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
const RNTupleLocator & GetLocator() const
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
bool operator==(RValueRange other) const
RValueRange(std::pair< double, double > range)
bool operator!=(RValueRange other) const
bool fForwardCompatible
By default, creating a model will fail if any of the reconstructed fields contains an unknown column ...
bool fCreateBare
If true, the model will be created without a default entry (bare model).
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.
bool fEmulateUnknownTypes
If true, fields with a user defined type that have no available dictionaries will be reconstructed as...