Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT7_RNTupleDescriptor
18#define ROOT7_RNTupleDescriptor
19
20#include <ROOT/RColumnModel.hxx>
21#include <ROOT/RError.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RSpan.hxx>
25#include <string_view>
26
27#include <algorithm>
28#include <chrono>
29#include <functional>
30#include <iterator>
31#include <map>
32#include <memory>
33#include <ostream>
34#include <vector>
35#include <set>
36#include <string>
37#include <unordered_map>
38#include <unordered_set>
39
40namespace ROOT {
41namespace Experimental {
42
43class RFieldBase;
44class RNTupleDescriptor;
45class RNTupleModel;
46
47namespace Internal {
48class RColumnElementBase;
49} // namespace Internal
50
51namespace Internal {
52class RColumnDescriptorBuilder;
53class RColumnGroupDescriptorBuilder;
54class RClusterDescriptorBuilder;
55class RClusterGroupDescriptorBuilder;
56class RExtraTypeInfoDescriptorBuilder;
57class RFieldDescriptorBuilder;
58class RNTupleDescriptorBuilder;
59} // namespace Internal
60
61// clang-format off
62/**
63\class ROOT::Experimental::RFieldDescriptor
64\ingroup NTuple
65\brief Meta-data stored for every field of an ntuple
66*/
67// clang-format on
71
72private:
74 /// The version of the C++-type-to-column translation mechanics
75 std::uint32_t fFieldVersion = 0;
76 /// The version of the C++ type itself
77 std::uint32_t fTypeVersion = 0;
78 /// The leaf name, not including parent fields
79 std::string fFieldName;
80 /// Free text set by the user
81 std::string fFieldDescription;
82 /// The C++ type that was used when writing the field
83 std::string fTypeName;
84 /// A typedef or using directive that resolved to the type name during field creation
85 std::string fTypeAlias;
86 /// The number of elements per entry for fixed-size arrays
87 std::uint64_t fNRepetitions = 0;
88 /// The structural information carried by this field in the data model tree
90 /// Establishes sub field relationships, such as classes and collections
92 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
93 /// order of sub fields.
94 std::vector<DescriptorId_t> fLinkIds;
95 /// The ordered list of columns attached to this field
96 std::vector<DescriptorId_t> fLogicalColumnIds;
97
98public:
99 RFieldDescriptor() = default;
100 RFieldDescriptor(const RFieldDescriptor &other) = delete;
104
105 bool operator==(const RFieldDescriptor &other) const;
106 /// Get a copy of the descriptor
107 RFieldDescriptor Clone() const;
108 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
109 /// access to sub fields, which is provided by the ntuple descriptor argument.
110 std::unique_ptr<RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc) const;
111
112 DescriptorId_t GetId() const { return fFieldId; } ///< Returns the stored field ID (fFieldId)
113 std::uint32_t GetFieldVersion() const { return fFieldVersion; } ///< Version of the C++-type-to-column translation mechanics
114 std::uint32_t GetTypeVersion() const { return fTypeVersion; } ///< Version of the C++ type itself
115 std::string GetFieldName() const { return fFieldName; } ///< Copy of the leaf name, not including parent fields
116 std::string GetFieldDescription() const { return fFieldDescription; } ///< Copy of the free text set by the user
117 std::string GetTypeName() const { return fTypeName; } ///< Copy of the C++ type name that was used when writing the field
118 std::string GetTypeAlias() const { return fTypeAlias; } ///< Copy of the typedef / using name that resolved to the type name
119 std::uint64_t GetNRepetitions() const { return fNRepetitions; } ///< Number of elements per entry for fixed-size arrays
122 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; } ///< Parent-to-children links, kept in the order of the sub fields
123 const std::vector<DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; } ///< Ordered list of columns attached to this field
124};
125
126
127// clang-format off
128/**
129\class ROOT::Experimental::RColumnDescriptor
130\ingroup NTuple
131\brief Meta-data stored for every column of an ntuple
132*/
133// clang-format on
137
138private:
139 /// The actual column identifier, which is the link to the corresponding field
141 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
143 /// Contains the column type and whether it is sorted
145 /// Every column belongs to one and only one field
147 /// A field can be serialized into several columns, which are numbered from zero to $n$
148 std::uint32_t fIndex;
149 /// Specifies the index for the first stored element for this column. For deferred columns the value is greater
150 /// than 0
151 std::uint64_t fFirstElementIndex = 0U;
152
153public:
154 RColumnDescriptor() = default;
155 RColumnDescriptor(const RColumnDescriptor &other) = delete;
159
160 bool operator==(const RColumnDescriptor &other) const;
161 /// Get a copy of the descriptor
162 RColumnDescriptor Clone() const;
163
166 RColumnModel GetModel() const { return fModel; } ///< Copy of fModel; presumably the column type and sortedness (declaration not visible here)
167 std::uint32_t GetIndex() const { return fIndex; } ///< Number of this column among the columns its field is serialized into
170 std::uint64_t GetFirstElementIndex() const { return fFirstElementIndex; } ///< Index of the first stored element for this column
171 bool IsDeferredColumn() const { return fFirstElementIndex > 0; } ///< True if the first stored element index is greater than zero
172};
173
174// clang-format off
175/**
176\class ROOT::Experimental::RColumnGroupDescriptor
177\ingroup NTuple
178\brief Meta-data for a set of columns; non-trivial column groups are used for sharded clusters
179
180Clusters can span a subset of columns. Such subsets are described as a column group. An empty column group
181is used to denote the column group of all the columns. Every ntuple has at least one column group.
182*/
183// clang-format on
186
187private:
189 std::unordered_set<DescriptorId_t> fPhysicalColumnIds;
190
191public:
197
198 bool operator==(const RColumnGroupDescriptor &other) const;
199
201 const std::unordered_set<DescriptorId_t> &GetPhysicalColumnIds() const { return fPhysicalColumnIds; } ///< Physical column IDs of the group; an empty set denotes the group of all columns
202 bool Contains(DescriptorId_t physicalId) const
203 {
204 return fPhysicalColumnIds.empty() || fPhysicalColumnIds.count(physicalId) > 0;
205 }
206 bool HasAllColumns() const { return fPhysicalColumnIds.empty(); } ///< True if the ID set is empty, which denotes the group of all the columns
207};
208
209// clang-format off
210/**
211\class ROOT::Experimental::RClusterDescriptor
212\ingroup NTuple
213\brief Meta-data for a set of ntuple clusters
214
215The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
216In a second phase, the event range, column group, page locations and column ranges are added.
217Both phases are populated by the RClusterDescriptorBuilder.
218Clusters usually span across all available columns but in some cases they can describe only a subset of the columns,
219for instance when describing friend ntuples.
220*/
221// clang-format on
224
225public:
226 /// The window of element indexes of a particular column in a particular cluster
229 /// The global index of the first column element in the cluster
231 /// The number of column elements in the cluster
233 /// The usual format for ROOT compression settings (see Compression.h).
234 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
236
237 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
238 // Should this be done on the field level?
239
240 bool operator==(const RColumnRange &other) const {
243 }
244
247 }
248 };
249
250 /// Records the partition of data into pages for a particular column in a particular cluster
253 /// Extend this RPageRange to fit the given RColumnRange, i.e. prepend as many synthetic RPageInfos as needed to
254 /// cover the range in `columnRange`. `RPageInfo`s are constructed to contain as many elements of type `element`
255 /// given a page size limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
256 /// This function is used to make up `RPageRange`s for clusters that contain deferred columns.
257 /// \return The number of column elements covered by the synthesized RPageInfos
258 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element,
259 std::size_t pageSize);
260
261 public:
262 /// We do not need to store the element size / uncompressed page size because we know to which column
263 /// the page belongs
264 struct RPageInfo {
265 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
266 std::uint32_t fNElements = std::uint32_t(-1); // default is the largest std::uint32_t value
267 /// The meaning of fLocator depends on the storage backend.
269
270 bool operator==(const RPageInfo &other) const { // equal if both the element count and the locator match
271 return fNElements == other.fNElements && fLocator == other.fLocator;
272 }
273 };
275 /// Index (in cluster) of the first element in page.
277 /// Page number in the corresponding RPageRange.
279
280 RPageInfoExtended() = default;
282 : RPageInfo(pi), fFirstInPage(i), fPageNo(n)
283 {
284 }
285 };
286
287 RPageRange() = default;
288 RPageRange(const RPageRange &other) = delete;
289 RPageRange &operator =(const RPageRange &other) = delete;
290 RPageRange(RPageRange &&other) = default;
291 RPageRange &operator =(RPageRange &&other) = default;
292
295 clone.fPhysicalColumnId = fPhysicalColumnId;
296 clone.fPageInfos = fPageInfos;
297 return clone;
298 }
299
300 /// Find the page in the RPageRange that contains the given element. The element must exist.
301 RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const;
302
304 std::vector<RPageInfo> fPageInfos;
305
306 bool operator==(const RPageRange &other) const {
307 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
308 }
309 };
310
311private:
313 /// Clusters can be swapped by adjusting the entry offsets
315 // TODO(jblomer): change to std::uint64_t
317
318 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
319 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
320
321public:
327
329
330 bool operator==(const RClusterDescriptor &other) const;
331
332 DescriptorId_t GetId() const { return fClusterId; } ///< Returns the stored cluster ID (fClusterId)
335 const RColumnRange &GetColumnRange(DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); } ///< Looked up with unordered_map::at, which throws std::out_of_range for an unknown ID
336 const RPageRange &GetPageRange(DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); } ///< Looked up with unordered_map::at, which throws std::out_of_range for an unknown ID
337 bool ContainsColumn(DescriptorId_t physicalId) const
338 {
339 return fColumnRanges.find(physicalId) != fColumnRanges.end();
340 }
341 std::unordered_set<DescriptorId_t> GetColumnIds() const;
342 std::uint64_t GetBytesOnStorage() const;
343};
344
345// clang-format off
346/**
347\class ROOT::Experimental::RClusterGroupDescriptor
348\ingroup NTuple
349\brief Clusters are bundled in cluster groups.
350
351Very large ntuples or combined ntuples (chains, friends) contain multiple cluster groups. The cluster groups
352may contain sharded clusters.
353Every ntuple has at least one cluster group. The clusters in a cluster group are ordered corresponding to
354the order of page locations in the page list envelope that belongs to the cluster group (see format specification)
355*/
356// clang-format on
359
360private:
362 /// The cluster IDs can be empty if the corresponding page list is not loaded.
363 std::vector<DescriptorId_t> fClusterIds;
364 /// The page list that corresponds to the cluster group
366 /// Uncompressed size of the page list
367 std::uint64_t fPageListLength = 0;
368 /// The minimum first entry number of the clusters in the cluster group
369 std::uint64_t fMinEntry = 0;
370 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
371 std::uint64_t fEntrySpan = 0;
372 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
373 std::uint32_t fNClusters = 0;
374
375public:
381
383 // Creates a clone without the cluster IDs
385
386 bool operator==(const RClusterGroupDescriptor &other) const;
387
389 std::uint32_t GetNClusters() const { return fNClusters; } ///< Known even if the cluster IDs are not (yet) populated
391 std::uint64_t GetPageListLength() const { return fPageListLength; } ///< Uncompressed size of the page list
392 const std::vector<DescriptorId_t> &GetClusterIds() const { return fClusterIds; } ///< Can be empty if the corresponding page list is not loaded
393 std::uint64_t GetMinEntry() const { return fMinEntry; } ///< Minimum first entry number of the clusters in the cluster group
394 std::uint64_t GetEntrySpan() const { return fEntrySpan; } ///< Number of entries (partially for sharded clusters) covered by this cluster group
395 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
396 /// Stage two loads the list of cluster IDs. Returns true if the list of cluster IDs is non-empty.
397 bool HasClusterDetails() const { return !fClusterIds.empty(); }
398};
399
400/// Used in RExtraTypeInfoDescriptor
402
403// clang-format off
404/**
405\class ROOT::Experimental::RExtraTypeInfoDescriptor
406\ingroup NTuple
407\brief Field-specific extra type information from the header / extension header
408
409Currently only used by unsplit fields to store RNTuple-wide list of streamer info records.
410*/
411// clang-format on
414
415private:
416 /// Specifies the meaning of the extra information
418 /// Extra type information restricted to a certain version range of the type
419 std::uint32_t fTypeVersionFrom = 0;
420 std::uint32_t fTypeVersionTo = 0;
421 /// The type name the extra information refers to; empty for RNTuple-wide extra information
422 std::string fTypeName;
423 /// The content format depends on the content ID and may be binary
424 std::string fContent;
425
426public:
432
433 bool operator==(const RExtraTypeInfoDescriptor &other) const;
434
436
438 std::uint32_t GetTypeVersionFrom() const { return fTypeVersionFrom; } ///< "From" bound of the type version range the extra information is restricted to
439 std::uint32_t GetTypeVersionTo() const { return fTypeVersionTo; } ///< "To" bound of the type version range the extra information is restricted to
440 std::string GetTypeName() const { return fTypeName; } ///< Copy of the type name the information refers to; empty for RNTuple-wide information
441 std::string GetContent() const { return fContent; } ///< Copy of the content; its format depends on the content ID and may be binary
442};
443
444// clang-format off
445/**
446\class ROOT::Experimental::RNTupleDescriptor
447\ingroup NTuple
448\brief The on-storage meta-data of an ntuple
449
450Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
451several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
452relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
453its location and size, and for every column the range of element indexes as well as a list of pages and page
454locations.
455
456The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
457for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
458
459The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
460the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
461written struct. This allows for forward and backward compatibility when the meta-data evolves.
462*/
463// clang-format on
466
467public:
468 class RHeaderExtension;
469
470private:
471 /// The ntuple name needs to be unique in a given storage location (file)
472 std::string fName;
473 /// Free text from the user
474 std::string fDescription;
475
476 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
477 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
478 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
479
480 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
481 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
482 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
483
484 DescriptorId_t fFieldZeroId = kInvalidDescriptorId; ///< Set by the descriptor builder
485
486 /**
487 * Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
488 * active page locations. During the lifetime of the descriptor, page location information for clusters
489 * can be added or removed. When this happens, the generation should be increased, so that users of the
490 * descriptor know that the information changed. The generation is increased, e.g., by the page source's
491 * exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
492 */
493 std::uint64_t fGeneration = 0;
494
495 std::set<unsigned int> fFeatureFlags;
496 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
497 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
498 std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
499 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
500 /// from a chain of files
501 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
502 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
503 std::unique_ptr<RHeaderExtension> fHeaderExtension;
504
505public:
506 static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
507
508 // clang-format off
509 /**
510 \class ROOT::Experimental::RNTupleDescriptor::RHeaderExtension
511 \ingroup NTuple
512 \brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
513 */
514 // clang-format on
517
518 private:
519 /// Contains the list of field IDs that are part of the header extension; the corresponding columns are
520 /// available via `GetColumnIterable()`.
521 std::vector<DescriptorId_t> fFields;
522 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
523 std::uint64_t fNLogicalColumns = 0;
524 std::uint64_t fNPhysicalColumns = 0;
525
526 void AddFieldId(DescriptorId_t id) { fFields.push_back(id); } ///< Appends the ID to the list of field IDs that are part of the header extension
527 void AddColumn(bool isAliasColumn)
528 {
530 if (!isAliasColumn)
532 }
533
534 public:
535 std::size_t GetNFields() const { return fFields.size(); } ///< Number of field IDs that are part of the header extension
536 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; } ///< Logical column count; updated by the descriptor builder when columns are added
537 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; } ///< Physical column count; updated by the descriptor builder when columns are added
538 /// Return a vector containing the IDs of the top-level fields defined in the extension header
539 std::vector<DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
540 };
541
542 // clang-format off
543 /**
544 \class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
545 \ingroup NTuple
546 \brief Used to loop over a field's associated columns
547 */
548 // clang-format on
550 private:
551 /// The associated NTuple for this range.
553 /// The descriptor ids of the columns ordered by index id
554 std::vector<DescriptorId_t> fColumns = {};
555
556 void CollectColumnIds(DescriptorId_t fieldId);
557 public:
558 class RIterator {
559 private:
560 /// The enclosing range's NTuple.
562 /// The enclosing range's descriptor id list.
563 const std::vector<DescriptorId_t> &fColumns;
564 std::size_t fIndex = 0;
565 public:
566 using iterator_category = std::forward_iterator_tag;
569 using difference_type = std::ptrdiff_t;
572
573 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
574 : fNTuple(ntuple), fColumns(columns), fIndex(index) {} // fColumns is a reference member: the ID vector is not copied and must outlive the iterator
575 iterator operator++() { ++fIndex; return *this; }
577 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } ///< Compares positions only; the referenced ID list is not compared
578 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } ///< Compares positions only; the referenced ID list is not compared
579 };
580
583
586 };
587
588 // clang-format off
589 /**
590 \class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
591 \ingroup NTuple
592 \brief Used to loop over a field's child fields
593 */
594 // clang-format on
596 private:
597 /// The associated NTuple for this range.
599 /// The descriptor ids of the child fields. These may be sorted using
600 /// a comparison function.
601 std::vector<DescriptorId_t> fFieldChildren = {};
602
603 public:
604 class RIterator {
605 private:
606 /// The enclosing range's NTuple.
608 /// The enclosing range's descriptor id list.
609 const std::vector<DescriptorId_t>& fFieldChildren;
610 std::size_t fIndex = 0;
611 public:
612 using iterator_category = std::forward_iterator_tag;
615 using difference_type = std::ptrdiff_t;
618
619 RIterator(const RNTupleDescriptor& ntuple, const std::vector<DescriptorId_t>& fieldChildren,
620 std::size_t index) : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index) {} // fFieldChildren is a reference member: the ID vector is not copied and must outlive the iterator
621 iterator operator++() { ++fIndex; return *this; }
625 );
626 }
627 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; } ///< Compares positions only; the referenced ID list is not compared
628 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; } ///< Compares positions only; the referenced ID list is not compared
629 };
631 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds()) {}
632 /// Sort the range using an arbitrary comparison function.
634 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator)
635 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
636 {
637 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
638 }
640 return RIterator(fNTuple, fFieldChildren, 0);
641 }
644 }
645 };
646
647 // clang-format off
648 /**
649 \class ROOT::Experimental::RNTupleDescriptor::RClusterGroupDescriptorIterable
650 \ingroup NTuple
651 \brief Used to loop over all the cluster groups of an ntuple (in unspecified order)
652
653 Enumerate all cluster group IDs from the cluster group descriptor. No specific order can be assumed, use
654 FindNextClusterGroupId and FindPrevClusterGroupId to traverse cluster groups by entry number.
655 */
656 // clang-format on
658 private:
659 /// The associated NTuple for this range.
661
662 public:
663 class RIterator {
664 private:
665 /// The enclosing range's NTuple.
667 std::size_t fIndex = 0;
668
669 public:
670 using iterator_category = std::forward_iterator_tag;
673 using difference_type = std::ptrdiff_t;
676
677 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {} ///< Creates an iterator over `ntuple` positioned at `index`
679 {
680 ++fIndex;
681 return *this;
682 }
684 {
685 auto it = fNTuple.fClusterGroupDescriptors.begin();
686 std::advance(it, fIndex);
687 return it->second;
688 }
689 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } ///< Compares positions only; the referenced ntuple is not compared
690 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } ///< Compares positions only; the referenced ntuple is not compared
691 };
692
696 };
697
698 // clang-format off
699 /**
700 \class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
701 \ingroup NTuple
702 \brief Used to loop over all the clusters of an ntuple (in unspecified order)
703
704 Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
705 FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
706 */
707 // clang-format on
709 private:
710 /// The associated NTuple for this range.
712 public:
713 class RIterator {
714 private:
715 /// The enclosing range's NTuple.
717 std::size_t fIndex = 0;
718 public:
719 using iterator_category = std::forward_iterator_tag;
722 using difference_type = std::ptrdiff_t;
725
726 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {} ///< Creates an iterator over `ntuple` positioned at `index`
727 iterator operator++() { ++fIndex; return *this; }
729 auto it = fNTuple.fClusterDescriptors.begin();
730 std::advance(it, fIndex);
731 return it->second;
732 }
733 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } ///< Compares positions only; the referenced ntuple is not compared
734 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } ///< Compares positions only; the referenced ntuple is not compared
735 };
736
740 };
741
742 // clang-format off
743 /**
744 \class ROOT::Experimental::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
745 \ingroup NTuple
746 \brief Used to loop over all the extra type info records of an ntuple (in unspecified order)
747 */
748 // clang-format on
750 private:
751 /// The associated NTuple for this range.
753
754 public:
755 class RIterator {
756 private:
757 /// The enclosing range's NTuple.
759 std::size_t fIndex = 0;
760
761 public:
762 using iterator_category = std::forward_iterator_tag;
765 using difference_type = std::ptrdiff_t;
768
769 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {} ///< Creates an iterator over `ntuple` positioned at `index`
771 {
772 ++fIndex;
773 return *this;
774 }
776 {
777 auto it = fNTuple.fExtraTypeInfoDescriptors.begin();
778 std::advance(it, fIndex);
779 return *it;
780 }
781 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } ///< Compares positions only; the referenced ntuple is not compared
782 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } ///< Compares positions only; the referenced ntuple is not compared
783 };
784
788 };
789
790 RNTupleDescriptor() = default;
791 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
795
796 std::unique_ptr<RNTupleDescriptor> Clone() const;
797
798 bool operator ==(const RNTupleDescriptor &other) const;
799
800 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; } ///< Value set by the descriptor builder when deserialized
801 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; } ///< Value set by the descriptor builder when deserialized
802 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; } ///< Contains both cluster summaries and page locations
803
805 return fFieldDescriptors.at(fieldId);
806 }
808 return fColumnDescriptors.at(columnId);
809 }
811 {
812 return fClusterGroupDescriptors.at(clusterGroupId);
813 }
815 return fClusterDescriptors.at(clusterId);
816 }
817
819 return RFieldDescriptorIterable(*this, fieldDesc);
820 }
822 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
823 {
824 return RFieldDescriptorIterable(*this, fieldDesc, comparator);
825 }
827 return GetFieldIterable(GetFieldDescriptor(fieldId));
828 }
830 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
831 {
832 return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
833 }
836 }
838 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
839 {
840 return GetFieldIterable(GetFieldZeroId(), comparator);
841 }
842
844 {
845 return RColumnDescriptorIterable(*this);
846 }
848 {
849 return RColumnDescriptorIterable(*this, fieldDesc);
850 }
852 {
853 return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
854 }
855
857
859 {
860 return RClusterDescriptorIterable(*this);
861 }
862
864
865 std::string GetName() const { return fName; } ///< Copy of the ntuple name
866 std::string GetDescription() const { return fDescription; } ///< Copy of the free text from the user
867
868 std::size_t GetNFields() const { return fFieldDescriptors.size(); } ///< Number of stored field descriptors
869 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); } ///< Number of stored column descriptors
870 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; } ///< Counter updated by the descriptor builder when columns are added
871 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); } ///< Number of stored cluster group descriptors
872 std::size_t GetNClusters() const { return fNClusters; } ///< Counter updated by the descriptor builder when the cluster groups are added
873 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); } ///< Number of stored cluster descriptors; may be only a subset of all the available clusters
874 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); } ///< Number of stored extra type info descriptors
875
876 /// We know the number of entries from adding the cluster summaries
878 NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const;
879
880 /// Returns the logical parent of all top-level NTuple data fields.
883 DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
884 /// Searches for a top-level field
885 DescriptorId_t FindFieldId(std::string_view fieldName) const;
886 DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
887 DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
891
892 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
893 /// In case of invalid field ID, an empty string is returned.
894 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
895
896 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
897 std::vector<std::uint64_t> GetFeatureFlags() const;
898
899 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`.
/// The returned pointer is non-owning: the extension stays owned by the descriptor's std::unique_ptr member.
900 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
901
902 /// Methods to load and drop cluster group details (cluster IDs and page locations)
903 RResult<void> AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
905
906 std::uint64_t GetGeneration() const { return fGeneration; } ///< Current value of the generation counter (see the comment on fGeneration)
908
909 /// Re-create the C++ model from the stored meta-data
910 std::unique_ptr<RNTupleModel> CreateModel() const;
911 void PrintInfo(std::ostream &output) const;
912};
913
914namespace Internal {
915
916// clang-format off
917/**
918\class ROOT::Experimental::Internal::RColumnDescriptorBuilder
919\ingroup NTuple
920\brief A helper class for piece-wise construction of an RColumnDescriptor
921
922Dangling column descriptors can become actual descriptors when added to an
923RNTupleDescriptorBuilder instance and then linked to their fields.
924*/
925// clang-format on
927private:
929public:
930 /// Make an empty column descriptor builder.
932
934 {
935 fColumn.fLogicalColumnId = logicalColumnId;
936 return *this;
937 }
939 {
940 fColumn.fPhysicalColumnId = physicalColumnId;
941 return *this;
942 }
944 fColumn.fModel = model;
945 return *this;
946 }
948 fColumn.fFieldId = fieldId;
949 return *this;
950 }
953 return *this;
954 }
955 RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx) // sets the first element index of the column under construction
956 {
957 fColumn.fFirstElementIndex = firstElementIdx;
958 return *this; // returning the builder allows setter calls to be chained
959 }
961 /// Attempt to make a column descriptor. This may fail if the column
962 /// was not given enough information to make a proper descriptor.
964};
965
966
967// clang-format off
968/**
969\class ROOT::Experimental::Internal::RFieldDescriptorBuilder
970\ingroup NTuple
971\brief A helper class for piece-wise construction of an RFieldDescriptor
972
973Dangling field descriptors describe a single field in isolation. They are
974missing the necessary relationship information (parent field, any child fields)
975required to describe a real NTuple field.
976
977Dangling field descriptors can only become actual descriptors when added to an
978RNTupleDescriptorBuilder instance and then linked to other fields.
979*/
980// clang-format on
982private:
984public:
985 /// Make an empty dangling field descriptor.
987 /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
988 /// Relationship information is lost during the conversion to a
989 /// dangling descriptor:
990 /// * Parent id is reset to an invalid id.
991 /// * Field children ids are forgotten.
992 ///
993 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
994 explicit RFieldDescriptorBuilder(const RFieldDescriptor& fieldDesc);
995
996 /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
997 static RFieldDescriptorBuilder FromField(const RFieldBase &field);
998
1000 fField.fFieldId = fieldId;
1001 return *this;
1002 }
1003 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion) // sets the field version of the field under construction
1004 {
1005 fField.fFieldVersion = fieldVersion;
1006 return *this; // returning the builder allows setter calls to be chained
1007 }
1008 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion) // sets the type version of the field under construction
1009 {
1010 fField.fTypeVersion = typeVersion;
1011 return *this; // returning the builder allows setter calls to be chained
1012 }
1015 return *this;
1016 }
1017 RFieldDescriptorBuilder& FieldName(const std::string& fieldName) { // copies the name into the field under construction
1018 fField.fFieldName = fieldName;
1019 return *this;
1020 }
1021 RFieldDescriptorBuilder& FieldDescription(const std::string& fieldDescription) { // copies the description into the field under construction
1022 fField.fFieldDescription = fieldDescription;
1023 return *this;
1024 }
1025 RFieldDescriptorBuilder& TypeName(const std::string& typeName) { // copies the type name into the field under construction
1026 fField.fTypeName = typeName;
1027 return *this;
1028 }
1029 RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias) // copies the type alias into the field under construction
1030 {
1031 fField.fTypeAlias = typeAlias;
1032 return *this;
1033 }
1034 RFieldDescriptorBuilder& NRepetitions(std::uint64_t nRepetitions) { // sets the repetition count of the field under construction
1035 fField.fNRepetitions = nRepetitions;
1036 return *this; // like every setter here, returns the builder so that calls can be chained
1037 }
1039 fField.fStructure = structure;
1040 return *this;
1041 }
1043 /// Attempt to make a field descriptor. This may fail if the dangling field
1044 /// was not given enough information to make a proper descriptor.
1046};
1047
1048
1049// clang-format off
1050/**
1051\class ROOT::Experimental::Internal::RClusterDescriptorBuilder
1052\ingroup NTuple
1053\brief A helper class for piece-wise construction of an RClusterDescriptor
1054
1055The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1056piecewise addition of page locations.
1057*/
1058// clang-format on
1060private:
1062
1063public:
1065 {
1066 fCluster.fClusterId = clusterId;
1067 return *this;
1068 }
1069
1070 RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
1071 {
1072 fCluster.fFirstEntryIndex = firstEntryIndex;
1073 return *this;
1074 }
1075
1076 RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
1077 {
1078 fCluster.fNEntries = nEntries;
1079 return *this;
1080 }
1081
1082 RResult<void> CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1083 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
1084
1085 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1086 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1087 /// 'zero' page from such locator. Any call to `CommitColumnRange()` should happen before calling this function.
1089
1090 /// Move out the full cluster descriptor including page locations
1092};
1093
1094// clang-format off
1095/**
1096\class ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder
1097\ingroup NTuple
1098\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1099*/
1100// clang-format on
1102private:
1104
1105public:
1108
1110 {
1111 fClusterGroup.fClusterGroupId = clusterGroupId;
1112 return *this;
1113 }
1115 {
1116 fClusterGroup.fPageListLocator = pageListLocator;
1117 return *this;
1118 }
1119 RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1120 {
1121 fClusterGroup.fPageListLength = pageListLength;
1122 return *this;
1123 }
1125 {
1126 fClusterGroup.fMinEntry = minEntry;
1127 return *this;
1128 }
1130 {
1131 fClusterGroup.fEntrySpan = entrySpan;
1132 return *this;
1133 }
1135 {
1136 fClusterGroup.fNClusters = nClusters;
1137 return *this;
1138 }
1139 void AddClusters(const std::vector<DescriptorId_t> &clusterIds)
1140 {
1141 if (clusterIds.size() != fClusterGroup.GetNClusters())
1142 throw RException(R__FAIL("mismatch of number of clusters"));
1143 fClusterGroup.fClusterIds = clusterIds;
1144 }
1145
1147};
1148
1149// clang-format off
1150/**
1151\class ROOT::Experimental::Internal::RColumnGroupDescriptorBuilder
1152\ingroup NTuple
1153\brief A helper class for piece-wise construction of an RColumnGroupDescriptor
1154*/
1155// clang-format on
1157private:
1159
1160public:
1162
1164 {
1165 fColumnGroup.fColumnGroupId = columnGroupId;
1166 return *this;
1167 }
1168 void AddColumn(DescriptorId_t physicalId) { fColumnGroup.fPhysicalColumnIds.insert(physicalId); }
1169
1171};
1172
1173// clang-format off
1174/**
1175\class ROOT::Experimental::Internal::RExtraTypeInfoDescriptorBuilder
1176\ingroup NTuple
1177\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1178*/
1179// clang-format on
1181private:
1183
1184public:
1186
1188 {
1189 fExtraTypeInfo.fContentId = contentId;
1190 return *this;
1191 }
1192 RExtraTypeInfoDescriptorBuilder &TypeVersionFrom(std::uint32_t typeVersionFrom)
1193 {
1194 fExtraTypeInfo.fTypeVersionFrom = typeVersionFrom;
1195 return *this;
1196 }
1198 {
1199 fExtraTypeInfo.fTypeVersionTo = typeVersionTo;
1200 return *this;
1201 }
1202 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1203 {
1204 fExtraTypeInfo.fTypeName = typeName;
1205 return *this;
1206 }
1207 RExtraTypeInfoDescriptorBuilder &Content(const std::string &content)
1208 {
1209 fExtraTypeInfo.fContent = content;
1210 return *this;
1211 }
1212
1214};
1215
1216// clang-format off
1217/**
1218\class ROOT::Experimental::Internal::RNTupleDescriptorBuilder
1219\ingroup NTuple
1220\brief A helper class for piece-wise construction of an RNTupleDescriptor
1221
1222Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1223*/
1224// clang-format on
1226private:
1229 // Called by AddColumn() to populate the fLogicalColumnIds member of the field descriptor
1230 RResult<void> AttachColumn(DescriptorId_t fieldId, const RColumnDescriptor &columnDesc);
1231
1232public:
1233 /// Checks whether invariants hold:
1234 /// * NTuple name is valid
1235 /// * Fields have valid parent and child ids
1239
1240 void SetNTuple(const std::string_view name, const std::string_view description);
1241 void SetFeature(unsigned int flag);
1242
1243 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1245 /// The real footer size also includes the page list envelopes
1247
1248 void AddField(const RFieldDescriptor& fieldDesc);
1250
1251 // For both AddColumn() methods, the field has to be already available. For fields with multiple columns,
1252 // the columns need to be added in order of the column index
1254 const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx = 0U);
1256
1259
1261
1262 /// Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor
1263 void Reset();
1264
1265 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1266 /// annotated as being part of the header extension.
1267 void BeginHeaderExtension();
1268};
1269
1270} // namespace Internal
1271} // namespace Experimental
1272} // namespace ROOT
1273
1274#endif // ROOT7_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
TObject * clone(const char *newname) const override
Definition RooChi2Var.h:9
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
void AddClusters(const std::vector< DescriptorId_t > &clusterIds)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RColumnGroupDescriptor.
RColumnGroupDescriptorBuilder & ColumnGroupId(DescriptorId_t columnGroupId)
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersionTo(std::uint32_t typeVersionTo)
RExtraTypeInfoDescriptorBuilder & TypeVersionFrom(std::uint32_t typeVersionFrom)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
A helper class for piece-wise construction of an RNTupleDescriptor.
RResult< void > AttachColumn(DescriptorId_t fieldId, const RColumnDescriptor &columnDesc)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx=0U)
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
Records the partition of data into pages for a particular column in a particular cluster.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(RClusterDescriptor &&other)=default
bool ContainsColumn(DescriptorId_t physicalId) const
RClusterDescriptor(const RClusterDescriptor &other)=delete
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
const RColumnRange & GetColumnRange(DescriptorId_t physicalId) const
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
const RPageRange & GetPageRange(DescriptorId_t physicalId) const
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
RClusterGroupDescriptor CloneSummary() const
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
Meta-data stored for every column of an ntuple.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
std::uint64_t fFirstElementIndex
Specifies the index for the first stored element for this column.
RColumnDescriptor(const RColumnDescriptor &other)=delete
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
RColumnModel fModel
Contains the column type and whether it is sorted.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
RColumnGroupDescriptor(const RColumnGroupDescriptor &other)=delete
RColumnGroupDescriptor & operator=(const RColumnGroupDescriptor &other)=delete
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
RColumnGroupDescriptor & operator=(RColumnGroupDescriptor &&other)=default
bool operator==(const RColumnGroupDescriptor &other) const
bool Contains(DescriptorId_t physicalId) const
const std::unordered_set< DescriptorId_t > & GetPhysicalColumnIds() const
RColumnGroupDescriptor(RColumnGroupDescriptor &&other)=default
Holds the static meta-data of an RNTuple column.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Field-specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
std::uint32_t fTypeVersionFrom
Extra type information restricted to a certain version range of the type.
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
std::string fContent
The content format depends on the content ID and may be binary.
A field translates read and write calls from/to underlying columns to/from tree values.
Definition RField.hxx:99
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::uint32_t fTypeVersion
The version of the C++ type itself.
const std::vector< DescriptorId_t > & GetLogicalColumnIds() const
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::vector< DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field.
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
Used to loop over all the cluster groups of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
Used to loop over all the extra type info records of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
Summarizes information about fields and the corresponding columns that were added after the header ha...
std::vector< DescriptorId_t > fFields
Contains the list of field IDs that are part of the header extension; the corresponding columns are a...
std::uint64_t fNLogicalColumns
Number of logical and physical columns; updated by the descriptor builder when columns are added.
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
static constexpr unsigned int kFeatureFlagTest
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::unique_ptr< RNTupleModel > CreateModel() const
Re-create the C++ model from the stored meta-data.
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RColumnDescriptorIterable GetColumnIterable() const
bool HasFeature(unsigned int flag) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::string fDescription
Free text from the user.
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
DescriptorId_t fFieldZeroId
Set by the descriptor builder.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::vector< std::uint64_t > GetFeatureFlags() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
constexpr int kUnknownCompressionSettings
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr ClusterSize_t kInvalidClusterIndex(std::uint64_t(-1))
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The window of element indexes of a particular column in a particular cluster.
NTupleSize_t fFirstElementIndex
The global index of the first column element in the cluster.
int fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
The number of column elements in the cluster.
ClusterSize_t::ValueType fFirstInPage
Index (in cluster) of the first element in page.
RPageInfoExtended(const RPageInfo &pi, ClusterSize_t::ValueType i, NTupleSize_t n)
NTupleSize_t fPageNo
Page number in the corresponding RPageRange.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.
Generic information about the physical location of data.
static void output()