Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT7_RNTupleDescriptor
18#define ROOT7_RNTupleDescriptor
19
20#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RSpan.hxx>
24
25#include <TError.h>
26
27#include <algorithm>
28#include <chrono>
29#include <cmath>
30#include <functional>
31#include <iterator>
32#include <map>
33#include <memory>
34#include <optional>
35#include <ostream>
36#include <vector>
37#include <set>
38#include <string>
39#include <string_view>
40#include <unordered_map>
41#include <unordered_set>
42
43namespace ROOT {
44namespace Experimental {
45
46class RFieldBase;
47class RNTupleDescriptor;
48class RNTupleModel;
49
50namespace Internal {
51class RColumnElementBase;
52} // namespace Internal
53
54namespace Internal {
55class RColumnDescriptorBuilder;
56class RColumnGroupDescriptorBuilder;
57class RClusterDescriptorBuilder;
58class RClusterGroupDescriptorBuilder;
59class RExtraTypeInfoDescriptorBuilder;
60class RFieldDescriptorBuilder;
61class RNTupleDescriptorBuilder;
62} // namespace Internal
63
64// clang-format off
65/**
66\class ROOT::Experimental::RFieldDescriptor
67\ingroup NTuple
68\brief Meta-data stored for every field of an ntuple
69*/
70// clang-format on
74
75private:
77 /// The version of the C++-type-to-column translation mechanics
78 std::uint32_t fFieldVersion = 0;
79 /// The version of the C++ type itself
80 std::uint32_t fTypeVersion = 0;
81 /// The leaf name, not including parent fields
82 std::string fFieldName;
83 /// Free text set by the user
84 std::string fFieldDescription;
85 /// The C++ type that was used when writing the field
86 std::string fTypeName;
87 /// A typedef or using directive that resolved to the type name during field creation
88 std::string fTypeAlias;
89 /// The number of elements per entry for fixed-size arrays
90 std::uint64_t fNRepetitions = 0;
91 /// The structural information carried by this field in the data model tree
93 /// Establishes sub field relationships, such as classes and collections
95 /// For projected fields, the source field ID
97 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
98 /// order of sub fields.
99 std::vector<DescriptorId_t> fLinkIds;
100 /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
101 /// list of logical column ids. For example, the second column of the third column representation is
102 /// fLogicalColumnIds[2 * fColumnCardinality + 1]
103 std::uint32_t fColumnCardinality = 0;
104 /// The ordered list of columns attached to this field: first by representation index then by column index.
105 std::vector<DescriptorId_t> fLogicalColumnIds;
106 /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
107 /// identify types by their checksum
108 std::optional<std::uint32_t> fTypeChecksum;
109
110public:
111 RFieldDescriptor() = default;
112 RFieldDescriptor(const RFieldDescriptor &other) = delete;
116
117 bool operator==(const RFieldDescriptor &other) const;
118 /// Get a copy of the descriptor
119 RFieldDescriptor Clone() const;
120 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
121 /// access to sub fields, which is provided by the ntuple descriptor argument.
122 std::unique_ptr<RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc) const;
123
124 DescriptorId_t GetId() const { return fFieldId; }
125 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
126 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
127 const std::string &GetFieldName() const { return fFieldName; }
128 const std::string &GetFieldDescription() const { return fFieldDescription; }
129 const std::string &GetTypeName() const { return fTypeName; }
130 const std::string &GetTypeAlias() const { return fTypeAlias; }
131 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
135 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
136 const std::vector<DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
137 std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
138 std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
140 /// Tells if the field describes a user-defined class rather than a fundamental type, a collection, or one of the
141 /// natively supported stdlib classes.
142 /// The dictionary does not need to be available for this method.
143 bool IsCustomClass() const;
144};
145
146// clang-format off
147/**
148\class ROOT::Experimental::RColumnDescriptor
149\ingroup NTuple
150\brief Meta-data stored for every column of an ntuple
151*/
152// clang-format on
156
157private:
158 /// The actual column identifier, which is the link to the corresponding field
160 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
162 /// Every column belongs to one and only one field
164 /// The absolute value specifies the index for the first stored element for this column.
165 /// For deferred columns the absolute value is larger than zero.
166 /// Negative values specify a suppressed and deferred column.
167 std::int64_t fFirstElementIndex = 0U;
168 /// A field can be serialized into several columns, which are numbered from zero to $n$
169 std::uint32_t fIndex = 0;
170 /// A field may use multiple column representations, which are numbered from zero to $m$.
171 /// Every representation has the same number of columns.
172 std::uint16_t fRepresentationIndex = 0;
173 /// The size in bits of elements of this column. Most columns have the size fixed by their type
174 /// but low-precision float columns have variable bit widths.
175 std::uint16_t fBitsOnStorage = 0;
176 /// The on-disk column type
178
179public:
180 RColumnDescriptor() = default;
181 RColumnDescriptor(const RColumnDescriptor &other) = delete;
185
186 bool operator==(const RColumnDescriptor &other) const;
187 /// Get a copy of the descriptor
188 RColumnDescriptor Clone() const;
189
/// Position of this column among the columns its field is serialized into.
193 std::uint32_t GetIndex() const { return fIndex; }
/// Index of the column representation this column is part of.
194 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
/// The member is stored signed (a negative value marks a suppressed and deferred column); the sign is dropped
/// here so that callers always receive the plain index of the first stored element.
195 std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
/// Size in bits of a single element of this column on storage.
196 std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
/// The on-disk column type.
197 EColumnType GetType() const { return fType; }
/// A column counts as deferred as soon as its first element index is non-zero, regardless of the sign.
199 bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
201};
202
203// clang-format off
204/**
205\class ROOT::Experimental::RColumnGroupDescriptor
206\ingroup NTuple
207\brief Meta-data for a set of columns; non-trivial column groups are used for sharded clusters
208
209Clusters can span a subset of columns. Such subsets are described as a column group. An empty column group
210is used to denote the column group of all the columns. Every ntuple has at least one column group.
211*/
212// clang-format on
215
216private:
218 std::unordered_set<DescriptorId_t> fPhysicalColumnIds;
219
220public:
226
227 bool operator==(const RColumnGroupDescriptor &other) const;
228
230 const std::unordered_set<DescriptorId_t> &GetPhysicalColumnIds() const { return fPhysicalColumnIds; }
/// Tells whether the given physical column ID belongs to this column group.
/// An empty ID set denotes the group of all columns, so every ID is accepted in that case.
231 bool Contains(DescriptorId_t physicalId) const
232 {
233 return fPhysicalColumnIds.empty() || fPhysicalColumnIds.count(physicalId) > 0;
234 }
235 bool HasAllColumns() const { return fPhysicalColumnIds.empty(); }
236};
237
238// clang-format off
239/**
240\class ROOT::Experimental::RClusterDescriptor
241\ingroup NTuple
242\brief Meta-data for a set of ntuple clusters
243
244The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
245In a second phase, the event range, column group, page locations and column ranges are added.
246Both phases are populated by the RClusterDescriptorBuilder.
247Clusters usually span across all available columns but in some cases they can describe only a subset of the columns,
248for instance when describing friend ntuples.
249*/
250// clang-format on
253
254public:
255 /// The window of element indexes of a particular column in a particular cluster
258 /// The global index of the first column element in the cluster
260 /// The number of column elements in the cluster
262 /// The usual format for ROOT compression settings (see Compression.h).
263 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
265 /// Suppressed columns have an empty page range and unknown compression settings.
266 /// Their element index range, however, is aligned with the corresponding column of the
267 /// primary column representation (see Section "Suppressed Columns" in the specification)
268 bool fIsSuppressed = false;
269
270 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
271 // Should this be done on the field level?
272
273 bool operator==(const RColumnRange &other) const
274 {
278 }
279
281 {
283 }
284 };
285
286 // clang-format off
287 /**
288 \class ROOT::Experimental::RClusterDescriptor::RPageRange
289 \ingroup NTuple
290 \brief Records the partition of data into pages for a particular column in a particular cluster
291 */
292 // clang-format on
295 /// Extend this RPageRange to fit the given RColumnRange, i.e. prepend as many synthetic RPageInfos as needed to
296 /// cover the range in `columnRange`. `RPageInfo`s are constructed to contain as many elements of type `element`
297 /// given a page size limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
298 /// This function is used to make up `RPageRange`s for clusters that contain deferred columns.
299 /// \return The number of column elements covered by the synthesized RPageInfos
300 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element,
301 std::size_t pageSize);
302
303 public:
304 /// We do not need to store the element size / uncompressed page size because we know to which column
305 /// the page belongs
306 struct RPageInfo {
307 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
308 std::uint32_t fNElements = std::uint32_t(-1);
309 /// The meaning of fLocator depends on the storage backend.
311 /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
312 bool fHasChecksum = false;
313
/// Two page infos compare equal when their element count and their locator match.
/// NOTE(review): fHasChecksum does not take part in the comparison -- confirm that this is intentional.
314 bool operator==(const RPageInfo &other) const
315 {
316 return fNElements == other.fNElements && fLocator == other.fLocator;
317 }
318 };
320 /// Index (in cluster) of the first element in page.
322 /// Page number in the corresponding RPageRange.
324
325 RPageInfoExtended() = default;
327 : RPageInfo(pi), fFirstInPage(i), fPageNo(n)
328 {
329 }
330 };
331
332 RPageRange() = default;
333 RPageRange(const RPageRange &other) = delete;
334 RPageRange &operator=(const RPageRange &other) = delete;
335 RPageRange(RPageRange &&other) = default;
336 RPageRange &operator=(RPageRange &&other) = default;
337
339 {
340 RPageRange clone;
342 clone.fPageInfos = fPageInfos;
343 return clone;
344 }
345
346 /// Find the page in the RPageRange that contains the given element. The element must exist.
347 RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const;
348
350 std::vector<RPageInfo> fPageInfos;
351
/// Page ranges are equal when they refer to the same physical column and their page info lists
/// are element-wise equal (same length, same order).
352 bool operator==(const RPageRange &other) const
353 {
354 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
355 }
356 };
357
358private:
360 /// Clusters can be swapped by adjusting the entry offsets
362 // TODO(jblomer): change to std::uint64_t
364
365 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
366 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
367
368public:
370
376
378
379 bool operator==(const RClusterDescriptor &other) const;
380
381 DescriptorId_t GetId() const { return fClusterId; }
384 const RColumnRange &GetColumnRange(DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
385 const RPageRange &GetPageRange(DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
386 /// Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
387 RColumnRangeIterable GetColumnRangeIterable() const;
/// Tells whether this cluster holds a column range entry for the given physical column ID.
388 bool ContainsColumn(DescriptorId_t physicalId) const
389 {
390 return fColumnRanges.find(physicalId) != fColumnRanges.end();
391 }
392 std::uint64_t GetBytesOnStorage() const;
393};
394
396private:
398
399public:
400 class RIterator {
401 private:
402 using Iter_t = std::unordered_map<DescriptorId_t, RColumnRange>::const_iterator;
403 /// The wrapped map iterator
405
406 public:
407 using iterator_category = std::forward_iterator_tag;
410 using difference_type = std::ptrdiff_t;
411 using pointer = const RColumnRange *;
412 using reference = const RColumnRange &;
413
414 RIterator(Iter_t iter) : fIter(iter) {}
416 {
417 ++fIter;
418 return *this;
419 }
420 reference operator*() { return fIter->second; }
421 pointer operator->() { return &fIter->second; }
422 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
423 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
424 };
425
426 explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
427
429 RIterator end() { return fDesc.fColumnRanges.cend(); }
430 size_t count() { return fDesc.fColumnRanges.size(); }
431};
432
433// clang-format off
434/**
435\class ROOT::Experimental::RClusterGroupDescriptor
436\ingroup NTuple
437\brief Clusters are bundled in cluster groups.
438
439Very large ntuples or combined ntuples (chains, friends) contain multiple cluster groups. The cluster groups
440may contain sharded clusters.
441Every ntuple has at least one cluster group. The clusters in a cluster group are ordered corresponding to
442the order of page locations in the page list envelope that belongs to the cluster group (see format specification)
443*/
444// clang-format on
447
448private:
450 /// The cluster IDs can be empty if the corresponding page list is not loaded.
451 std::vector<DescriptorId_t> fClusterIds;
452 /// The page list that corresponds to the cluster group
454 /// Uncompressed size of the page list
455 std::uint64_t fPageListLength = 0;
456 /// The minimum first entry number of the clusters in the cluster group
457 std::uint64_t fMinEntry = 0;
458 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
459 std::uint64_t fEntrySpan = 0;
460 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
461 std::uint32_t fNClusters = 0;
462
463public:
469
471 // Creates a clone without the cluster IDs
473
474 bool operator==(const RClusterGroupDescriptor &other) const;
475
477 std::uint32_t GetNClusters() const { return fNClusters; }
479 std::uint64_t GetPageListLength() const { return fPageListLength; }
480 const std::vector<DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
481 std::uint64_t GetMinEntry() const { return fMinEntry; }
482 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
483 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
484 /// Stage two loads the list of cluster IDs.
485 bool HasClusterDetails() const { return !fClusterIds.empty(); }
486};
487
488/// Used in RExtraTypeInfoDescriptor
490
491// clang-format off
492/**
493\class ROOT::Experimental::RExtraTypeInfoDescriptor
494\ingroup NTuple
495\brief Field-specific extra type information from the header / extension header
496
497Currently only used by unsplit fields to store RNTuple-wide list of streamer info records.
498*/
499// clang-format on
502
503private:
504 /// Specifies the meaning of the extra information
506 /// Extra type information restricted to a certain version range of the type
507 std::uint32_t fTypeVersionFrom = 0;
508 std::uint32_t fTypeVersionTo = 0;
509 /// The type name the extra information refers to; empty for RNTuple-wide extra information
510 std::string fTypeName;
511 /// The content format depends on the content ID and may be binary
512 std::string fContent;
513
514public:
520
521 bool operator==(const RExtraTypeInfoDescriptor &other) const;
522
524
526 std::uint32_t GetTypeVersionFrom() const { return fTypeVersionFrom; }
527 std::uint32_t GetTypeVersionTo() const { return fTypeVersionTo; }
528 const std::string &GetTypeName() const { return fTypeName; }
529 const std::string &GetContent() const { return fContent; }
530};
531
532// clang-format off
533/**
534\class ROOT::Experimental::RNTupleDescriptor
535\ingroup NTuple
536\brief The on-storage meta-data of an ntuple
537
538Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
539several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
540relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
541its location and size, and for every column the range of element indexes as well as a list of pages and page
542locations.
543
544The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
545for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
546
547The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
548the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
549written struct. This allows for forward and backward compatibility when the meta-data evolves.
550*/
551// clang-format on
554
555public:
556 class RHeaderExtension;
557
558private:
559 /// The ntuple name needs to be unique in a given storage location (file)
560 std::string fName;
561 /// Free text from the user
562 std::string fDescription;
563
564 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
565 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
566 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
567
568 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
569 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
570 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
571
572 DescriptorId_t fFieldZeroId = kInvalidDescriptorId; ///< Set by the descriptor builder
573
574 /**
575 * Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
576 * active page locations. During the lifetime of the descriptor, page location information for clusters
577 * can be added or removed. When this happens, the generation should be increased, so that users of the
578 * descriptor know that the information changed. The generation is increased, e.g., by the page source's
579 * exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
580 */
581 std::uint64_t fGeneration = 0;
582
583 std::set<unsigned int> fFeatureFlags;
584 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
585 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
586 std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
587 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
588 /// from a chain of files
589 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
590 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
591 std::unique_ptr<RHeaderExtension> fHeaderExtension;
592
593public:
594 static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
595
601
602 /// Modifiers passed to `CreateModel`
604 RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
605 /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being used
606 /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
608 };
609
610 RNTupleDescriptor() = default;
611 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
615
616 std::unique_ptr<RNTupleDescriptor> Clone() const;
617
618 bool operator==(const RNTupleDescriptor &other) const;
619
620 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
621 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
622 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
623
624 const RFieldDescriptor &GetFieldDescriptor(DescriptorId_t fieldId) const { return fFieldDescriptors.at(fieldId); }
626 {
627 return fColumnDescriptors.at(columnId);
628 }
630 {
631 return fClusterGroupDescriptors.at(clusterGroupId);
632 }
634 {
635 return fClusterDescriptors.at(clusterId);
636 }
637
638 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
639 RFieldDescriptorIterable
640 GetFieldIterable(const RFieldDescriptor &fieldDesc,
641 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const;
642 RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const;
643 RFieldDescriptorIterable
645 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const;
646
647 RFieldDescriptorIterable GetTopLevelFields() const;
648 RFieldDescriptorIterable
649 GetTopLevelFields(const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const;
650
651 RColumnDescriptorIterable GetColumnIterable() const;
652 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
653 RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const;
654
655 RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
656
657 RClusterDescriptorIterable GetClusterIterable() const;
658
659 RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
660
661 const std::string &GetName() const { return fName; }
662 const std::string &GetDescription() const { return fDescription; }
663
664 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
665 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
666 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
667 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
668 std::size_t GetNClusters() const { return fNClusters; }
669 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
670 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
671
672 /// We know the number of entries from adding the cluster summaries
674 NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const;
675
676 /// Returns the logical parent of all top-level NTuple data fields.
679 DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
680 /// Searches for a top-level field
681 DescriptorId_t FindFieldId(std::string_view fieldName) const;
683 FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const;
685 FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const;
689
690 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
691 /// In case of invalid field ID, an empty string is returned.
692 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
693
694 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
695 std::vector<std::uint64_t> GetFeatureFlags() const;
696
697 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
698 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
699
700 /// Methods to load and drop cluster group details (cluster IDs and page locations)
701 RResult<void> AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
703
704 std::uint64_t GetGeneration() const { return fGeneration; }
706
707 /// Re-create the C++ model from the stored meta-data
708 std::unique_ptr<RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
709 void PrintInfo(std::ostream &output) const;
710};
711
712// clang-format off
713/**
714\class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
715\ingroup NTuple
716\brief Used to loop over a field's associated columns
717*/
718// clang-format on
720private:
721 /// The associated NTuple for this range.
723 /// The descriptor ids of the columns ordered by field, representation, and column index
724 std::vector<DescriptorId_t> fColumns = {};
725
726public:
727 class RIterator {
728 private:
729 /// The enclosing range's NTuple.
731 /// The enclosing range's descriptor id list.
732 const std::vector<DescriptorId_t> &fColumns;
733 std::size_t fIndex = 0;
734
735 public:
736 using iterator_category = std::forward_iterator_tag;
739 using difference_type = std::ptrdiff_t;
742
743 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
744 : fNTuple(ntuple), fColumns(columns), fIndex(index)
745 {
746 }
748 {
749 ++fIndex;
750 return *this;
751 }
753 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
754 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
755 };
756
757 RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc);
759
762 size_t count() { return fColumns.size(); }
763};
764
765// clang-format off
766/**
767\class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
768\ingroup NTuple
769\brief Used to loop over a field's child fields
770*/
771// clang-format on
773private:
774 /// The associated NTuple for this range.
776 /// The descriptor ids of the child fields. These may be sorted using
777 /// a comparison function.
778 std::vector<DescriptorId_t> fFieldChildren = {};
779
780public:
781 class RIterator {
782 private:
783 /// The enclosing range's NTuple.
785 /// The enclosing range's descriptor id list.
786 const std::vector<DescriptorId_t> &fFieldChildren;
787 std::size_t fIndex = 0;
788
789 public:
790 using iterator_category = std::forward_iterator_tag;
793 using difference_type = std::ptrdiff_t;
796
797 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &fieldChildren, std::size_t index)
798 : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index)
799 {
800 }
802 {
803 ++fIndex;
804 return *this;
805 }
807 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
808 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
809 };
811 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
812 {
813 }
814 /// Sort the range using an arbitrary comparison function.
816 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator)
817 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
818 {
819 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
820 }
823};
824
825// clang-format off
826/**
827\class ROOT::Experimental::RNTupleDescriptor::RClusterGroupDescriptorIterable
828\ingroup NTuple
829\brief Used to loop over all the cluster groups of an ntuple (in unspecified order)
830
831Enumerate all cluster group IDs from the cluster group descriptor. No specific order can be assumed, use
832FindNextClusterGroupId and FindPrevClusterGroupId to traverse cluster groups by entry number.
833*/
834// clang-format on
836private:
837 /// The associated NTuple for this range.
839
840public:
841 class RIterator {
842 private:
843 /// The enclosing range's NTuple.
845 std::size_t fIndex = 0;
846
847 public:
848 using iterator_category = std::forward_iterator_tag;
851 using difference_type = std::ptrdiff_t;
854
855 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
857 {
858 ++fIndex;
859 return *this;
860 }
862 {
863 auto it = fNTuple.fClusterGroupDescriptors.begin();
864 std::advance(it, fIndex);
865 return it->second;
866 }
867 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
868 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
869 };
870
874};
875
876// clang-format off
877/**
878\class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
879\ingroup NTuple
880\brief Used to loop over all the clusters of an ntuple (in unspecified order)
881
882Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
883FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
884*/
885// clang-format on
887private:
888 /// The associated NTuple for this range.
890
891public:
892 class RIterator {
893 private:
894 /// The enclosing range's NTuple.
896 std::size_t fIndex = 0;
897
898 public:
899 using iterator_category = std::forward_iterator_tag;
902 using difference_type = std::ptrdiff_t;
905
906 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
908 {
909 ++fIndex;
910 return *this;
911 }
913 {
914 auto it = fNTuple.fClusterDescriptors.begin();
915 std::advance(it, fIndex);
916 return it->second;
917 }
918 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
919 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
920 };
921
925};
926
927// clang-format off
928/**
929\class ROOT::Experimental::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
930\ingroup NTuple
931\brief Used to loop over all the extra type info records of an ntuple (in unspecified order)
932*/
933// clang-format on
935private:
936 /// The associated NTuple for this range.
938
939public:
940 class RIterator {
941 private:
942 /// The enclosing range's NTuple.
944 std::size_t fIndex = 0;
945
946 public:
947 using iterator_category = std::forward_iterator_tag;
950 using difference_type = std::ptrdiff_t;
953
954 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
956 {
957 ++fIndex;
958 return *this;
959 }
961 {
962 auto it = fNTuple.fExtraTypeInfoDescriptors.begin();
963 std::advance(it, fIndex);
964 return *it;
965 }
966 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
967 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
968 };
969
973};
974
975// clang-format off
976/**
977\class ROOT::Experimental::RNTupleDescriptor::RHeaderExtension
978\ingroup NTuple
979\brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
980*/
981// clang-format on
984
985private:
986 /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize the
987 /// fields in that order.
988 std::vector<DescriptorId_t> fFieldIdsOrder;
989 /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
990 /// header, this enables us to determine if the column belongs to a field of the header extension or if it
991 /// belongs to a field of the regular header that gets extended by additional column representations.
992 std::unordered_set<DescriptorId_t> fFieldIdsLookup;
993 /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
994 /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
995 /// these columns need to be serialized in the extension header without re-serializing the field.
996 std::vector<DescriptorId_t> fExtendedColumnRepresentations;
997 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
998 std::uint32_t fNLogicalColumns = 0;
999 std::uint32_t fNPhysicalColumns = 0;
1000
/// Registers a field as belonging to the header extension: its ID is appended to fFieldIdsOrder (which keeps
/// the order of addition) and inserted into fFieldIdsLookup.
1001 void AddExtendedField(const RFieldDescriptor &fieldDesc)
1002 {
1003 fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1004 fFieldIdsLookup.insert(fieldDesc.GetId());
1005 }
1006
/// Records a column that was added as part of the header extension. When the column's field is not itself a
/// field of the header extension (its ID is not in fFieldIdsLookup), the logical column ID is appended to
/// fExtendedColumnRepresentations.
/// NOTE(review): the numbered lines 1009 and 1011 are absent from this listing. As printed, the alias check
/// appears to guard the lookup below, which is probably an artifact of the missing lines -- presumably they
/// update fNLogicalColumns / fNPhysicalColumns. Confirm against the complete header before relying on this.
1007 void AddExtendedColumn(const RColumnDescriptor &columnDesc)
1008 {
1010 if (!columnDesc.IsAliasColumn())
1012 if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1013 fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1014 }
1015 }
1016
1017public:
/// Number of fields registered in the header extension, i.e. the size of fFieldIdsOrder.
1018 std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
/// Returns the logical column counter fNLogicalColumns (a 32-bit counter, widened to std::size_t).
1019 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
/// Returns the physical column counter fNPhysicalColumns (a 32-bit counter, widened to std::size_t).
1020 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1021 const std::vector<DescriptorId_t> &GetExtendedColumnRepresentations() const
1022 {
1024 }
1025 /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1026 /// of their addition.
1027 /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1028 /// the field is not yet linked into the schema tree.
1029 std::vector<DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
1030};
1031
1032namespace Internal {
1033
1034// clang-format off
1035/**
1036\class ROOT::Experimental::Internal::RColumnDescriptorBuilder
1037\ingroup NTuple
1038\brief A helper class for piece-wise construction of an RColumnDescriptor
1039
1040Dangling column descriptors can become actual descriptors when added to an
1041RNTupleDescriptorBuilder instance and then linked to their fields.
1042*/
1043// clang-format on
1045private:
1047
1048public:
1049 /// Make an empty column descriptor builder.
1051
1053 {
1054 fColumn.fLogicalColumnId = logicalColumnId;
1055 return *this;
1056 }
1058 {
1059 fColumn.fPhysicalColumnId = physicalColumnId;
1060 return *this;
1061 }
/// Sets the size in bits of the elements of the column under construction (fBitsOnStorage).
/// Returns *this so that setter calls can be chained.
1062 RColumnDescriptorBuilder &BitsOnStorage(std::uint16_t bitsOnStorage)
1063 {
1064 fColumn.fBitsOnStorage = bitsOnStorage;
1065 return *this;
1066 }
1068 {
1069 fColumn.fType = type;
1070 return *this;
1071 }
1073 {
1074 fColumn.fFieldId = fieldId;
1075 return *this;
1076 }
1078 {
1080 return *this;
1081 }
/// Sets the index of the first stored element of the column under construction (fFirstElementIndex).
/// Returns *this so that setter calls can be chained.
/// NOTE(review): the reference index of this page lists fFirstElementIndex as std::int64_t while the
/// parameter is unsigned, so the value is converted on assignment -- confirm the intended value range.
1082 RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
1083 {
1084 fColumn.fFirstElementIndex = firstElementIdx;
1085 return *this;
1086 }
1088 {
1092 return *this;
1093 }
/// Sets the index of the column representation this column belongs to (fRepresentationIndex).
/// Returns *this so that setter calls can be chained.
1094 RColumnDescriptorBuilder &RepresentationIndex(std::uint16_t representationIndex)
1095 {
1096 fColumn.fRepresentationIndex = representationIndex;
1097 return *this;
1098 }
1101 /// Attempt to make a column descriptor. This may fail if the column
1102 /// was not given enough information to make a proper descriptor.
1104};
1105
1106// clang-format off
1107/**
1108\class ROOT::Experimental::Internal::RFieldDescriptorBuilder
1109\ingroup NTuple
1110\brief A helper class for piece-wise construction of an RFieldDescriptor
1111
1112Dangling field descriptors describe a single field in isolation. They are
1113missing the necessary relationship information (parent field, any child fields)
1114required to describe a real NTuple field.
1115
1116Dangling field descriptors can only become actual descriptors when added to an
1117RNTupleDescriptorBuilder instance and then linked to other fields.
1118*/
1119// clang-format on
1121private:
1123
1124public:
1125 /// Make an empty dangling field descriptor.
1127 /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
1128 /// Relationship information is lost during the conversion to a
1129 /// dangling descriptor:
1130 /// * Parent id is reset to an invalid id.
1131 /// * Field children ids are forgotten.
1132 ///
1133 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
1134 explicit RFieldDescriptorBuilder(const RFieldDescriptor &fieldDesc);
1135
1136 /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
1137 static RFieldDescriptorBuilder FromField(const RFieldBase &field);
1138
1140 {
1141 fField.fFieldId = fieldId;
1142 return *this;
1143 }
/// Sets the field version (fFieldVersion), i.e. the version of the C++-type-to-column translation mechanics.
/// Returns *this so that setter calls can be chained.
1144 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
1145 {
1146 fField.fFieldVersion = fieldVersion;
1147 return *this;
1148 }
/// Sets the version of the C++ type itself (fTypeVersion).
/// Returns *this so that setter calls can be chained.
1149 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1150 {
1151 fField.fTypeVersion = typeVersion;
1152 return *this;
1153 }
1155 {
1157 return *this;
1158 }
1160 {
1162 return *this;
1163 }
/// Sets the field name (fFieldName), which is the leaf name not including parent fields.
/// Returns *this so that setter calls can be chained.
1164 RFieldDescriptorBuilder &FieldName(const std::string &fieldName)
1165 {
1166 fField.fFieldName = fieldName;
1167 return *this;
1168 }
/// Sets the free-text description of the field (fFieldDescription).
/// Returns *this so that setter calls can be chained.
1169 RFieldDescriptorBuilder &FieldDescription(const std::string &fieldDescription)
1170 {
1171 fField.fFieldDescription = fieldDescription;
1172 return *this;
1173 }
/// Sets the name of the C++ type that was used when writing the field (fTypeName).
/// Returns *this so that setter calls can be chained.
1174 RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1175 {
1176 fField.fTypeName = typeName;
1177 return *this;
1178 }
/// Sets the type alias (fTypeAlias): a typedef or using directive that resolved to the type name during
/// field creation. Returns *this so that setter calls can be chained.
1179 RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
1180 {
1181 fField.fTypeAlias = typeAlias;
1182 return *this;
1183 }
/// Sets the number of elements per entry for fixed-size arrays (fNRepetitions).
/// Returns *this so that setter calls can be chained.
1184 RFieldDescriptorBuilder &NRepetitions(std::uint64_t nRepetitions)
1185 {
1186 fField.fNRepetitions = nRepetitions;
1187 return *this;
1188 }
1190 {
1191 fField.fStructure = structure;
1192 return *this;
1193 }
/// Sets the optional type checksum of the field (fTypeChecksum); an empty optional is stored as such.
/// Returns *this so that setter calls can be chained.
1194 RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1195 {
1196 fField.fTypeChecksum = typeChecksum;
1197 return *this;
1198 }
1200 /// Attempt to make a field descriptor. This may fail if the dangling field
1201 /// was not given enough information to make a proper descriptor.
1203};
1204
1205// clang-format off
1206/**
1207\class ROOT::Experimental::Internal::RClusterDescriptorBuilder
1208\ingroup NTuple
1209\brief A helper class for piece-wise construction of an RClusterDescriptor
1210
1211The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1212piecewise addition of page locations.
1213*/
1214// clang-format on
1216private:
1218
1219public:
1221 {
1222 fCluster.fClusterId = clusterId;
1223 return *this;
1224 }
1225
/// Sets the index of the first entry of the cluster under construction (fFirstEntryIndex).
/// Returns *this so that setter calls can be chained.
1226 RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
1227 {
1228 fCluster.fFirstEntryIndex = firstEntryIndex;
1229 return *this;
1230 }
1231
/// Sets the number of entries of the cluster under construction (fNEntries).
/// Returns *this so that setter calls can be chained.
1232 RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
1233 {
1234 fCluster.fNEntries = nEntries;
1235 return *this;
1236 }
1237
1238 RResult<void> CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1239 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
1240
1241 /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1242 /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1243 /// MarkSuppressedColumnRange() took place.
1245
1246 /// Sets the first element index and number of elements for all the suppressed column ranges.
1247 /// The information is taken from the corresponding columns from the primary representation.
1248 /// Needs to be called when all the columns (suppressed and regular) were added.
1250
1251 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1252 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1253 /// 'zero' page from such locator. Any call to `CommitColumnRange()` and `CommitSuppressedColumnRanges()`
1254 /// should happen before calling this function.
1256
1258 {
1259 return fCluster.GetColumnRange(physicalId);
1260 }
1261
1262 /// Move out the full cluster descriptor including page locations
1264};
1265
1266// clang-format off
1267/**
1268\class ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder
1269\ingroup NTuple
1270\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1271*/
1272// clang-format on
1274private:
1276
1277public:
1280
1282 {
1283 fClusterGroup.fClusterGroupId = clusterGroupId;
1284 return *this;
1285 }
1287 {
1288 fClusterGroup.fPageListLocator = pageListLocator;
1289 return *this;
1290 }
/// Sets the uncompressed size of the page list of the cluster group under construction (fPageListLength).
/// Returns *this so that setter calls can be chained.
1291 RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1292 {
1293 fClusterGroup.fPageListLength = pageListLength;
1294 return *this;
1295 }
1297 {
1298 fClusterGroup.fMinEntry = minEntry;
1299 return *this;
1300 }
1302 {
1303 fClusterGroup.fEntrySpan = entrySpan;
1304 return *this;
1305 }
1307 {
1308 fClusterGroup.fNClusters = nClusters;
1309 return *this;
1310 }
/// Attaches the list of cluster IDs to the cluster group under construction.
/// The number of IDs must equal the cluster count already known to the group (GetNClusters()); otherwise an
/// RException is thrown and fClusterIds is left untouched. On success the IDs are copied into fClusterIds.
1311 void AddClusters(const std::vector<DescriptorId_t> &clusterIds)
1312 {
1313 if (clusterIds.size() != fClusterGroup.GetNClusters())
1314 throw RException(R__FAIL("mismatch of number of clusters"));
1315 fClusterGroup.fClusterIds = clusterIds;
1316 }
1317
1319};
1320
1321// clang-format off
1322/**
1323\class ROOT::Experimental::Internal::RColumnGroupDescriptorBuilder
1324\ingroup NTuple
1325\brief A helper class for piece-wise construction of an RColumnGroupDescriptor
1326*/
1327// clang-format on
1329private:
1331
1332public:
1334
1336 {
1337 fColumnGroup.fColumnGroupId = columnGroupId;
1338 return *this;
1339 }
/// Adds a physical column ID to the column group under construction by inserting it into fPhysicalColumnIds.
1340 void AddColumn(DescriptorId_t physicalId) { fColumnGroup.fPhysicalColumnIds.insert(physicalId); }
1341
1343};
1344
1345// clang-format off
1346/**
1347\class ROOT::Experimental::Internal::RExtraTypeInfoDescriptorBuilder
1348\ingroup NTuple
1349\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1350*/
1351// clang-format on
1353private:
1355
1356public:
1358
1360 {
1361 fExtraTypeInfo.fContentId = contentId;
1362 return *this;
1363 }
/// Sets fTypeVersionFrom, one bound of the type version range the extra type information is restricted to.
/// Returns *this so that setter calls can be chained.
1364 RExtraTypeInfoDescriptorBuilder &TypeVersionFrom(std::uint32_t typeVersionFrom)
1365 {
1366 fExtraTypeInfo.fTypeVersionFrom = typeVersionFrom;
1367 return *this;
1368 }
1370 {
1371 fExtraTypeInfo.fTypeVersionTo = typeVersionTo;
1372 return *this;
1373 }
/// Sets the name of the type the extra information refers to (fTypeName).
/// Returns *this so that setter calls can be chained.
1374 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1375 {
1376 fExtraTypeInfo.fTypeName = typeName;
1377 return *this;
1378 }
/// Sets the payload of the extra type information (fContent). The string is stored as given.
/// Returns *this so that setter calls can be chained.
1379 RExtraTypeInfoDescriptorBuilder &Content(const std::string &content)
1380 {
1381 fExtraTypeInfo.fContent = content;
1382 return *this;
1383 }
1384
1386};
1387
1388// clang-format off
1389/**
1390\class ROOT::Experimental::Internal::RNTupleDescriptorBuilder
1391\ingroup NTuple
1392\brief A helper class for piece-wise construction of an RNTupleDescriptor
1393
1394Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1395*/
1396// clang-format on
1398private:
1401
1402public:
1403 /// Checks whether invariants hold:
1404 /// * NTuple name is valid
1405 /// * Fields have valid parents
1406 /// * Number of columns is constant across column representations
1410
1411 void SetNTuple(const std::string_view name, const std::string_view description);
1412 void SetFeature(unsigned int flag);
1413
/// Stores the given hash value in the descriptor under construction (fOnDiskHeaderXxHash3).
1414 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1416 /// The real footer size also includes the page list envelopes
1418
1419 void AddField(const RFieldDescriptor &fieldDesc);
1422
1423 // The field that the column belongs to has to be already available. For fields with multiple columns,
1424 // the columns need to be added in order of the column index
1426
1429
1431
1432 /// Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor
1433 void Reset();
1434
1435 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1436 /// annotated as being part of the header extension.
1437 void BeginHeaderExtension();
1438
1439 /// Get the streamer info records for custom classes. Currently requires the corresponding dictionaries to be loaded.
1441};
1442
1443} // namespace Internal
1444} // namespace Experimental
1445} // namespace ROOT
1446
1447#endif // ROOT7_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
const RClusterDescriptor::RColumnRange & GetColumnRange(DescriptorId_t physicalId)
RResult< void > MarkSuppressedColumnRange(DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
void AddClusters(const std::vector< DescriptorId_t > &clusterIds)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & Type(EColumnType type)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RColumnGroupDescriptor.
RColumnGroupDescriptorBuilder & ColumnGroupId(DescriptorId_t columnGroupId)
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersionTo(std::uint32_t typeVersionTo)
RExtraTypeInfoDescriptorBuilder & TypeVersionFrom(std::uint32_t typeVersionFrom)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & ProjectionSourceId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
A helper class for piece-wise construction of an RNTupleDescriptor.
RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddFieldProjection(DescriptorId_t sourceId, DescriptorId_t targetId)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also include the page list envelopes.
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
void Reset()
Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor.
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
std::unordered_map< DescriptorId_t, RColumnRange >::const_iterator Iter_t
Records the partition of data into pages for a particular column in a particular cluster.
RPageRange & operator=(RPageRange &&other)=default
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor & operator=(RClusterDescriptor &&other)=default
RClusterDescriptor(RClusterDescriptor &&other)=default
bool ContainsColumn(DescriptorId_t physicalId) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
RClusterDescriptor(const RClusterDescriptor &other)=delete
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
const RColumnRange & GetColumnRange(DescriptorId_t physicalId) const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
const RPageRange & GetPageRange(DescriptorId_t physicalId) const
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
RClusterGroupDescriptor CloneSummary() const
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
Meta-data stored for every column of an ntuple.
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
RColumnDescriptor & operator=(RColumnDescriptor &&other)=default
RColumnDescriptor(const RColumnDescriptor &other)=delete
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
EColumnType fType
The on-disk column type.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
RColumnGroupDescriptor(const RColumnGroupDescriptor &other)=delete
RColumnGroupDescriptor & operator=(const RColumnGroupDescriptor &other)=delete
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
RColumnGroupDescriptor & operator=(RColumnGroupDescriptor &&other)=default
bool operator==(const RColumnGroupDescriptor &other) const
bool Contains(DescriptorId_t physicalId) const
const std::unordered_set< DescriptorId_t > & GetPhysicalColumnIds() const
RColumnGroupDescriptor(RColumnGroupDescriptor &&other)=default
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Field-specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
std::uint32_t fTypeVersionFrom
Extra type information restricted to a certain version range of the type.
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
std::string fContent
The content format depends on the content ID and may be binary.
A field translates read and write calls from/to underlying columns to/from tree values.
Meta-data stored for every field of an ntuple.
RFieldDescriptor & operator=(RFieldDescriptor &&other)=default
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
const std::string & GetFieldName() const
const std::string & GetTypeName() const
const std::string & GetFieldDescription() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
const std::vector< DescriptorId_t > & GetLogicalColumnIds() const
std::string fFieldDescription
Free text set by the user.
const std::string & GetTypeAlias() const
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::vector< DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::optional< std::uint32_t > GetTypeChecksum() const
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
Used to loop over all the cluster groups of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
Used to loop over all the extra type info record of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
Summarizes information about fields and the corresponding columns that were added after the header ha...
std::vector< DescriptorId_t > fFieldIdsOrder
All field IDs of late model extensions, in the order of field addition.
void AddExtendedColumn(const RColumnDescriptor &columnDesc)
std::uint32_t fNLogicalColumns
Number of logical and physical columns; updated by the descriptor builder when columns are added.
const std::vector< DescriptorId_t > & GetExtendedColumnRepresentations() const
std::unordered_set< DescriptorId_t > fFieldIdsLookup
All field IDs of late model extensions for efficient lookup.
std::vector< DescriptorId_t > fExtendedColumnRepresentations
All logical column IDs of columns that extend, with additional column representations,...
void AddExtendedField(const RFieldDescriptor &fieldDesc)
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
std::unique_ptr< RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
RColumnDescriptorIterable GetColumnIterable() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
static constexpr unsigned int kFeatureFlagTest
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
bool HasFeature(unsigned int flag) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
const std::string & GetDescription() const
RClusterDescriptorIterable GetClusterIterable() const
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
DescriptorId_t fFieldZeroId
Set by the descriptor builder.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::vector< std::uint64_t > GetFeatureFlags() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
constexpr int kUnknownCompressionSettings
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr ClusterSize_t kInvalidClusterIndex(std::uint64_t(-1))
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The window of element indexes of a particular column in a particular cluster.
NTupleSize_t fFirstElementIndex
The global index of the first column element in the cluster.
bool fIsSuppressed
Suppressed columns have an empty page range and unknown compression settings.
int fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
The number of column elements in the cluster.
ClusterSize_t::ValueType fFirstInPage
Index (in cluster) of the first element in page.
RPageInfoExtended(const RPageInfo &pi, ClusterSize_t::ValueType i, NTupleSize_t n)
NTupleSize_t fPageNo
Page number in the corresponding RPageRange.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
bool fHasChecksum
If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data.
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.
Generic information about the physical location of data.
static void output()