Logo ROOT  
Reference Guide
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleDescriptor
17#define ROOT7_RNTupleDescriptor
18
19#include <ROOT/RColumnModel.hxx>
20#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RSpan.hxx>
24#include <ROOT/RStringView.hxx>
25
26#include <algorithm>
27#include <chrono>
28#include <functional>
29#include <iterator>
30#include <map>
31#include <memory>
32#include <ostream>
33#include <vector>
34#include <string>
35#include <unordered_map>
36#include <unordered_set>
37
38namespace ROOT {
39namespace Experimental {
40
41class RFieldDescriptorBuilder;
42class RNTupleDescriptor;
43class RNTupleDescriptorBuilder;
44class RNTupleModel;
45
46namespace Detail {
47 class RFieldBase;
48}
49
50
51// clang-format off
52/**
53\class ROOT::Experimental::RFieldDescriptor
54\ingroup NTuple
55\brief Meta-data stored for every field of an ntuple
56*/
57// clang-format on
61
62private:
64 /// The version of the C++-type-to-column translation mechanics
65 std::uint32_t fFieldVersion = 0;
66 /// The version of the C++ type itself
67 std::uint32_t fTypeVersion = 0;
68 /// The leaf name, not including parent fields
69 std::string fFieldName;
70 /// Free text set by the user
71 std::string fFieldDescription;
72 /// The C++ type that was used when writing the field
73 std::string fTypeName;
74 /// The number of elements per entry for fixed-size arrays
75 std::uint64_t fNRepetitions = 0;
76 /// The structural information carried by this field in the data model tree
78 /// Establishes sub field relationships, such as classes and collections
80 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
81 /// order of sub fields.
82 std::vector<DescriptorId_t> fLinkIds;
83
84public:
85 RFieldDescriptor() = default;
86 RFieldDescriptor(const RFieldDescriptor &other) = delete;
90
91 bool operator==(const RFieldDescriptor &other) const;
92 /// Get a copy of the descriptor
93 RFieldDescriptor Clone() const;
94 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
95 /// access to sub fields, which is provided by the ntuple descriptor argument.
96 std::unique_ptr<Detail::RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc) const;
97
98 DescriptorId_t GetId() const { return fFieldId; }
99 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
100 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
101 std::string GetFieldName() const { return fFieldName; }
102 std::string GetFieldDescription() const { return fFieldDescription; }
103 std::string GetTypeName() const { return fTypeName; }
104 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
107 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
108};
109
110
111// clang-format off
112/**
113\class ROOT::Experimental::RColumnDescriptor
114\ingroup NTuple
115\brief Meta-data stored for every column of an ntuple
116*/
117// clang-format on
121
122private:
123 /// The actual column identifier, which is the link to the corresponding field
125 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
127 /// Contains the column type and whether it is sorted
129 /// Every column belongs to one and only one field
131 /// A field can be serialized into several columns, which are numbered from zero to $n$
132 std::uint32_t fIndex;
133
134public:
135 RColumnDescriptor() = default;
136 RColumnDescriptor(const RColumnDescriptor &other) = delete;
140
141 bool operator==(const RColumnDescriptor &other) const;
142 /// Get a copy of the descriptor
143 RColumnDescriptor Clone() const;
144
147 RColumnModel GetModel() const { return fModel; }
148 std::uint32_t GetIndex() const { return fIndex; }
151};
152
153// clang-format off
154/**
155\class ROOT::Experimental::RColumnGroupDescriptor
156\ingroup NTuple
157\brief Meta-data for a set of columns; non-trivial column groups are used for sharded clusters
158
159Clusters can span a subset of columns. Such subsets are described as a column group. An empty column group
160is used to denote the column group of all the columns. Every ntuple has at least one column group.
161*/
162// clang-format on
165
166private:
168 std::unordered_set<DescriptorId_t> fPhysicalColumnIds;
169
170public:
176
177 bool operator==(const RColumnGroupDescriptor &other) const;
178
180 const std::unordered_set<DescriptorId_t> &GetPhysicalColumnIds() const { return fPhysicalColumnIds; }
181 bool Contains(DescriptorId_t physicalId) const // true if the ID is in the set; an empty set matches every ID
182 {
183 return fPhysicalColumnIds.empty() || fPhysicalColumnIds.count(physicalId) > 0;
184 }
185 bool HasAllColumns() const { return fPhysicalColumnIds.empty(); } // an empty ID set denotes the group of all columns
186};
187
188// clang-format off
189/**
190\class ROOT::Experimental::RClusterDescriptor
191\ingroup NTuple
192\brief Meta-data for a set of ntuple clusters
193
194The cluster descriptor is built in two phases. In a first phase, the descriptor has only summary data,
195i.e. the ID and the event range. In a second phase, page locations and column ranges are added.
196Both phases are populated by the RClusterDescriptorBuilder.
197Clusters usually span across all available columns but in some cases they can describe only a subset of the columns,
198for instance when describing friend ntuples.
199*/
200// clang-format on
203
204public:
205 /// The window of element indexes of a particular column in a particular cluster
208 /// A 64bit element index
210 /// A 32bit value for the number of column elements in the cluster
212 /// The usual format for ROOT compression settings (see Compression.h).
213 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
214 std::int64_t fCompressionSettings = 0;
215
216 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
217 // Should this be done on the field level?
218
219 bool operator==(const RColumnRange &other) const {
222 }
223
226 }
227 };
228
229 /// Records the partition of data into pages for a particular column in a particular cluster
230 struct RPageRange {
231 /// We do not need to store the element size / uncompressed page size because we know to which column
232 /// the page belongs
233 struct RPageInfo {
234 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
236 /// The meaning of fLocator depends on the storage backend.
238
239 bool operator==(const RPageInfo &other) const {
240 return fNElements == other.fNElements && fLocator == other.fLocator;
241 }
242 };
244 /// Index (in cluster) of the first element in page.
246 /// Page number in the corresponding RPageRange.
248
249 RPageInfoExtended() = default;
251 : RPageInfo(pi), fFirstInPage(i), fPageNo(n) {}
252 };
253
254 RPageRange() = default;
255 RPageRange(const RPageRange &other) = delete;
256 RPageRange &operator =(const RPageRange &other) = delete;
257 RPageRange(RPageRange &&other) = default;
258 RPageRange &operator =(RPageRange &&other) = default;
259
261 RPageRange clone;
263 clone.fPageInfos = fPageInfos;
264 return clone;
265 }
266
267 /// Find the page in the RPageRange that contains the given element. The element must exist.
268 RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const;
269
271 std::vector<RPageInfo> fPageInfos;
272
273 bool operator==(const RPageRange &other) const {
274 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
275 }
276 };
277
278private:
280 /// Clusters can be swapped by adjusting the entry offsets
282 // TODO(jblomer): change to std::uint64_t
284 bool fHasPageLocations = false;
285
286 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
287 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
288
289 void EnsureHasPageLocations() const;
290
291public:
293 // Constructor for a summary-only cluster descriptor without page locations
294 RClusterDescriptor(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
295 : fClusterId(clusterId), fFirstEntryIndex(firstEntryIndex), fNEntries(ClusterSize_t(nEntries))
296 {
297 }
302
304
305 bool operator==(const RClusterDescriptor &other) const;
306
307 DescriptorId_t GetId() const { return fClusterId; }
311 {
313 return fColumnRanges.at(physicalId);
314 }
315 const RPageRange &GetPageRange(DescriptorId_t physicalId) const
316 {
318 return fPageRanges.at(physicalId);
319 }
320 bool ContainsColumn(DescriptorId_t physicalId) const;
321 std::unordered_set<DescriptorId_t> GetColumnIds() const;
322 std::uint64_t GetBytesOnStorage() const;
323 bool HasPageLocations() const { return fHasPageLocations; }
324};
325
326// clang-format off
327/**
328\class ROOT::Experimental::RClusterGroupDescriptor
329\ingroup NTuple
330\brief Clusters are stored in cluster groups. Cluster groups span all the columns of a certain event range.
331
332Very large ntuples or combined ntuples (chains, friends) contain multiple cluster groups. The cluster groups
333may contain sharded clusters. However, a cluster group must contain the clusters spanning all the columns for the
334given event range. Cluster groups must partition the entry range of an ntuple.
335Every ntuple has at least one cluster group. The clusters in a cluster group are ordered corresponding to
336the order of page locations in the page list envelope that belongs to the cluster group (see format specification)
337*/
338// clang-format on
341
342private:
344 std::vector<DescriptorId_t> fClusterIds;
345 /// The page list that corresponds to the cluster group
347 /// Uncompressed size of the page list
348 std::uint32_t fPageListLength = 0;
349
350public:
356
358
359 bool operator==(const RClusterGroupDescriptor &other) const;
360
362 std::uint64_t GetNClusters() const { return fClusterIds.size(); }
364 std::uint32_t GetPageListLength() const { return fPageListLength; }
365 bool Contains(DescriptorId_t clusterId) const // linear search through the cluster IDs of this group
366 {
367 return std::find(fClusterIds.begin(), fClusterIds.end(), clusterId) != fClusterIds.end();
368 }
369 const std::vector<DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
370};
371
372// clang-format off
373/**
374\class ROOT::Experimental::RNTupleDescriptor
375\ingroup NTuple
376\brief The on-storage meta-data of an ntuple
377
378Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
379several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
380relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
381its location and size, and for every column the range of element indexes as well as a list of pages and page
382locations.
383
384The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
385for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
386
387The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
388the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
389written struct. This allows for forward and backward compatibility when the meta-data evolves.
390*/
391// clang-format on
394
395private:
396 /// The ntuple name needs to be unique in a given storage location (file)
397 std::string fName;
398 /// Free text from the user
399 std::string fDescription;
400
401 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
402 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
403
404 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster summaries are added
405 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
406
407 /**
408 * Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
409 * active page locations. During the lifetime of the descriptor, page location information for clusters
410 * can be added or removed. When this happens, the generation should be increased, so that users of the
411 * descriptor know that the information changed. The generation is increased, e.g., by the page source's
412 * exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
413 */
414 std::uint64_t fGeneration = 0;
415
416 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
417 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
418 std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
419 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
420 /// from a chain of files
421 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
422
423public:
424 // clang-format off
425 /**
426 \class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
427 \ingroup NTuple
428 \brief Used to loop over a field's associated columns
429 */
430 // clang-format on
432 private:
433 /// The associated NTuple for this range.
435 /// The descriptor ids of the columns ordered by index id
436 std::vector<DescriptorId_t> fColumns = {};
437 public:
438 class RIterator {
439 private:
440 /// The enclosing range's NTuple.
442 /// The enclosing range's descriptor id list.
443 const std::vector<DescriptorId_t> &fColumns;
444 std::size_t fIndex = 0;
445 public:
446 using iterator_category = std::forward_iterator_tag;
449 using difference_type = std::ptrdiff_t;
452
453 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
454 : fNTuple(ntuple), fColumns(columns), fIndex(index) {}
455 iterator operator++() { ++fIndex; return *this; }
457 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
458 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
459 };
460
462 : fNTuple(ntuple)
463 {
464 for (unsigned int i = 0; true; ++i) {
465 auto logicalId = ntuple.FindLogicalColumnId(field.GetId(), i);
466 if (logicalId == kInvalidDescriptorId)
467 break;
468 fColumns.emplace_back(logicalId);
469 }
470 }
473 };
474
475 // clang-format off
476 /**
477 \class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
478 \ingroup NTuple
479 \brief Used to loop over a field's child fields
480 */
481 // clang-format on
483 private:
484 /// The associated NTuple for this range.
486 /// The descriptor ids of the child fields. These may be sorted using
487 /// a comparison function.
488 std::vector<DescriptorId_t> fFieldChildren = {};
489 public:
490 class RIterator {
491 private:
492 /// The enclosing range's NTuple.
494 /// The enclosing range's descriptor id list.
495 const std::vector<DescriptorId_t>& fFieldChildren;
496 std::size_t fIndex = 0;
497 public:
498 using iterator_category = std::forward_iterator_tag;
501 using difference_type = std::ptrdiff_t;
504
505 RIterator(const RNTupleDescriptor& ntuple, const std::vector<DescriptorId_t>& fieldChildren,
506 std::size_t index) : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index) {}
507 iterator operator++() { ++fIndex; return *this; }
511 );
512 }
513 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
514 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
515 };
517 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds()) {}
518 /// Sort the range using an arbitrary comparison function.
520 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator)
521 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
522 {
523 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
524 }
526 return RIterator(fNTuple, fFieldChildren, 0);
527 }
530 }
531 };
532
533 // clang-format off
534 /**
535 \class ROOT::Experimental::RNTupleDescriptor::RClusterGroupDescriptorIterable
536 \ingroup NTuple
537 \brief Used to loop over all the cluster groups of an ntuple (in unspecified order)
538
539 Enumerate all cluster group IDs from the cluster group descriptor. No specific order can be assumed, use
540 FindNextClusterGroupId and FindPrevClusterGroupId to traverse cluster groups by entry number.
541 */
542 // clang-format on
544 private:
545 /// The associated NTuple for this range.
547
548 public:
549 class RIterator {
550 private:
551 /// The enclosing range's NTuple.
553 std::size_t fIndex = 0;
554
555 public:
556 using iterator_category = std::forward_iterator_tag;
559 using difference_type = std::ptrdiff_t;
562
563 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
565 {
566 ++fIndex;
567 return *this;
568 }
570 {
571 auto it = fNTuple.fClusterGroupDescriptors.begin();
572 std::advance(it, fIndex);
573 return it->second;
574 }
575 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
576 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
577 };
578
582 };
583
584 // clang-format off
585 /**
586 \class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
587 \ingroup NTuple
588 \brief Used to loop over all the clusters of an ntuple (in unspecified order)
589
590 Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
591 FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
592 */
593 // clang-format on
595 private:
596 /// The associated NTuple for this range.
598 public:
599 class RIterator {
600 private:
601 /// The enclosing range's NTuple.
603 std::size_t fIndex = 0;
604 public:
605 using iterator_category = std::forward_iterator_tag;
608 using difference_type = std::ptrdiff_t;
611
612 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
613 iterator operator++() { ++fIndex; return *this; }
615 auto it = fNTuple.fClusterDescriptors.begin();
616 std::advance(it, fIndex);
617 return it->second;
618 }
619 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
620 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
621 };
622
626 };
627
628 RNTupleDescriptor() = default;
629 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
633
634 std::unique_ptr<RNTupleDescriptor> Clone() const;
635
636 bool operator ==(const RNTupleDescriptor &other) const;
637
638 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
639 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
640
642 return fFieldDescriptors.at(fieldId);
643 }
645 return fColumnDescriptors.at(columnId);
646 }
648 {
649 return fClusterGroupDescriptors.at(clusterGroupId);
650 }
652 return fClusterDescriptors.at(clusterId);
653 }
654
656 return RFieldDescriptorIterable(*this, fieldDesc);
657 }
659 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
660 {
661 return RFieldDescriptorIterable(*this, fieldDesc, comparator);
662 }
664 return GetFieldIterable(GetFieldDescriptor(fieldId));
665 }
667 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
668 {
669 return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
670 }
673 }
675 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
676 {
677 return GetFieldIterable(GetFieldZeroId(), comparator);
678 }
679
681 {
682 return RColumnDescriptorIterable(*this, fieldDesc);
683 }
685 {
686 return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
687 }
688
690
692 {
693 return RClusterDescriptorIterable(*this);
694 }
695
696 std::string GetName() const { return fName; }
697 std::string GetDescription() const { return fDescription; }
698
699 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
700 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
701 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
702 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
703 std::size_t GetNClusters() const { return fClusterDescriptors.size(); }
704
705 /// We know the number of entries from adding the cluster summaries
707 NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const;
708
709 /// Returns the logical parent of all top-level NTuple data fields.
713 /// Searches for a top-level field
715 DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
716 DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
720
721 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
722 /// In case of invalid field ID, an empty string is returned.
723 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
724
725 /// Methods to load and drop cluster details
728
729 std::uint64_t GetGeneration() const { return fGeneration; }
731
732 /// Re-create the C++ model from the stored meta-data
733 std::unique_ptr<RNTupleModel> GenerateModel() const;
734 void PrintInfo(std::ostream &output) const;
735};
736
737
738// clang-format off
739/**
740\class ROOT::Experimental::RColumnDescriptorBuilder
741\ingroup NTuple
742\brief A helper class for piece-wise construction of an RColumnDescriptor
743
744Dangling column descriptors can become actual descriptors when added to an
745RNTupleDescriptorBuilder instance and then linked to their fields.
746*/
747// clang-format on
749private:
751public:
752 /// Make an empty column descriptor builder.
754
756 {
757 fColumn.fLogicalColumnId = logicalColumnId;
758 return *this;
759 }
761 {
762 fColumn.fPhysicalColumnId = physicalColumnId;
763 return *this;
764 }
766 fColumn.fModel = model;
767 return *this;
768 }
770 fColumn.fFieldId = fieldId;
771 return *this;
772 }
775 return *this;
776 }
778 /// Attempt to make a column descriptor. This may fail if the column
779 /// was not given enough information to make a proper descriptor.
781};
782
783
784// clang-format off
785/**
786\class ROOT::Experimental::RFieldDescriptorBuilder
787\ingroup NTuple
788\brief A helper class for piece-wise construction of an RFieldDescriptor
789
790Dangling field descriptors describe a single field in isolation. They are
791missing the necessary relationship information (parent field, any child fields)
792required to describe a real NTuple field.
793
794Dangling field descriptors can only become actual descriptors when added to an
795RNTupleDescriptorBuilder instance and then linked to other fields.
796*/
797// clang-format on
799private:
801public:
802 /// Make an empty dangling field descriptor.
804 /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
805 /// Relationship information is lost during the conversion to a
806 /// dangling descriptor:
807 /// * Parent id is reset to an invalid id.
808 /// * Field children ids are forgotten.
809 ///
810 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
811 explicit RFieldDescriptorBuilder(const RFieldDescriptor& fieldDesc);
812
813 /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
815
817 fField.fFieldId = fieldId;
818 return *this;
819 }
820 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
821 {
822 fField.fFieldVersion = fieldVersion;
823 return *this;
824 }
825 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
826 {
827 fField.fTypeVersion = typeVersion;
828 return *this;
829 }
832 return *this;
833 }
834 RFieldDescriptorBuilder& FieldName(const std::string& fieldName) {
835 fField.fFieldName = fieldName;
836 return *this;
837 }
838 RFieldDescriptorBuilder& FieldDescription(const std::string& fieldDescription) {
839 fField.fFieldDescription = fieldDescription;
840 return *this;
841 }
842 RFieldDescriptorBuilder& TypeName(const std::string& typeName) {
843 fField.fTypeName = typeName;
844 return *this;
845 }
846 RFieldDescriptorBuilder& NRepetitions(std::uint64_t nRepetitions) {
847 fField.fNRepetitions = nRepetitions;
848 return *this;
849 }
851 fField.fStructure = structure;
852 return *this;
853 }
855 /// Attempt to make a field descriptor. This may fail if the dangling field
856 /// was not given enough information to make a proper descriptor.
858};
859
860
861// clang-format off
862/**
863\class ROOT::Experimental::RClusterDescriptorBuilder
864\ingroup NTuple
865\brief A helper class for piece-wise construction of an RClusterDescriptor
866
867The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
868piecewise addition of page locations.
869*/
870// clang-format on
872private:
874
875public:
876 /// Make a builder that starts from a summary-only cluster descriptor (cluster ID, first entry index, number of entries).
877 RClusterDescriptorBuilder(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
878 : fCluster(clusterId, firstEntryIndex, nEntries) // forwards the summary data to the RClusterDescriptor constructor
879 {
880 }
881
882 RResult<void> CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex,
883 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
884
885 /// Move out the full cluster descriptor including page locations
887};
888
889// clang-format off
890/**
891\class ROOT::Experimental::RClusterGroupDescriptorBuilder
892\ingroup NTuple
893\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
894*/
895// clang-format on
897private:
899
900public:
902
904 {
905 fClusterGroup.fClusterGroupId = clusterGroupId;
906 return *this;
907 }
909 {
910 fClusterGroup.fPageListLocator = pageListLocator;
911 return *this;
912 }
913 RClusterGroupDescriptorBuilder &PageListLength(std::uint32_t pageListLength)
914 {
915 fClusterGroup.fPageListLength = pageListLength;
916 return *this;
917 }
918 void AddCluster(DescriptorId_t clusterId) { fClusterGroup.fClusterIds.emplace_back(clusterId); }
919
921
922 /// Used to prepare the cluster descriptor builders when loading the page locations for a certain cluster group
923 static std::vector<RClusterDescriptorBuilder>
924 GetClusterSummaries(const RNTupleDescriptor &ntplDesc, DescriptorId_t clusterGroupId);
925
927};
928
929// clang-format off
930/**
931\class ROOT::Experimental::RColumnGroupDescriptorBuilder
932\ingroup NTuple
933\brief A helper class for piece-wise construction of an RColumnGroupDescriptor
934*/
935// clang-format on
937private:
939
940public:
942
944 {
945 fColumnGroup.fColumnGroupId = columnGroupId;
946 return *this;
947 }
948 void AddColumn(DescriptorId_t physicalId) { fColumnGroup.fPhysicalColumnIds.insert(physicalId); }
949
951};
952
953// clang-format off
954/**
955\class ROOT::Experimental::RNTupleDescriptorBuilder
956\ingroup NTuple
957\brief A helper class for piece-wise construction of an RNTupleDescriptor
958
959Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
960*/
961// clang-format on
963private:
965 std::uint32_t fHeaderCRC32 = 0;
966
968public:
969 /// Checks whether invariants hold:
970 /// * NTuple name is valid
971 /// * Fields have valid parent and child ids
973 const RNTupleDescriptor& GetDescriptor() const { return fDescriptor; }
975
976 void SetNTuple(const std::string_view name, const std::string_view description);
977 void SetHeaderCRC32(std::uint32_t crc32) { fHeaderCRC32 = crc32; }
978 std::uint32_t GetHeaderCRC32() const { return fHeaderCRC32; }
979
981 /// The real footer size also includes the page list envelopes
983
984 void AddField(const RFieldDescriptor& fieldDesc);
986
987 void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId,
988 const RColumnModel &model, std::uint32_t index);
990
991 RResult<void> AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries);
993
994 /// Used during writing. For reading, cluster summaries are added in the builder and cluster details are added
995 /// on demand through the RNTupleDescriptor.
997
998 /// Clears the clusters, fields, and columns stored so far and returns to a pristine ntuple descriptor
999 void Reset();
1000};
1001
1002} // namespace Experimental
1003} // namespace ROOT
1004
1005#endif // ROOT7_RNTupleDescriptor
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition: TGX11.cxx:110
@ kInvalid
Definition: TSystem.h:79
A helper class for piece-wise construction of an RClusterDescriptor.
RClusterDescriptorBuilder(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
Make an empty cluster descriptor builder.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
RClusterDescriptor(RClusterDescriptor &&other)=default
bool ContainsColumn(DescriptorId_t physicalId) const
RClusterDescriptor(const RClusterDescriptor &other)=delete
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
const RColumnRange & GetColumnRange(DescriptorId_t physicalId) const
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
const RPageRange & GetPageRange(DescriptorId_t physicalId) const
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & PageListLength(std::uint32_t pageListLength)
static std::vector< RClusterDescriptorBuilder > GetClusterSummaries(const RNTupleDescriptor &ntplDesc, DescriptorId_t clusterGroupId)
Used to prepare the cluster descriptor builders when loading the page locations for a certain cluster...
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
Clusters are stored in cluster groups.
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fPageListLength
Uncompressed size of the page list.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool Contains(DescriptorId_t clusterId) const
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
Meta-data stored for every column of an ntuple.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
RColumnDescriptor(const RColumnDescriptor &other)=delete
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
RColumnModel fModel
Contains the column type and whether it is sorted.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
A helper class for piece-wise construction of an RColumnGroupDescriptor.
RColumnGroupDescriptorBuilder & ColumnGroupId(DescriptorId_t columnGroupId)
RResult< RColumnGroupDescriptor > MoveDescriptor()
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
RColumnGroupDescriptor(const RColumnGroupDescriptor &other)=delete
RColumnGroupDescriptor & operator=(const RColumnGroupDescriptor &other)=delete
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
RColumnGroupDescriptor & operator=(RColumnGroupDescriptor &&other)=default
bool operator==(const RColumnGroupDescriptor &other) const
bool Contains(DescriptorId_t physicalId) const
const std::unordered_set< DescriptorId_t > & GetPhysicalColumnIds() const
RColumnGroupDescriptor(RColumnGroupDescriptor &&other)=default
Holds the static meta-data of an RNTuple column.
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
A helper class for piece-wise construction of an RNTupleDescriptor.
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
RResult< void > AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void SetNTuple(const std::string_view name, const std::string_view description)
const RNTupleDescriptor & GetDescriptor() const
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index)
void AddClusterGroup(RClusterGroupDescriptorBuilder &&clusterGroup)
RResult< void > AddClusterWithDetails(RClusterDescriptor &&clusterDesc)
Used during writing.
void AddField(const RFieldDescriptor &fieldDesc)
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
Used to loop over all the cluster groups of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster summaries are added.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RResult< void > DropClusterDetails(DescriptorId_t clusterId)
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::string fName
The ntuple name needs to be unique in a given storage location (file)
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
RResult< void > AddClusterDetails(RClusterDescriptor &&clusterDesc)
Methods to load and drop cluster details.
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::string fDescription
Free text from the user.
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
RResult<void> has no data member and no Inspect() method but instead a Success() factory method.
Definition: RError.hxx:269
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition: RError.hxx:207
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:48
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
Definition: RNTupleUtil.hxx:38
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
Definition: RNTupleUtil.hxx:49
constexpr ClusterSize_t kInvalidClusterIndex(std::uint32_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:167
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
static constexpr double pi
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
NTupleSize_t fFirstElementIndex
A 64bit element index.
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
bool operator==(const RColumnRange &other) const
RPageInfoExtended(const RPageInfo &pi, RClusterSize::ValueType i, NTupleSize_t n)
RClusterSize::ValueType fFirstInPage
Index (in cluster) of the first element in page.
NTupleSize_t fPageNo
Page number in the corresponding RPageRange.
We do not need to store the element size / uncompressed page size because we know to which column the...
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
bool operator==(const RPageRange &other) const
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Definition: RNTupleUtil.hxx:51
Generic information about the physical location of data.
static void output()