Logo ROOT  
Reference Guide
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleDescriptor
17#define ROOT7_RNTupleDescriptor
18
19#include <ROOT/RColumnModel.hxx>
20#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RStringView.hxx>
23
24#include <algorithm>
25#include <chrono>
26#include <functional>
27#include <iterator>
28#include <memory>
29#include <ostream>
30#include <vector>
31#include <string>
32#include <unordered_map>
33#include <unordered_set>
34
35namespace ROOT {
36namespace Experimental {
37
38class RDanglingFieldDescriptor;
39class RNTupleDescriptor;
40class RNTupleDescriptorBuilder;
41class RNTupleModel;
42
43namespace Detail {
44 class RFieldBase;
45}
46
47// clang-format off
48/**
49\class ROOT::Experimental::RFieldDescriptor
50\ingroup NTuple
51\brief Meta-data stored for every field of an ntuple
52*/
53// clang-format on
57
58private:
60 /// The version of the C++-type-to-column translation mechanics
62 /// The version of the C++ type itself
64 /// The leaf name, not including parent fields
65 std::string fFieldName;
66 /// Free text set by the user
67 std::string fFieldDescription;
68 /// The C++ type that was used when writing the field
69 std::string fTypeName;
70 /// The number of elements per entry for fixed-size arrays
71 std::uint64_t fNRepetitions = 0;
72 /// The structural information carried by this field in the data model tree
74 /// Establishes sub field relationships, such as classes and collections
76 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
77 /// order of sub fields.
78 std::vector<DescriptorId_t> fLinkIds;
79
80public:
81 RFieldDescriptor() = default;
82 RFieldDescriptor(const RFieldDescriptor &other) = delete;
86
87 /// In order to handle changes to the serialization routine in future ntuple versions
88 static constexpr std::uint16_t kFrameVersionCurrent = 0;
89 static constexpr std::uint16_t kFrameVersionMin = 0;
90
91 bool operator==(const RFieldDescriptor &other) const;
92 /// Get a copy of the descriptor
93 RFieldDescriptor Clone() const;
94 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
95 /// access to sub fields, which is provided by the ntuple descriptor argument.
96 std::unique_ptr<Detail::RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc) const;
97
98 DescriptorId_t GetId() const { return fFieldId; }
101 std::string GetFieldName() const { return fFieldName; }
102 std::string GetFieldDescription() const { return fFieldDescription; }
103 std::string GetTypeName() const { return fTypeName; }
104 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
107 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
108};
109
110
111// clang-format off
112/**
113\class ROOT::Experimental::RColumnDescriptor
114\ingroup NTuple
115\brief Meta-data stored for every column of an ntuple
116*/
117// clang-format on
120
121private:
123 /// Versions can change, e.g., when new column types are added
125 /// Contains the column type and whether it is sorted
127 /// Every column belongs to one and only one field
129 /// A field can be serialized into several columns, which are numbered from zero to $n$
130 std::uint32_t fIndex;
131
132public:
133 /// In order to handle changes to the serialization routine in future ntuple versions
134 static constexpr std::uint16_t kFrameVersionCurrent = 0;
135 static constexpr std::uint16_t kFrameVersionMin = 0;
136
137 RColumnDescriptor() = default;
138 RColumnDescriptor(const RColumnDescriptor &other) = delete;
142
143 bool operator==(const RColumnDescriptor &other) const;
144
145 DescriptorId_t GetId() const { return fColumnId; }
147 RColumnModel GetModel() const { return fModel; }
148 std::uint32_t GetIndex() const { return fIndex; }
150};
151
152
153// clang-format off
154/**
155\class ROOT::Experimental::RClusterDescriptor
156\ingroup NTuple
157\brief Meta-data for a set of ntuple clusters
158
159The cluster descriptor might carry information of only a subset of available clusters, for instance if multiple
160files are chained and not all of them have been processed yet. Clusters usually span across all available columns but
161in some cases they can describe only a subset of the columns, for instance when describing friend ntuples.
162*/
163// clang-format on
166
167public:
168 /// Generic information about the physical location of data. Values depend on the concrete storage type. E.g.,
169 /// for a local file fUrl might be unused and fPosition might be a file offset. Objects on storage can be compressed
170 /// and therefore we need to store their actual size.
171 struct RLocator {
172 std::int64_t fPosition = 0;
173 std::uint32_t fBytesOnStorage = 0;
174 std::string fUrl;
175
176 bool operator==(const RLocator &other) const {
177 return fPosition == other.fPosition && fBytesOnStorage == other.fBytesOnStorage && fUrl == other.fUrl;
178 }
179 };
180
181 /// The window of element indexes of a particular column in a particular cluster
184 /// A 64bit element index
186 /// A 32bit value for the number of column elements in the cluster
188 /// The usual format for ROOT compression settings (see Compression.h).
189 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
190 std::int64_t fCompressionSettings = 0;
191
192 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
193 // Should this be done on the field level?
194
195 bool operator==(const RColumnRange &other) const {
196 return fColumnId == other.fColumnId && fFirstElementIndex == other.fFirstElementIndex &&
198 }
199
200 bool Contains(NTupleSize_t index) const {
201 return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
202 }
203 };
204
205 /// Records the partition of data into pages for a particular column in a particular cluster
206 struct RPageRange {
207 /// We do not need to store the element size / uncompressed page size because we know to which column
208 /// the page belongs
209 struct RPageInfo {
210 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
212 /// The meaning of fLocator depends on the storage backend.
214
215 bool operator==(const RPageInfo &other) const {
216 return fNElements == other.fNElements && fLocator == other.fLocator;
217 }
218 };
220 /// Index (in cluster) of the first element in page.
222 /// Page number in the corresponding RPageRange.
224
226 : RPageInfo(pi), fFirstInPage(i), fPageNo(n) {}
227 };
228
229 RPageRange() = default;
230 RPageRange(const RPageRange &other) = delete;
231 RPageRange &operator =(const RPageRange &other) = delete;
232 RPageRange(RPageRange &&other) = default;
233 RPageRange &operator =(RPageRange &&other) = default;
234
236 RPageRange clone;
237 clone.fColumnId = fColumnId;
238 clone.fPageInfos = fPageInfos;
239 return clone;
240 }
241
242 /// Find the page in the RPageRange that contains the given element. The element must exist.
243 RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const;
244
246 std::vector<RPageInfo> fPageInfos;
247
248 bool operator==(const RPageRange &other) const {
249 return fColumnId == other.fColumnId && fPageInfos == other.fPageInfos;
250 }
251 };
252
253private:
255 /// Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum
257 /// Clusters can be swapped by adjusting the entry offsets
260 /// For pre-fetching / caching an entire contiguous cluster
262
263 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
264 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
265
266public:
267 /// In order to handle changes to the serialization routine in future ntuple versions
268 static constexpr std::uint16_t kFrameVersionCurrent = 0;
269 static constexpr std::uint16_t kFrameVersionMin = 0;
270
276
277 bool operator==(const RClusterDescriptor &other) const;
278
279 DescriptorId_t GetId() const { return fClusterId; }
283 RLocator GetLocator() const { return fLocator; }
284 const RColumnRange &GetColumnRange(DescriptorId_t columnId) const { return fColumnRanges.at(columnId); }
285 const RPageRange &GetPageRange(DescriptorId_t columnId) const { return fPageRanges.at(columnId); }
286 bool ContainsColumn(DescriptorId_t columnId) const;
287 std::unordered_set<DescriptorId_t> GetColumnIds() const;
288 std::uint64_t GetBytesOnStorage() const;
289};
290
291
292// clang-format off
293/**
294\class ROOT::Experimental::RNTupleDescriptor
295\ingroup NTuple
296\brief The on-storage meta-data of an ntuple
297
298Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
299several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
300relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
301its location and size, and for every column the range of element indexes as well as a list of pages and page
302locations.
303
304The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
305for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
306
307The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
308the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
309written struct. This allows for forward and backward compatibility when the meta-data evolves.
310*/
311// clang-format on
314
315private:
316 /// The ntuple name needs to be unique in a given storage location (file)
317 std::string fName;
318 /// Free text from the user
319 std::string fDescription;
320 /// The origin of the data
321 std::string fAuthor;
322 /// The party currently responsible for storing the data
323 std::string fCustodian;
324 /// The time stamp of the ntuple data (immutable)
325 std::chrono::system_clock::time_point fTimeStampData;
326 /// The time stamp of writing the data to storage, which gets updated when re-written
327 std::chrono::system_clock::time_point fTimeStampWritten;
328 /// The version evolves with the ntuple summary meta-data
330 /// Every NTuple gets a unique identifier
332 /// Column sets that are created as derived sets from existing NTuples share the same group id.
333 /// NTuples in the same group have the same number of entries and are supposed to contain associated data.
335
336 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
337 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
338 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
339 /// from a chain of files
340 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
341
342public:
343 // clang-format off
344 /**
345 \class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
346 \ingroup NTuple
347 \brief Used to loop over a field's associated columns
348 */
349 // clang-format on
351 private:
352 /// The associated NTuple for this range.
354 /// The descriptor ids of the columns ordered by index id
355 std::vector<DescriptorId_t> fColumns = {};
356 public:
357 class RIterator {
358 private:
359 /// The enclosing range's NTuple.
361 /// The enclosing range's descriptor id list.
362 const std::vector<DescriptorId_t> &fColumns;
363 std::size_t fIndex = 0;
364 public:
365 using iterator_category = std::forward_iterator_tag;
368 using difference_type = std::ptrdiff_t;
371
372 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
373 : fNTuple(ntuple), fColumns(columns), fIndex(index) {}
374 iterator operator++() { ++fIndex; return *this; }
376 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
377 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
378 };
379
381 : fNTuple(ntuple)
382 {
383 for (unsigned int i = 0; true; ++i) {
384 auto columnId = ntuple.FindColumnId(field.GetId(), i);
385 if (columnId == kInvalidDescriptorId)
386 break;
387 fColumns.emplace_back(columnId);
388 }
389 }
392 };
393
394 // clang-format off
395 /**
396 \class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
397 \ingroup NTuple
398 \brief Used to loop over a field's child fields
399 */
400 // clang-format on
402 private:
403 /// The associated NTuple for this range.
405 /// The descriptor ids of the child fields. These may be sorted using
406 /// a comparison function.
407 std::vector<DescriptorId_t> fFieldChildren = {};
408 public:
409 class RIterator {
410 private:
411 /// The enclosing range's NTuple.
413 /// The enclosing range's descriptor id list.
414 const std::vector<DescriptorId_t>& fFieldChildren;
415 std::size_t fIndex = 0;
416 public:
417 using iterator_category = std::forward_iterator_tag;
420 using difference_type = std::ptrdiff_t;
423
424 RIterator(const RNTupleDescriptor& ntuple, const std::vector<DescriptorId_t>& fieldChildren,
425 std::size_t index) : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index) {}
426 iterator operator++() { ++fIndex; return *this; }
430 );
431 }
432 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
433 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
434 };
436 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds()) {}
437 /// Sort the range using an arbitrary comparison function.
439 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator)
440 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
441 {
442 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
443 }
445 return RIterator(fNTuple, fFieldChildren, 0);
446 }
449 }
450 };
451
452 // clang-format off
453 /**
454 \class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
455 \ingroup NTuple
456 \brief Used to loop over all the clusters of an ntuple (in unspecified order)
457
458 Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
459 FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
460 TODO(jblomer): review naming of *Range classes and possibly rename consistently to *Iterable
461 */
462 // clang-format on
464 private:
465 /// The associated NTuple for this range.
467 public:
468 class RIterator {
469 private:
470 /// The enclosing range's NTuple.
472 std::size_t fIndex = 0;
473 public:
474 using iterator_category = std::forward_iterator_tag;
477 using difference_type = std::ptrdiff_t;
480
481 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
482 iterator operator++() { ++fIndex; return *this; }
484 auto it = fNTuple.fClusterDescriptors.begin();
485 std::advance(it, fIndex);
486 return it->second;
487 }
488 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
489 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
490 };
491
495 };
496
497 /// In order to handle changes to the serialization routine in future ntuple versions
498 static constexpr std::uint16_t kFrameVersionCurrent = 0;
499 static constexpr std::uint16_t kFrameVersionMin = 0;
500 /// The preamble is sufficient to get the length of the header
501 static constexpr unsigned int kNBytesPreamble = 8;
502 /// The last few bytes after the footer store the length of footer and header
503 static constexpr unsigned int kNBytesPostscript = 16;
504
505 RNTupleDescriptor() = default;
506 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
510
511 bool operator ==(const RNTupleDescriptor &other) const;
512
513 /// We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuple's without libCore
514 /// Serializes the global ntuple information as well as the column and field schemata
515 /// Returns the number of bytes and fills buffer if it is not nullptr.
516 /// TODO(jblomer): instead of runtime testing for nullptr, there should be a template for the case where
517 /// only the size of the buffer is required.
518 std::uint32_t SerializeHeader(void* buffer) const;
519 /// Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr.
520 std::uint32_t SerializeFooter(void* buffer) const;
521 /// Given kNBytesPostscript bytes, extract the header and footer lengths in bytes
522 static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter);
523
524 std::uint32_t GetHeaderSize() const {
525 return SerializeHeader(nullptr);
526 }
527 std::uint32_t GetFooterSize() const {
528 return SerializeFooter(nullptr);
529 }
530
532 return fFieldDescriptors.at(fieldId);
533 }
535 return fColumnDescriptors.at(columnId);
536 }
538 return fClusterDescriptors.at(clusterId);
539 }
540
542 return RFieldDescriptorIterable(*this, fieldDesc);
543 }
545 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
546 {
547 return RFieldDescriptorIterable(*this, fieldDesc, comparator);
548 }
550 return GetFieldIterable(GetFieldDescriptor(fieldId));
551 }
553 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
554 {
555 return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
556 }
559 }
561 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
562 {
563 return GetFieldIterable(GetFieldZeroId(), comparator);
564 }
565
567 {
568 return RColumnDescriptorIterable(*this, fieldDesc);
569 }
571 {
572 return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
573 }
574
576 {
577 return RClusterDescriptorIterable(*this);
578 }
579
580 std::string GetName() const { return fName; }
581 std::string GetDescription() const { return fDescription; }
582 std::string GetAuthor() const { return fAuthor; }
583 std::string GetCustodian() const { return fCustodian; }
584 std::chrono::system_clock::time_point GetTimeStampData() const { return fTimeStampData; }
585 std::chrono::system_clock::time_point GetTimeStampWritten() const { return fTimeStampWritten; }
587 RNTupleUuid GetOwnUuid() const { return fOwnUuid; }
589
590 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
591 std::size_t GetNColumns() const { return fColumnDescriptors.size(); }
592 std::size_t GetNClusters() const { return fClusterDescriptors.size(); }
593
594 // The number of entries as seen with the currently loaded cluster meta-data; there might be more
597
598 /// Returns the logical parent of all top-level NTuple data fields.
602 /// Searches for a top-level field
604 DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
608
609 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
610 /// In case of invalid field ID, an empty string is returned.
611 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
612
613 /// Re-create the C++ model from the stored meta-data
614 std::unique_ptr<RNTupleModel> GenerateModel() const;
615 void PrintInfo(std::ostream &output) const;
616};
617
618// clang-format off
619/**
620\class ROOT::Experimental::RDanglingFieldDescriptor
621\ingroup NTuple
622\brief A helper class for piece-wise construction of an RFieldDescriptor
623
624Dangling field descriptors describe a single field in isolation. They are
625missing the necessary relationship information (parent field, any child fields)
626required to describe a real NTuple field.
627
628Dangling field descriptors can only become actual descriptors when added to an
629RNTupleDescriptorBuilder instance and then linked to other fields.
630*/
631// clang-format on
633private:
635public:
636 /// Make an empty dangling field descriptor.
638 /// Make a new RDanglingFieldDescriptor based off an existing descriptor.
639 /// Relationship information is lost during the conversion to a
640 /// dangling descriptor:
641 /// * Parent id is reset to an invalid id.
642 /// * Field children ids are forgotten.
643 ///
644 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
645 explicit RDanglingFieldDescriptor(const RFieldDescriptor& fieldDesc);
646
647 /// Make a new RDanglingFieldDescriptor based off a live NTuple field.
649
651 fField.fFieldId = fieldId;
652 return *this;
653 }
655 fField.fFieldVersion = fieldVersion;
656 return *this;
657 }
659 fField.fTypeVersion = typeVersion;
660 return *this;
661 }
662 RDanglingFieldDescriptor& FieldName(const std::string& fieldName) {
663 fField.fFieldName = fieldName;
664 return *this;
665 }
666 RDanglingFieldDescriptor& FieldDescription(const std::string& fieldDescription) {
667 fField.fFieldDescription = fieldDescription;
668 return *this;
669 }
670 RDanglingFieldDescriptor& TypeName(const std::string& typeName) {
671 fField.fTypeName = typeName;
672 return *this;
673 }
674 RDanglingFieldDescriptor& NRepetitions(std::uint64_t nRepetitions) {
675 fField.fNRepetitions = nRepetitions;
676 return *this;
677 }
679 fField.fStructure = structure;
680 return *this;
681 }
682 /// Attempt to make a field descriptor. This may fail if the dangling field
683 /// was not given enough information to make a proper descriptor.
685};
686
687// clang-format off
688/**
689\class ROOT::Experimental::RNTupleDescriptorBuilder
690\ingroup NTuple
691\brief A helper class for piece-wise construction of an RNTupleDescriptor
692
693Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
694*/
695// clang-format on
697private:
699
700public:
701 /// Checks whether invariants hold:
702 /// * NTuple name is valid
703 /// * Fields have valid parent and child ids
705 const RNTupleDescriptor& GetDescriptor() const { return fDescriptor; }
707
708 void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author,
709 const RNTupleVersion &version, const RNTupleUuid &uuid);
710
711 void AddField(const RFieldDescriptor& fieldDesc);
713
714 void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId,
715 const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index);
716
717 void SetFromHeader(void* headerBuffer);
718
719 void AddCluster(DescriptorId_t clusterId, RNTupleVersion version,
720 NTupleSize_t firstEntryIndex, ClusterSize_t nEntries);
724
725 void AddClustersFromFooter(void* footerBuffer);
726
727 /// Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor
728 void Reset();
729};
730
731} // namespace Experimental
732} // namespace ROOT
733
734#endif
char name[80]
Definition: TGX11.cxx:110
@ kInvalid
Definition: TSystem.h:79
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RNTupleVersion fVersion
Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum.
bool ContainsColumn(DescriptorId_t columnId) const
RClusterDescriptor(RClusterDescriptor &&other)=default
RLocator fLocator
For pre-fetching / caching an entire contiguous cluster.
static constexpr std::uint16_t kFrameVersionMin
RClusterDescriptor(const RClusterDescriptor &other)=delete
const RPageRange & GetPageRange(DescriptorId_t columnId) const
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
Meta-data stored for every column of an ntuple.
RColumnDescriptor(const RColumnDescriptor &other)=delete
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
RColumnModel fModel
Contains the column type and whether it is sorted.
static constexpr std::uint16_t kFrameVersionMin
RNTupleVersion fVersion
Versions can change, e.g., when new column types are added.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Holds the static meta-data of a column in a tree.
A helper class for piece-wise construction of an RFieldDescriptor.
RDanglingFieldDescriptor & TypeVersion(const RNTupleVersion &typeVersion)
RDanglingFieldDescriptor & FieldVersion(const RNTupleVersion &fieldVersion)
RDanglingFieldDescriptor & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
static RDanglingFieldDescriptor FromField(const Detail::RFieldBase &field)
Make a new RDanglingFieldDescriptor based off a live NTuple field.
RDanglingFieldDescriptor & NRepetitions(std::uint64_t nRepetitions)
RDanglingFieldDescriptor & FieldId(DescriptorId_t fieldId)
RDanglingFieldDescriptor & FieldName(const std::string &fieldName)
RDanglingFieldDescriptor & TypeName(const std::string &typeName)
RDanglingFieldDescriptor()=default
Make an empty dangling field descriptor.
RDanglingFieldDescriptor & FieldDescription(const std::string &fieldDescription)
A field translates read and write calls from/to underlying columns to/from tree values.
Definition: RField.hxx:58
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
RNTupleVersion fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
static constexpr std::uint16_t kFrameVersionMin
std::string fFieldName
The leaf name, not including parent fields.
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RNTupleVersion fTypeVersion
The version of the C++ type itself.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
A helper class for piece-wise construction of an RNTupleDescriptor.
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
void AddCluster(DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index)
const RNTupleDescriptor & GetDescriptor() const
void SetClusterLocator(DescriptorId_t clusterId, RClusterDescriptor::RLocator locator)
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
void AddClusterColumnRange(DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author, const RNTupleVersion &version, const RNTupleUuid &uuid)
void AddClusterPageRange(DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
void AddField(const RFieldDescriptor &fieldDesc)
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
RNTupleUuid fGroupUuid
Column sets that are created as derived sets from existing NTuples share the same group id.
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
std::chrono::system_clock::time_point fTimeStampWritten
The time stamp of writing the data to storage, which gets updated when re-written.
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
std::uint32_t SerializeHeader(void *buffer) const
We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuple's witho...
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::chrono::system_clock::time_point GetTimeStampData() const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::uint32_t SerializeFooter(void *buffer) const
Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr.
std::chrono::system_clock::time_point GetTimeStampWritten() const
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::string fAuthor
The origin of the data.
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
static constexpr std::uint16_t kFrameVersionMin
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
RNTupleVersion fVersion
The version evolves with the ntuple summary meta-data.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::string fCustodian
The party currently responsible for storing the data.
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t columnId) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
static constexpr unsigned int kNBytesPreamble
The preamble is sufficient to get the length of the header.
static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
Given kNBytesPostscript bytes, extract the header and footer lengths in bytes.
std::string fDescription
Free text from the user.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
static constexpr unsigned int kNBytesPostscript
The last few bytes after the footer store the length of footer and header.
RNTupleUuid fOwnUuid
Every NTuple gets a unique identifier.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
std::chrono::system_clock::time_point fTimeStampData
The time stamp of the ntuple data (immutable)
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
For forward and backward compatibility, attach version information to the constituents of the file f...
RResult<void> has no data member and no Inspect() method but instead a Success() factory method.
Definition: RError.hxx:257
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition: RError.hxx:195
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:77
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
Definition: RNTupleUtil.hxx:67
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
Definition: RNTupleUtil.hxx:78
std::string RNTupleUuid
Every NTuple is identified by a UUID. TODO(jblomer): should this be a TUUID?
constexpr ClusterSize_t kInvalidClusterIndex(std::uint32_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:150
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static constexpr double pi
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
NTupleSize_t fFirstElementIndex
A 64bit element index.
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
bool operator==(const RColumnRange &other) const
Generic information about the physical location of data.
RPageInfoExtended(const RPageInfo &pi, RClusterSize::ValueType i, NTupleSize_t n)
RClusterSize::ValueType fFirstInPage
Index (in cluster) of the first element in page.
NTupleSize_t fPageNo
Page number in the corresponding RPageRange.
We do not need to store the element size / uncompressed page size because we know to which column the...
RLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
bool operator==(const RPageRange &other) const
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Definition: RNTupleUtil.hxx:80
static void output(int code)
Definition: gifencode.c:226