Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleDescriptor
17#define ROOT7_RNTupleDescriptor
18
19#include <ROOT/RColumnModel.hxx>
20#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RStringView.hxx>
23
24#include <algorithm>
25#include <chrono>
26#include <functional>
27#include <memory>
28#include <ostream>
29#include <vector>
30#include <string>
31#include <unordered_map>
32
33namespace ROOT {
34namespace Experimental {
35
36class RDanglingFieldDescriptor;
37class RNTupleDescriptorBuilder;
38class RNTupleModel;
39
40// clang-format off
41/**
42\class ROOT::Experimental::RFieldDescriptor
43\ingroup NTuple
44\brief Meta-data stored for every field of an ntuple
45*/
46// clang-format on
50
51private:
53 /// The version of the C++-type-to-column translation mechanics
55 /// The version of the C++ type itself
57 /// The leaf name, not including parent fields
58 std::string fFieldName;
59 /// Free text set by the user
60 std::string fFieldDescription;
61 /// The C++ type that was used when writing the field
62 std::string fTypeName;
63 /// The number of elements per entry for fixed-size arrays
64 std::uint64_t fNRepetitions = 0;
65 /// The structural information carried by this field in the data model tree
67 /// Establishes sub field relationships, such as classes and collections
69 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
70 /// order of sub fields.
71 std::vector<DescriptorId_t> fLinkIds;
72
73public:
74 RFieldDescriptor() = default;
75 RFieldDescriptor(const RFieldDescriptor &other) = delete;
79
80 /// In order to handle changes to the serialization routine in future ntuple versions
81 static constexpr std::uint16_t kFrameVersionCurrent = 0;
82 static constexpr std::uint16_t kFrameVersionMin = 0;
83
84 bool operator==(const RFieldDescriptor &other) const;
85 /// Get a copy of the descriptor
86 RFieldDescriptor Clone() const;
87
88 DescriptorId_t GetId() const { return fFieldId; }
91 std::string GetFieldName() const { return fFieldName; }
92 std::string GetFieldDescription() const { return fFieldDescription; }
93 std::string GetTypeName() const { return fTypeName; }
94 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
97 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
98};
99
100
101// clang-format off
102/**
103\class ROOT::Experimental::RColumnDescriptor
104\ingroup NTuple
105\brief Meta-data stored for every column of an ntuple
106*/
107// clang-format on
110
111private:
113 /// Versions can change, e.g., when new column types are added
115 /// Contains the column type and whether it is sorted
117 /// Every column belongs to one and only one field
119 /// A field can be serialized into several columns, which are numbered from zero to $n$
120 std::uint32_t fIndex;
121
122public:
123 /// In order to handle changes to the serialization routine in future ntuple versions
124 static constexpr std::uint16_t kFrameVersionCurrent = 0;
125 static constexpr std::uint16_t kFrameVersionMin = 0;
126
127 RColumnDescriptor() = default;
128 RColumnDescriptor(const RColumnDescriptor &other) = delete;
132
133 bool operator==(const RColumnDescriptor &other) const;
134
135 DescriptorId_t GetId() const { return fColumnId; }
137 RColumnModel GetModel() const { return fModel; }
138 std::uint32_t GetIndex() const { return fIndex; }
140};
141
142
143// clang-format off
144/**
145\class ROOT::Experimental::RClusterDescriptor
146\ingroup NTuple
147\brief Meta-data for a set of ntuple clusters
148
149The cluster descriptor might carry information of only a subset of available clusters, for instance if multiple
150files are chained and not all of them have been processed yet.
151*/
152// clang-format on
155
156public:
157 /// Generic information about the physical location of data. Values depend on the concrete storage type. E.g.,
158 /// for a local file fUrl might be unused and fPosition might be a file offset. Objects on storage can be compressed
159 /// and therefore we need to store their actual size.
160 struct RLocator {
161 std::int64_t fPosition = 0;
162 std::uint32_t fBytesOnStorage = 0;
163 std::string fUrl;
164
165 bool operator==(const RLocator &other) const {
166 return fPosition == other.fPosition && fBytesOnStorage == other.fBytesOnStorage && fUrl == other.fUrl;
167 }
168 };
169
170 /// The window of element indexes of a particular column in a particular cluster
173 /// A 64bit element index
175 /// A 32bit value for the number of column elements in the cluster
177 /// The usual format for ROOT compression settings (see Compression.h).
178 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
179 std::int64_t fCompressionSettings = 0;
180
181 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
182 // Should this be done on the field level?
183
184 bool operator==(const RColumnRange &other) const {
185 return fColumnId == other.fColumnId && fFirstElementIndex == other.fFirstElementIndex &&
187 }
188
189 bool Contains(NTupleSize_t index) const {
190 return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
191 }
192 };
193
194 /// Records the partition of data into pages for a particular column in a particular cluster
195 struct RPageRange {
196 /// We do not need to store the element size / uncompressed page size because we know to which column
197 /// the page belongs
198 struct RPageInfo {
199 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
201 /// The meaning of fLocator depends on the storage backend.
203
204 bool operator==(const RPageInfo &other) const {
205 return fNElements == other.fNElements && fLocator == other.fLocator;
206 }
207 };
208
209 RPageRange() = default;
210 RPageRange(const RPageRange &other) = delete;
211 RPageRange &operator =(const RPageRange &other) = delete;
212 RPageRange(RPageRange &&other) = default;
213 RPageRange &operator =(RPageRange &&other) = default;
214
216 std::vector<RPageInfo> fPageInfos;
217
218 bool operator==(const RPageRange &other) const {
219 return fColumnId == other.fColumnId && fPageInfos == other.fPageInfos;
220 }
221 };
222
223private:
225 /// Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum
227 /// Clusters can be swapped by adjusting the entry offsets
230 /// For pre-fetching / caching an entire contiguous cluster
232
233 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
234 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
235
236public:
237 /// In order to handle changes to the serialization routine in future ntuple versions
238 static constexpr std::uint16_t kFrameVersionCurrent = 0;
239 static constexpr std::uint16_t kFrameVersionMin = 0;
240
246
247 bool operator==(const RClusterDescriptor &other) const;
248
249 DescriptorId_t GetId() const { return fClusterId; }
253 RLocator GetLocator() const { return fLocator; }
254 const RColumnRange &GetColumnRange(DescriptorId_t columnId) const { return fColumnRanges.at(columnId); }
255 const RPageRange &GetPageRange(DescriptorId_t columnId) const { return fPageRanges.at(columnId); }
256};
257
258
259// clang-format off
260/**
261\class ROOT::Experimental::RNTupleDescriptor
262\ingroup NTuple
263\brief The on-storage meta-data of an ntuple
264
265Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
266several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
267relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
268its location and size, and for every column the range of element indexes as well as a list of pages and page
269locations.
270
271The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
272for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
273
274The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
275the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
276written struct. This allows for forward and backward compatibility when the meta-data evolves.
277*/
278// clang-format on
281
282private:
283 /// The ntuple name needs to be unique in a given storage location (file)
284 std::string fName;
285 /// Free text from the user
286 std::string fDescription;
287 /// The origin of the data
288 std::string fAuthor;
289 /// The party currently responsible for storing the data
290 std::string fCustodian;
291 /// The time stamp of the ntuple data (immutable)
292 std::chrono::system_clock::time_point fTimeStampData;
293 /// The time stamp of writing the data to storage, which gets updated when re-written
294 std::chrono::system_clock::time_point fTimeStampWritten;
295 /// The version evolves with the ntuple summary meta-data
297 /// Every NTuple gets a unique identifier
299 /// Column sets that are created as derived sets from existing NTuples share the same group id.
300 /// NTuples in the same group have the same number of entries and are supposed to contain associated data.
302
303 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
304 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
305 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
306 /// from a chain of files
307 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
308
309public:
310 // clang-format off
311 /**
312 \class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorRange
313 \ingroup NTuple
314 \brief Used to loop over a field's associated columns
315 */
316 // clang-format on
318 private:
319 /// The associated NTuple for this range.
321 /// The descriptor ids of the columns ordered by index id
322 std::vector<DescriptorId_t> fColumns = {};
323 public:
324 class RIterator {
325 private:
326 /// The enclosing range's NTuple.
328 /// The enclosing range's descriptor id list.
329 const std::vector<DescriptorId_t> &fColumns;
330 std::size_t fIndex = 0;
331 public:
332 using iterator_category = std::forward_iterator_tag;
335 using difference_type = std::ptrdiff_t;
338
339 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
340 : fNTuple(ntuple), fColumns(columns), fIndex(index) {}
341 iterator operator++() { ++fIndex; return *this; }
343 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
344 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
345 };
346
348 : fNTuple(ntuple)
349 {
350 for (unsigned int i = 0; true; ++i) {
351 auto columnId = ntuple.FindColumnId(field.GetId(), i);
352 if (columnId == kInvalidDescriptorId)
353 break;
354 fColumns.emplace_back(columnId);
355 }
356 }
359 };
360
361 // clang-format off
362 /**
363 \class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorRange
364 \ingroup NTuple
365 \brief Used to loop over a field's child fields
366 */
367 // clang-format on
369 private:
370 /// The associated NTuple for this range.
372 /// The descriptor ids of the child fields. These may be sorted using
373 /// a comparison function.
374 std::vector<DescriptorId_t> fFieldChildren = {};
375 public:
376 class RIterator {
377 private:
378 /// The enclosing range's NTuple.
380 /// The enclosing range's descriptor id list.
381 const std::vector<DescriptorId_t>& fFieldChildren;
382 std::size_t fIndex = 0;
383 public:
384 using iterator_category = std::forward_iterator_tag;
387 using difference_type = std::ptrdiff_t;
390
391 RIterator(const RNTupleDescriptor& ntuple, const std::vector<DescriptorId_t>& fieldChildren,
392 std::size_t index) : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index) {}
393 iterator operator++() { ++fIndex; return *this; }
397 );
398 }
399 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
400 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
401 };
403 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds()) {}
404 /// Sort the range using an arbitrary comparison function.
406 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator)
407 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
408 {
409 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
410 }
412 return RIterator(fNTuple, fFieldChildren, 0);
413 }
416 }
417 };
418
419 /// In order to handle changes to the serialization routine in future ntuple versions
420 static constexpr std::uint16_t kFrameVersionCurrent = 0;
421 static constexpr std::uint16_t kFrameVersionMin = 0;
422 /// The preamble is sufficient to get the length of the header
423 static constexpr unsigned int kNBytesPreamble = 8;
424 /// The last few bytes after the footer store the length of footer and header
425 static constexpr unsigned int kNBytesPostscript = 16;
426
427 RNTupleDescriptor() = default;
428 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
432
433 bool operator ==(const RNTupleDescriptor &other) const;
434
435 /// We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuples without libCore.
436 /// Serializes the global ntuple information as well as the column and field schemata
437 /// Returns the number of bytes and fills buffer if it is not nullptr.
438 /// TODO(jblomer): instead of runtime testing for nullptr, there should be a template for the case where
439 /// only the size of the buffer is required.
440 std::uint32_t SerializeHeader(void* buffer) const;
441 /// Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr.
442 std::uint32_t SerializeFooter(void* buffer) const;
443 /// Given kNBytesPostscript bytes, extract the header and footer lengths in bytes
444 static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter);
445
446 std::uint32_t GetHeaderSize() const {
447 return SerializeHeader(nullptr);
448 }
449 std::uint32_t GetFooterSize() const {
450 return SerializeFooter(nullptr);
451 }
452
454 return fFieldDescriptors.at(fieldId);
455 }
457 return fColumnDescriptors.at(columnId);
458 }
460 return fClusterDescriptors.at(clusterId);
461 }
462
464 return RFieldDescriptorRange(*this, fieldDesc);
465 }
467 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
468 {
469 return RFieldDescriptorRange(*this, fieldDesc, comparator);
470 }
472 return GetFieldRange(GetFieldDescriptor(fieldId));
473 }
475 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
476 {
477 return GetFieldRange(GetFieldDescriptor(fieldId), comparator);
478 }
481 }
483 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
484 {
485 return GetFieldRange(GetFieldZeroId(), comparator);
486 }
487
489 {
490 return RColumnDescriptorRange(*this, fieldDesc);
491 }
493 {
494 return RColumnDescriptorRange(*this, GetFieldDescriptor(fieldId));
495 }
496
497 std::string GetName() const { return fName; }
498 std::string GetDescription() const { return fDescription; }
499 std::string GetAuthor() const { return fAuthor; }
500 std::string GetCustodian() const { return fCustodian; }
501 std::chrono::system_clock::time_point GetTimeStampData() const { return fTimeStampData; }
502 std::chrono::system_clock::time_point GetTimeStampWritten() const { return fTimeStampWritten; }
504 RNTupleUuid GetOwnUuid() const { return fOwnUuid; }
506
507 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
508 std::size_t GetNColumns() const { return fColumnDescriptors.size(); }
509 std::size_t GetNClusters() const { return fClusterDescriptors.size(); }
510
511 // The number of entries as seen with the currently loaded cluster meta-data; there might be more
514
515 /// Returns the logical parent of all top-level NTuple data fields.
517 DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
518 /// Searches for a top-level field
519 DescriptorId_t FindFieldId(std::string_view fieldName) const;
520 DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
524
525 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
526 /// In case of invalid field ID, an empty string is returned.
527 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
528
529 /// Re-create the C++ model from the stored meta-data
530 std::unique_ptr<RNTupleModel> GenerateModel() const;
531 void PrintInfo(std::ostream &output) const;
532};
533
534namespace Detail {
535 class RFieldBase;
536}
537
538// clang-format off
539/**
540\class ROOT::Experimental::RDanglingFieldDescriptor
541\ingroup NTuple
542\brief A helper class for piece-wise construction of an RFieldDescriptor
543
544Dangling field descriptors describe a single field in isolation. They are
545missing the necessary relationship information (parent field, any child fields)
546required to describe a real NTuple field.
547
548Dangling field descriptors can only become actual descriptors when added to an
549RNTupleDescriptorBuilder instance and then linked to other fields.
550*/
551// clang-format on
553private:
555public:
556 /// Make an empty dangling field descriptor.
558 /// Make a new RDanglingFieldDescriptor based off an existing descriptor.
559 /// Relationship information is lost during the conversion to a
560 /// dangling descriptor:
561 /// * Parent id is reset to an invalid id.
562 /// * Field children ids are forgotten.
563 ///
564 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
565 explicit RDanglingFieldDescriptor(const RFieldDescriptor& fieldDesc);
566
567 /// Make a new RDanglingFieldDescriptor based off a live NTuple field.
569
571 fField.fFieldId = fieldId;
572 return *this;
573 }
575 fField.fFieldVersion = fieldVersion;
576 return *this;
577 }
579 fField.fTypeVersion = typeVersion;
580 return *this;
581 }
582 RDanglingFieldDescriptor& FieldName(const std::string& fieldName) {
583 fField.fFieldName = fieldName;
584 return *this;
585 }
586 RDanglingFieldDescriptor& FieldDescription(const std::string& fieldDescription) {
587 fField.fFieldDescription = fieldDescription;
588 return *this;
589 }
590 RDanglingFieldDescriptor& TypeName(const std::string& typeName) {
591 fField.fTypeName = typeName;
592 return *this;
593 }
594 RDanglingFieldDescriptor& NRepetitions(std::uint64_t nRepetitions) {
595 fField.fNRepetitions = nRepetitions;
596 return *this;
597 }
599 fField.fStructure = structure;
600 return *this;
601 }
602 /// Attempt to make a field descriptor. This may fail if the dangling field
603 /// was not given enough information to make a proper descriptor.
605};
606
607// clang-format off
608/**
609\class ROOT::Experimental::RNTupleDescriptorBuilder
610\ingroup NTuple
611\brief A helper class for piece-wise construction of an RNTupleDescriptor
612
613Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
614*/
615// clang-format on
617private:
619
620public:
621 /// Checks whether invariants hold:
622 /// * NTuple name is valid
623 /// * Fields have valid parent and child ids
625 const RNTupleDescriptor& GetDescriptor() const { return fDescriptor; }
627
628 void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author,
629 const RNTupleVersion &version, const RNTupleUuid &uuid);
630
631 void AddField(const RFieldDescriptor& fieldDesc);
633
634 void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId,
635 const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index);
636
637 void SetFromHeader(void* headerBuffer);
638
639 void AddCluster(DescriptorId_t clusterId, RNTupleVersion version,
640 NTupleSize_t firstEntryIndex, ClusterSize_t nEntries);
644
645 void AddClustersFromFooter(void* footerBuffer);
646};
647
648} // namespace Experimental
649} // namespace ROOT
650
651#endif
char name[80]
Definition TGX11.cxx:110
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RNTupleVersion fVersion
Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum.
RClusterDescriptor(RClusterDescriptor &&other)=default
RLocator fLocator
For pre-fetching / caching an entire contiguous cluster.
static constexpr std::uint16_t kFrameVersionMin
RClusterDescriptor(const RClusterDescriptor &other)=delete
const RPageRange & GetPageRange(DescriptorId_t columnId) const
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
Meta-data stored for every column of an ntuple.
RColumnDescriptor(const RColumnDescriptor &other)=delete
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
RColumnModel fModel
Contains the column type and whether it is sorted.
static constexpr std::uint16_t kFrameVersionMin
RNTupleVersion fVersion
Versions can change, e.g., when new column types are added.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Holds the static meta-data of a column in a tree.
A helper class for piece-wise construction of an RFieldDescriptor.
RDanglingFieldDescriptor & TypeVersion(const RNTupleVersion &typeVersion)
RDanglingFieldDescriptor & FieldVersion(const RNTupleVersion &fieldVersion)
RDanglingFieldDescriptor & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
static RDanglingFieldDescriptor FromField(const Detail::RFieldBase &field)
Make a new RDanglingFieldDescriptor based off a live NTuple field.
RDanglingFieldDescriptor & NRepetitions(std::uint64_t nRepetitions)
RDanglingFieldDescriptor & FieldId(DescriptorId_t fieldId)
RDanglingFieldDescriptor & FieldName(const std::string &fieldName)
RDanglingFieldDescriptor & TypeName(const std::string &typeName)
RDanglingFieldDescriptor()=default
Make an empty dangling field descriptor.
RDanglingFieldDescriptor & FieldDescription(const std::string &fieldDescription)
A field translates read and write calls from/to underlying columns to/from tree values.
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
RNTupleVersion fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::string fFieldDescription
Free text set by the user.
static constexpr std::uint16_t kFrameVersionMin
std::string fFieldName
The leaf name, not including parent fields.
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RNTupleVersion fTypeVersion
The version of the C++ type itself.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
A helper class for piece-wise construction of an RNTupleDescriptor.
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
void AddCluster(DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index)
const RNTupleDescriptor & GetDescriptor() const
void SetClusterLocator(DescriptorId_t clusterId, RClusterDescriptor::RLocator locator)
void AddClusterColumnRange(DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author, const RNTupleVersion &version, const RNTupleUuid &uuid)
void AddClusterPageRange(DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
void AddField(const RFieldDescriptor &fieldDesc)
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
Used to loop over a field's associated columns.
RColumnDescriptorRange(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
RFieldDescriptorRange(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
RFieldDescriptorRange(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
RFieldDescriptorRange GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
RNTupleUuid fGroupUuid
Column sets that are created as derived sets from existing NTuples share the same group id.
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
std::chrono::system_clock::time_point fTimeStampWritten
The time stamp of writing the data to storage, which gets updated when re-written.
RFieldDescriptorRange GetFieldRange(const RFieldDescriptor &fieldDesc) const
RColumnDescriptorRange GetColumnRange(DescriptorId_t fieldId) const
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
std::uint32_t SerializeHeader(void *buffer) const
We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuple's witho...
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RColumnDescriptorRange GetColumnRange(const RFieldDescriptor &fieldDesc) const
std::chrono::system_clock::time_point GetTimeStampData() const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::uint32_t SerializeFooter(void *buffer) const
Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr.
std::chrono::system_clock::time_point GetTimeStampWritten() const
RFieldDescriptorRange GetFieldRange(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::string fAuthor
The origin of the data.
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorRange GetFieldRange(DescriptorId_t fieldId) const
static constexpr std::uint16_t kFrameVersionMin
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
RNTupleVersion fVersion
The version evolves with the ntuple summary meta-data.
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::string fCustodian
The party currently responsible for storing the data.
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
RFieldDescriptorRange GetTopLevelFields() const
NTupleSize_t GetNElements(DescriptorId_t columnId) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
static constexpr unsigned int kNBytesPreamble
The preamble is sufficient to get the length of the header.
static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
Given kNBytesPostscript bytes, extract the header and footer lengths in bytes.
std::string fDescription
Free text from the user.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
static constexpr unsigned int kNBytesPostscript
The last few bytes after the footer store the length of footer and header.
RNTupleUuid fOwnUuid
Every NTuple gets a unique identifier.
void PrintInfo(std::ostream &output) const
std::chrono::system_clock::time_point fTimeStampData
The time stamp of the ntuple data (immutable)
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
RFieldDescriptorRange GetFieldRange(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
For forward and backward compatibility, attach version information to the constituents of the file f...
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:196
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::string RNTupleUuid
Every NTuple is identified by a UUID. TODO(jblomer): should this be a TUUID?
constexpr ClusterSize_t kInvalidClusterIndex(std::uint32_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
Generic information about the physical location of data.
We do not need to store the element size / uncompressed page size because we know to which column the...
RLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
static void output(int code)
Definition gifencode.c:226