16#ifndef ROOT7_RNTupleDescriptor
17#define ROOT7_RNTupleDescriptor
35#include <unordered_map>
36#include <unordered_set>
39namespace Experimental {
41class RFieldDescriptorBuilder;
42class RNTupleDescriptor;
43class RNTupleDescriptorBuilder;
313 std::unordered_set<DescriptorId_t>
GetColumnIds()
const;
455 for (
unsigned int i = 0;
true; ++i) {
625 std::unique_ptr<RNTupleDescriptor>
Clone()
const;
861 :
fCluster(clusterId, firstEntryIndex, nEntries)
906 static std::vector<RClusterDescriptorBuilder>
959 void SetNTuple(
const std::string_view
name,
const std::string_view description);
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitColumnRange(DescriptorId_t columnId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
Make an empty cluster descriptor builder.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptor fCluster
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
bool HasPageLocations() const
bool ContainsColumn(DescriptorId_t columnId) const
RClusterDescriptor(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
RClusterDescriptor(RClusterDescriptor &&other)=default
RClusterDescriptor(const RClusterDescriptor &other)=delete
const RPageRange & GetPageRange(DescriptorId_t columnId) const
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
DescriptorId_t fClusterId
NTupleSize_t GetFirstEntryIndex() const
std::unordered_set< DescriptorId_t > GetColumnIds() const
RClusterDescriptor Clone() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
ClusterSize_t GetNEntries() const
std::uint64_t GetBytesOnStorage() const
DescriptorId_t GetId() const
RClusterDescriptor()=default
void EnsureHasPageLocations() const
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder()=default
void AddCluster(DescriptorId_t clusterId)
RClusterGroupDescriptor fClusterGroup
RClusterGroupDescriptorBuilder & PageListLength(std::uint32_t pageListLength)
static std::vector< RClusterDescriptorBuilder > GetClusterSummaries(const RNTupleDescriptor &ntplDesc, DescriptorId_t clusterGroupId)
Used to prepare the cluster descriptor builders when loading the page locations for a certain cluster...
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
DescriptorId_t GetId() const
Clusters are stored in cluster groups.
RClusterGroupDescriptor Clone() const
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
std::uint32_t GetPageListLength() const
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
std::uint64_t GetNClusters() const
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fPageListLength
Uncompressed size of the page list.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor()=default
DescriptorId_t fClusterGroupId
bool Contains(DescriptorId_t clusterId) const
bool operator==(const RClusterGroupDescriptor &other) const
RNTupleLocator GetPageListLocator() const
std::vector< DescriptorId_t > fClusterIds
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
DescriptorId_t GetId() const
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
DescriptorId_t GetFieldId() const
RColumnDescriptor fColumn
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & ColumnId(DescriptorId_t columnId)
Meta-data stored for every column of an ntuple.
RColumnModel GetModel() const
RColumnDescriptor(const RColumnDescriptor &other)=delete
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t GetId() const
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
DescriptorId_t GetFieldId() const
RColumnDescriptor()=default
RColumnModel fModel
Contains the column type and whether it is sorted.
std::uint32_t GetIndex() const
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
A helper class for piece-wise construction of an RColumnGroupDescriptor.
void AddColumn(DescriptorId_t columnId)
RColumnGroupDescriptorBuilder & ColumnGroupId(DescriptorId_t columnGroupId)
RResult< RColumnGroupDescriptor > MoveDescriptor()
RColumnGroupDescriptorBuilder()=default
RColumnGroupDescriptor fColumnGroup
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
RColumnGroupDescriptor(const RColumnGroupDescriptor &other)=delete
RColumnGroupDescriptor & operator=(const RColumnGroupDescriptor &other)=delete
const std::unordered_set< DescriptorId_t > & GetColumnIds() const
DescriptorId_t GetId() const
RColumnGroupDescriptor & operator=(RColumnGroupDescriptor &&other)=default
bool operator==(const RColumnGroupDescriptor &other) const
bool HasAllColumns() const
RColumnGroupDescriptor()=default
DescriptorId_t fColumnGroupId
std::unordered_set< DescriptorId_t > fColumnIds
bool Contains(DescriptorId_t columnId) const
RColumnGroupDescriptor(RColumnGroupDescriptor &&other)=default
Holds the static meta-data of a column in a tree.
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
DescriptorId_t GetParentId() const
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
DescriptorId_t GetParentId() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t GetTypeVersion() const
std::string GetFieldName() const
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t GetFieldVersion() const
RFieldDescriptor()=default
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
DescriptorId_t GetId() const
const std::vector< DescriptorId_t > & GetLinkIds() const
std::string GetFieldDescription() const
std::string GetTypeName() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
std::uint64_t GetNRepetitions() const
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
ENTupleStructure GetStructure() const
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
A helper class for piece-wise construction of an RNTupleDescriptor.
RNTupleDescriptor MoveDescriptor()
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index)
std::uint32_t GetHeaderCRC32() const
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
RResult< void > AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
std::uint32_t fHeaderCRC32
void SetHeaderCRC32(std::uint32_t crc32)
void SetNTuple(const std::string_view name, const std::string_view description)
const RNTupleDescriptor & GetDescriptor() const
void Reset()
Clears the clusters, fields, and columns stored so far and returns to a pristine ntuple descriptor.
void AddClusterGroup(RClusterGroupDescriptorBuilder &&clusterGroup)
RResult< void > AddClusterWithDetails(RClusterDescriptor &&clusterDesc)
Used during writing.
void SetOnDiskHeaderSize(std::uint64_t size)
RNTupleDescriptor fDescriptor
void AddField(const RFieldDescriptor &fieldDesc)
std::forward_iterator_tag iterator_category
bool operator!=(const iterator &rh) const
RIterator(const RNTupleDescriptor &ntuple, std::size_t index)
bool operator==(const iterator &rh) const
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
std::ptrdiff_t difference_type
Used to loop over all the clusters of an ntuple (in unspecified order)
RClusterDescriptorIterable(const RNTupleDescriptor &ntuple)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::forward_iterator_tag iterator_category
std::ptrdiff_t difference_type
RIterator(const RNTupleDescriptor &ntuple, std::size_t index)
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
bool operator!=(const iterator &rh) const
bool operator==(const iterator &rh) const
Used to loop over all the cluster groups of an ntuple (in unspecified order)
RClusterGroupDescriptorIterable(const RNTupleDescriptor &ntuple)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
bool operator==(const iterator &rh) const
bool operator!=(const iterator &rh) const
std::ptrdiff_t difference_type
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
std::forward_iterator_tag iterator_category
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
Used to loop over a field's associated columns.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
bool operator!=(const iterator &rh) const
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
bool operator==(const iterator &rh) const
std::forward_iterator_tag iterator_category
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::ptrdiff_t difference_type
Used to loop over a field's child fields.
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster summaries are added.
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::string GetDescription() const
std::string GetName() const
RResult< void > DropClusterDetails(DescriptorId_t clusterId)
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::uint64_t GetOnDiskHeaderSize() const
std::size_t GetNClusters() const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::string fName
The ntuple name needs to be unique in a given storage location (file)
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::uint64_t GetGeneration() const
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
RNTupleDescriptor()=default
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::size_t GetNFields() const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
std::uint64_t GetOnDiskFooterSize() const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
RResult< void > AddClusterDetails(RClusterDescriptor &&clusterDesc)
Methods to load and drop cluster details.
std::size_t GetNClusterGroups() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t columnId) const
std::size_t GetNColumns() const
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::string fDescription
Free text from the user.
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr ClusterSize_t kInvalidClusterIndex(std::uint32_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
The window of element indexes of a particular column in a particular cluster.
bool Contains(NTupleSize_t index) const
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
NTupleSize_t fFirstElementIndex
A 64bit element index.
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
bool operator==(const RColumnRange &other) const
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Generic information about the physical location of data.