17#ifndef ROOT7_RNTupleDescriptor
18#define ROOT7_RNTupleDescriptor
36#include <unordered_map>
37#include <unordered_set>
40namespace Experimental {
42class RFieldDescriptorBuilder;
43class RNTupleDescriptor;
44class RNTupleDescriptorBuilder;
48class RColumnElementBase;
293 std::size_t pageSize);
339 std::unordered_set<DescriptorId_t>
GetColumnIds()
const;
686 std::unique_ptr<RNTupleDescriptor>
Clone()
const;
947 :
fCluster(clusterId, firstEntryIndex, nEntries)
997 static std::vector<RClusterDescriptorBuilder>
1050 void SetNTuple(
const std::string_view
name,
const std::string_view description);
1062 const RColumnModel &model, std::uint32_t
index, std::uint64_t firstElementIdx = 0U);
TObject * clone(const char *newname) const override
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
A column element encapsulates the translation between basic C++ types and their column representation...
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RClusterDescriptor.
RClusterDescriptorBuilder & AddDeferredColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for deferred columns missing in this cluster.
RClusterDescriptorBuilder(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
Make an empty cluster descriptor builder.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptor fCluster
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
bool HasPageLocations() const
RClusterDescriptor(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
RClusterDescriptor(RClusterDescriptor &&other)=default
bool ContainsColumn(DescriptorId_t physicalId) const
RClusterDescriptor(const RClusterDescriptor &other)=delete
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
const RColumnRange & GetColumnRange(DescriptorId_t physicalId) const
DescriptorId_t fClusterId
NTupleSize_t GetFirstEntryIndex() const
std::unordered_set< DescriptorId_t > GetColumnIds() const
RClusterDescriptor Clone() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
ClusterSize_t GetNEntries() const
std::uint64_t GetBytesOnStorage() const
const RPageRange & GetPageRange(DescriptorId_t physicalId) const
DescriptorId_t GetId() const
RClusterDescriptor()=default
void EnsureHasPageLocations() const
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder()=default
void AddCluster(DescriptorId_t clusterId)
RClusterGroupDescriptor fClusterGroup
RClusterGroupDescriptorBuilder & PageListLength(std::uint32_t pageListLength)
static std::vector< RClusterDescriptorBuilder > GetClusterSummaries(const RNTupleDescriptor &ntplDesc, DescriptorId_t clusterGroupId)
Used to prepare the cluster descriptor builders when loading the page locations for a certain cluster...
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
DescriptorId_t GetId() const
Clusters are stored in cluster groups.
RClusterGroupDescriptor Clone() const
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
std::uint32_t GetPageListLength() const
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
std::uint64_t GetNClusters() const
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fPageListLength
Uncompressed size of the page list.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor()=default
DescriptorId_t fClusterGroupId
bool Contains(DescriptorId_t clusterId) const
bool operator==(const RClusterGroupDescriptor &other) const
RNTupleLocator GetPageListLocator() const
std::vector< DescriptorId_t > fClusterIds
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
DescriptorId_t GetId() const
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
DescriptorId_t GetFieldId() const
RColumnDescriptor fColumn
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
Meta-data stored for every column of an ntuple.
RColumnModel GetModel() const
bool IsDeferredColumn() const
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
std::uint64_t fFirstElementIndex
Specifies the index for the first stored element for this column.
RColumnDescriptor(const RColumnDescriptor &other)=delete
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
std::uint64_t GetFirstElementIndex() const
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
DescriptorId_t GetFieldId() const
RColumnDescriptor()=default
RColumnModel fModel
Contains the column type and whether it is sorted.
DescriptorId_t GetLogicalId() const
bool IsAliasColumn() const
DescriptorId_t GetPhysicalId() const
std::uint32_t GetIndex() const
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
A helper class for piece-wise construction of an RColumnGroupDescriptor.
void AddColumn(DescriptorId_t physicalId)
RColumnGroupDescriptorBuilder & ColumnGroupId(DescriptorId_t columnGroupId)
RResult< RColumnGroupDescriptor > MoveDescriptor()
RColumnGroupDescriptorBuilder()=default
RColumnGroupDescriptor fColumnGroup
Meta-data for a sets of columns; non-trivial column groups are used for sharded clusters.
RColumnGroupDescriptor(const RColumnGroupDescriptor &other)=delete
RColumnGroupDescriptor & operator=(const RColumnGroupDescriptor &other)=delete
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
DescriptorId_t GetId() const
RColumnGroupDescriptor & operator=(RColumnGroupDescriptor &&other)=default
bool operator==(const RColumnGroupDescriptor &other) const
bool HasAllColumns() const
bool Contains(DescriptorId_t physicalId) const
RColumnGroupDescriptor()=default
DescriptorId_t fColumnGroupId
const std::unordered_set< DescriptorId_t > & GetPhysicalColumnIds() const
RColumnGroupDescriptor(RColumnGroupDescriptor &&other)=default
Holds the static meta-data of an RNTuple column.
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
DescriptorId_t GetParentId() const
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
DescriptorId_t GetParentId() const
std::string GetTypeAlias() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t GetTypeVersion() const
std::string GetFieldName() const
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t GetFieldVersion() const
RFieldDescriptor()=default
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
DescriptorId_t GetId() const
const std::vector< DescriptorId_t > & GetLinkIds() const
std::string GetFieldDescription() const
std::string GetTypeName() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
std::uint64_t GetNRepetitions() const
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
ENTupleStructure GetStructure() const
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
A helper class for piece-wise construction of an RNTupleDescriptor.
RNTupleDescriptor MoveDescriptor()
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
std::uint32_t GetHeaderCRC32() const
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also include the page list envelopes.
RResult< void > AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx=0U)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
std::uint32_t fHeaderCRC32
void SetHeaderCRC32(std::uint32_t crc32)
void SetNTuple(const std::string_view name, const std::string_view description)
const RNTupleDescriptor & GetDescriptor() const
void Reset()
Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor.
void AddClusterGroup(RClusterGroupDescriptorBuilder &&clusterGroup)
RResult< void > AddClusterWithDetails(RClusterDescriptor &&clusterDesc)
Used during writing.
void SetOnDiskHeaderSize(std::uint64_t size)
RNTupleDescriptor fDescriptor
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
void AddField(const RFieldDescriptor &fieldDesc)
std::forward_iterator_tag iterator_category
bool operator!=(const iterator &rh) const
RIterator(const RNTupleDescriptor &ntuple, std::size_t index)
bool operator==(const iterator &rh) const
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
std::ptrdiff_t difference_type
Used to loop over all the clusters of an ntuple (in unspecified order)
RClusterDescriptorIterable(const RNTupleDescriptor &ntuple)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::forward_iterator_tag iterator_category
std::ptrdiff_t difference_type
RIterator(const RNTupleDescriptor &ntuple, std::size_t index)
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
bool operator!=(const iterator &rh) const
bool operator==(const iterator &rh) const
Used to loop over all the cluster groups of an ntuple (in unspecified order)
RClusterGroupDescriptorIterable(const RNTupleDescriptor &ntuple)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
bool operator==(const iterator &rh) const
bool operator!=(const iterator &rh) const
std::ptrdiff_t difference_type
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
std::forward_iterator_tag iterator_category
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
Used to loop over a field's associated columns.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
void CollectColumnIds(DescriptorId_t fieldId)
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
bool operator!=(const iterator &rh) const
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
bool operator==(const iterator &rh) const
std::forward_iterator_tag iterator_category
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::ptrdiff_t difference_type
Used to loop over a field's child fields.
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::size_t GetNLogicalColumns() const
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster summaries are added.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::string GetDescription() const
std::string GetName() const
RResult< void > DropClusterDetails(DescriptorId_t clusterId)
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::uint64_t GetOnDiskHeaderSize() const
std::size_t GetNClusters() const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::string fName
The ntuple name needs to be unique in a given storage location (file)
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::uint64_t GetGeneration() const
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
RNTupleDescriptor()=default
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::size_t GetNFields() const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
std::uint64_t GetOnDiskFooterSize() const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
RResult< void > AddClusterDetails(RClusterDescriptor &&clusterDesc)
Methods to load and drop cluster details.
std::size_t GetNClusterGroups() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RColumnDescriptorIterable GetColumnIterable() const
std::size_t GetNPhysicalColumns() const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::string fDescription
Free text from the user.
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distriniguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr ClusterSize_t kInvalidClusterIndex(std::uint64_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
The window of element indexes of a particular column in a particular cluster.
bool Contains(NTupleSize_t index) const
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
NTupleSize_t fFirstElementIndex
A 64bit element index.
ClusterSize_t fNElements
The number of column elements in the cluster.
bool operator==(const RColumnRange &other) const
DescriptorId_t fPhysicalColumnId
Wrap the integer in a struct in order to avoid template specialization clash with std::uint32_t.
Generic information about the physical location of data.