16#ifndef ROOT7_RNTupleDescriptor
17#define ROOT7_RNTupleDescriptor
35#include <unordered_map>
36#include <unordered_set>
39namespace Experimental {
41class RFieldDescriptorBuilder;
42class RNTupleDescriptor;
43class RNTupleDescriptorBuilder;
279 std::unordered_set<DescriptorId_t>
GetColumnIds()
const;
378 for (
unsigned int i = 0;
true; ++i) {
517 static void LocateMetadata(
const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter);
763 std::uint64_t firstElementIndex,
764 std::uint32_t compressionSettings,
798 void SetNTuple(
const std::string_view
name,
const std::string_view description,
const std::string_view author,
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitColumnRange(DescriptorId_t columnId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RClusterDescriptorBuilder()=default
Make an empty cluster descriptor builder.
RResult< RClusterDescriptor > MoveDescriptor()
Attempt to make a cluster descriptor.
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
RClusterDescriptor fCluster
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RNTupleVersion fVersion
Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum.
bool ContainsColumn(DescriptorId_t columnId) const
RClusterDescriptor(RClusterDescriptor &&other)=default
static constexpr std::uint16_t kFrameVersionMin
RClusterDescriptor(const RClusterDescriptor &other)=delete
RNTupleVersion GetVersion() const
const RPageRange & GetPageRange(DescriptorId_t columnId) const
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
DescriptorId_t fClusterId
NTupleSize_t GetFirstEntryIndex() const
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
ClusterSize_t GetNEntries() const
std::uint64_t GetBytesOnStorage() const
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
DescriptorId_t GetId() const
RClusterDescriptor()=default
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
DescriptorId_t GetFieldId() const
RColumnDescriptor fColumn
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & ColumnId(DescriptorId_t columnId)
Meta-data stored for every column of an ntuple.
RNTupleVersion GetVersion() const
RColumnModel GetModel() const
RColumnDescriptor(const RColumnDescriptor &other)=delete
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t GetId() const
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
DescriptorId_t GetFieldId() const
RColumnDescriptor()=default
RColumnModel fModel
Contains the column type and whether it is sorted.
static constexpr std::uint16_t kFrameVersionMin
RNTupleVersion fVersion
Versions can change, e.g., when new column types are added.
std::uint32_t GetIndex() const
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Holds the static meta-data of a column in a tree.
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & FieldVersion(const RNTupleVersion &fieldVersion)
DescriptorId_t GetParentId() const
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & TypeVersion(const RNTupleVersion &typeVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
DescriptorId_t GetParentId() const
RNTupleVersion GetTypeVersion() const
RNTupleVersion fFieldVersion
The version of the C++-type-to-column translation mechanics.
RNTupleVersion GetFieldVersion() const
std::string GetFieldName() const
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
static constexpr std::uint16_t kFrameVersionMin
std::string fFieldName
The leaf name, not including parent fields.
RFieldDescriptor()=default
DescriptorId_t GetId() const
const std::vector< DescriptorId_t > & GetLinkIds() const
std::string GetFieldDescription() const
std::string GetTypeName() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RNTupleVersion fTypeVersion
The version of the C++ type itself.
std::uint64_t GetNRepetitions() const
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
ENTupleStructure GetStructure() const
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
A helper class for piece-wise construction of an RNTupleDescriptor.
RNTupleDescriptor MoveDescriptor()
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
Internal::RNTupleSerializer::RClusterGroup GetClusterGroup(std::uint32_t id) const
void AddClusterSummary(Internal::RNTupleSerializer::RClusterSummary &clusterSummary)
std::uint32_t GetHeaderCRC32() const
void SetFromHeader(void *headerBuffer)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
void SetOnDiskFooterSize(std::uint64_t size)
void AddCluster(DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
std::vector< Internal::RNTupleSerializer::RClusterGroup > fClusterGroups
std::uint32_t fHeaderCRC32
void SetHeaderCRC32(std::uint32_t crc32)
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index)
const RNTupleDescriptor & GetDescriptor() const
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
void AddClusterColumnRange(DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
void SetOnDiskHeaderSize(std::uint64_t size)
void AddClusterGroup(Internal::RNTupleSerializer::RClusterGroup &clusterGroup)
void AddClustersFromFooter(void *footerBuffer)
RNTupleDescriptor fDescriptor
std::vector< Internal::RNTupleSerializer::RClusterSummary > fClusterSummaries
void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author, const RNTupleVersion &version, const RNTupleUuid &uuid)
void AddClusterPageRange(DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
void AddField(const RFieldDescriptor &fieldDesc)
std::forward_iterator_tag iterator_category
bool operator!=(const iterator &rh) const
RIterator(const RNTupleDescriptor &ntuple, std::size_t index)
bool operator==(const iterator &rh) const
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
std::ptrdiff_t difference_type
Used to loop over all the clusters of an ntuple (in unspecified order)
RClusterDescriptorIterable(const RNTupleDescriptor &ntuple)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
bool operator==(const iterator &rh) const
bool operator!=(const iterator &rh) const
std::ptrdiff_t difference_type
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
std::forward_iterator_tag iterator_category
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
Used to loop over a field's associated columns.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
bool operator!=(const iterator &rh) const
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
bool operator==(const iterator &rh) const
std::forward_iterator_tag iterator_category
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::ptrdiff_t difference_type
Used to loop over a field's child fields.
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
RNTupleUuid fGroupUuid
Column sets that are created as derived sets from existing NTuples share the same group id.
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
std::string GetCustodian() const
std::chrono::system_clock::time_point fTimeStampWritten
The time stamp of writing the data to storage, which gets updated when re-written.
std::string GetAuthor() const
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
std::uint32_t SerializeHeader(void *buffer) const
We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuple's witho...
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::string GetDescription() const
std::string GetName() const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::uint64_t GetOnDiskHeaderSize() const
std::size_t GetNClusters() const
std::chrono::system_clock::time_point GetTimeStampData() const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
NTupleSize_t GetNEntries() const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::uint32_t SerializeFooter(void *buffer) const
Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr.
std::chrono::system_clock::time_point GetTimeStampWritten() const
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::string fAuthor
The origin of the data.
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RNTupleUuid GetOwnUuid() const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
static constexpr std::uint16_t kFrameVersionMin
RNTupleDescriptor()=default
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
RNTupleVersion fVersion
The version evolves with the ntuple summary meta-data.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::size_t GetNFields() const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
std::uint64_t GetOnDiskFooterSize() const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
std::uint32_t GetHeaderSize() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::string fCustodian
The party currently responsible for storing the data.
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t columnId) const
std::size_t GetNColumns() const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
RNTupleUuid GetGroupUuid() const
static constexpr unsigned int kNBytesPreamble
The preamble is sufficient to get the length of the header.
static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
Given kNBytesPostscript bytes, extract the header and footer lengths in bytes.
std::string fDescription
Free text from the user.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
std::uint32_t GetFooterSize() const
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
RNTupleVersion GetVersion() const
static constexpr unsigned int kNBytesPostscript
The last few bytes after the footer store the length of footer and header.
RNTupleUuid fOwnUuid
Every NTuple gets a unique identifier.
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
std::chrono::system_clock::time_point fTimeStampData
The time stamp of the ntuple data (immutable)
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
For forward and backward compatibility, attach version information to the constituents of the file f...
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::string RNTupleUuid
Every NTuple is identified by a UUID. TODO(jblomer): should this be a TUUID?
constexpr ClusterSize_t kInvalidClusterIndex(std::uint32_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The window of element indexes of a particular column in a particular cluster.
bool Contains(NTupleSize_t index) const
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
NTupleSize_t fFirstElementIndex
A 64bit element index.
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
bool operator==(const RColumnRange &other) const
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Generic information about the physical location of data.
static void output(int code)