Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleSerialize.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleSerialize.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2021-08-02
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT7_RNTupleSerialize
18#define ROOT7_RNTupleSerialize
19
20#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RSpan.hxx>
23
24#include <cstdint>
25#include <map>
26#include <string>
27#include <vector>
28
29namespace ROOT {
30namespace Experimental {
31
32enum class EColumnType;
33class RClusterDescriptor;
34class RNTupleDescriptor;
35
36namespace Internal {
37
38class RClusterDescriptorBuilder;
39class RNTupleDescriptorBuilder;
40
41// clang-format off
42/**
43\class ROOT::Experimental::Internal::RNTupleSerializer
44\ingroup NTuple
45\brief A helper class for serialization and deserialization of the RNTuple binary format
46
47All serialization and deserialization routines return the number of bytes processed (written or read).
48
49The serialization routines can be called with a nullptr buffer, in which case only the size required to perform
50a serialization is returned. Deserialization routines must be called with a buffer that is sufficiently large.
51
52Deserialization errors throw exceptions. Only when indicated or when passed as a parameter is the buffer size checked.
53*/
54// clang-format on
56public:
57 static constexpr std::uint16_t kEnvelopeTypeHeader = 0x01;
58 static constexpr std::uint16_t kEnvelopeTypeFooter = 0x02;
59 static constexpr std::uint16_t kEnvelopeTypePageList = 0x03;
60
61 static constexpr std::uint16_t kFlagRepetitiveField = 0x01;
62
63 static constexpr std::uint32_t kFlagSortAscColumn = 0x01;
64 static constexpr std::uint32_t kFlagSortDesColumn = 0x02;
65 static constexpr std::uint32_t kFlagNonNegativeColumn = 0x04;
66 static constexpr std::uint32_t kFlagDeferredColumn = 0x08;
67
68 static constexpr DescriptorId_t kZeroFieldId = std::uint64_t(-2);
69
71 std::uint64_t fLength = 0;
73 };
74
76 std::uint64_t fFirstEntry = 0;
77 std::uint64_t fNEntries = 0;
78 /// -1 for "all columns"
79 std::int32_t fColumnGroupID = -1;
80 };
81
83 std::uint64_t fMinEntry = 0;
84 std::uint64_t fEntrySpan = 0;
85 std::uint32_t fNClusters = 0;
87 };
88
89 /// The serialization context is used for the piecewise serialization of a descriptor. During header serialization,
90 /// the mapping of in-memory field and column IDs to on-disk IDs is built so that it can be used for the
91 /// footer serialization in a second step.
// Bookkeeping object for the in-memory <-> on-disk ID translation of fields, columns, clusters and cluster groups,
// plus the header size and header checksum recorded through the setters below.
// NOTE(review): in this listing several member declaration lines are missing (bodies appear without their
// signature line); comments on those bodies describe only what the visible statements do.
92 class RContext {
93 private:
94 std::uint64_t fHeaderSize = 0;
95 std::uint64_t fHeaderXxHash3 = 0;
// Forward maps: key is the in-memory ID, value is the on-disk ID assigned to it.
96 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskFieldIDs;
97 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskColumnIDs;
98 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskClusterIDs;
99 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskClusterGroupIDs;
// Inverse maps: the vector index is the on-disk ID, the element is the in-memory ID. On-disk IDs are therefore
// handed out densely in registration order (each new ID is the vector size at the time of registration).
100 std::vector<DescriptorId_t> fOnDisk2MemFieldIDs;
101 std::vector<DescriptorId_t> fOnDisk2MemColumnIDs;
102 std::vector<DescriptorId_t> fOnDisk2MemClusterIDs;
103 std::vector<DescriptorId_t> fOnDisk2MemClusterGroupIDs;
// NOTE(review): `-1U` has type `unsigned int`; where std::size_t is wider than unsigned int the stored value is
// UINT_MAX, not the largest std::size_t. Confirm that no user compares this sentinel against `std::size_t(-1)`.
104 std::size_t fHeaderExtensionOffset = -1U;
105
106 public:
107 void SetHeaderSize(std::uint64_t size) { fHeaderSize = size; }
108 std::uint64_t GetHeaderSize() const { return fHeaderSize; }
109 void SetHeaderXxHash3(std::uint64_t xxhash3) { fHeaderXxHash3 = xxhash3; }
110 std::uint64_t GetHeaderXxHash3() const { return fHeaderXxHash3; }
111 /// Map an in-memory field ID to its on-disk counterpart. It is allowed to call this function multiple times for
112 /// the same `memId`, in which case the return value is the on-disk ID assigned on the first call.
// The candidate on-disk ID is the next free index of the inverse vector.
114 auto onDiskId = fOnDisk2MemFieldIDs.size();
// try_emplace leaves an existing entry untouched; p.second is true only when `memId` was newly inserted.
115 const auto &p = fMem2OnDiskFieldIDs.try_emplace(memId, onDiskId);
116 if (p.second)
117 fOnDisk2MemFieldIDs.push_back(memId);
// Either the freshly assigned ID or the one stored by an earlier call.
118 return (*p.first).second;
119 }
120 /// Map an in-memory column ID to its on-disk counterpart. It is allowed to call this function multiple times for
121 /// the same `memId`, in which case the return value is the on-disk ID assigned on the first call.
// Same scheme as for fields above, applied to the column maps.
123 auto onDiskId = fOnDisk2MemColumnIDs.size();
124 const auto &p = fMem2OnDiskColumnIDs.try_emplace(memId, onDiskId);
125 if (p.second)
126 fOnDisk2MemColumnIDs.push_back(memId);
127 return (*p.first).second;
128 }
// NOTE(review): the declaration line of this body is missing from the listing; judging by the members it
// touches it registers a cluster ID. Unlike the field/column bodies above, it overwrites any existing mapping
// for `memId` and always appends to the inverse vector, so a repeated call with the same `memId` yields a new
// on-disk ID each time.
130 auto onDiskId = fOnDisk2MemClusterIDs.size();
131 fMem2OnDiskClusterIDs[memId] = onDiskId;
132 fOnDisk2MemClusterIDs.push_back(memId);
133 return onDiskId;
134 }
// NOTE(review): declaration line missing from the listing; same overwrite-and-append behavior as the cluster
// body above, applied to the cluster group maps.
136 {
137 auto onDiskId = fOnDisk2MemClusterGroupIDs.size();
138 fMem2OnDiskClusterGroupIDs[memId] = onDiskId;
139 fOnDisk2MemClusterGroupIDs.push_back(memId);
140 return onDiskId;
141 }
142 /// Map in-memory field and column IDs to their on-disk counterparts. This function is unconditionally called
143 /// during header serialization. This function must be manually called after an incremental schema update as page
144 /// list serialization requires all columns to be mapped.
145 void MapSchema(const RNTupleDescriptor &desc, bool forHeaderExtension);
146
// NOTE(review): declaration line missing from the listing. The lookup uses std::map::at, which throws
// std::out_of_range when `memId` has not been registered.
151 {
152 return fMem2OnDiskClusterGroupIDs.at(memId);
153 }
// NOTE(review): declaration line missing from the listing. The lookup uses std::vector::operator[], which does
// not bounds-check `onDiskId`.
158 {
159 return fOnDisk2MemClusterGroupIDs[onDiskId];
160 }
161
162 /// Return a vector containing the in-memory field ID for each on-disk counterpart, in order, i.e. the `i`-th
163 /// value corresponds to the in-memory field ID for `i`-th on-disk ID
164 const std::vector<DescriptorId_t> &GetOnDiskFieldList() const { return fOnDisk2MemFieldIDs; }
165 /// Mark the first on-disk field ID that is part of the schema extension
167 /// Return the offset of the first element in `fOnDisk2MemFieldIDs` that is part of the schema extension
168 std::size_t GetHeaderExtensionOffset() const { return fHeaderExtensionOffset; }
169 };
170
171 /// Writes an XxHash-3 64-bit checksum of the byte range given by data and length.
172 static std::uint32_t
173 SerializeXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3, void *buffer);
174 /// Expects an xxhash3 checksum in the 8 bytes following data + length and verifies it.
175 static RResult<void> VerifyXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3);
176 static RResult<void> VerifyXxHash3(const unsigned char *data, std::uint64_t length);
177
178 static std::uint32_t SerializeInt16(std::int16_t val, void *buffer);
179 static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val);
180 static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer);
181 static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val);
182
183 static std::uint32_t SerializeInt32(std::int32_t val, void *buffer);
184 static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val);
185 static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer);
186 static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val);
187
188 static std::uint32_t SerializeInt64(std::int64_t val, void *buffer);
189 static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val);
190 static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer);
191 static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val);
192
193 static std::uint32_t SerializeString(const std::string &val, void *buffer);
194 static RResult<std::uint32_t> DeserializeString(const void *buffer, std::uint64_t bufSize, std::string &val);
195
196 /// While we could just interpret the enums as ints, we make the translation explicit
197 /// in order to avoid accidentally changing the on-disk numbers when adjusting the enum classes.
198 static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer);
199 static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer);
202
203 static std::uint32_t SerializeEnvelopePreamble(std::uint16_t envelopeType, void *buffer);
204 static std::uint32_t SerializeEnvelopePostscript(unsigned char *envelope, std::uint64_t size);
205 static std::uint32_t
206 SerializeEnvelopePostscript(unsigned char *envelope, std::uint64_t size, std::uint64_t &xxhash3);
207 // The bufSize must include the 8 bytes for the final xxhash3 checksum.
209 DeserializeEnvelope(const void *buffer, std::uint64_t bufSize, std::uint16_t expectedType);
211 DeserializeEnvelope(const void *buffer, std::uint64_t bufSize, std::uint16_t expectedType, std::uint64_t &xxhash3);
212
213 static std::uint32_t SerializeRecordFramePreamble(void *buffer);
214 static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer);
215 static std::uint32_t SerializeFramePostscript(void *frame, std::uint64_t size);
217 DeserializeFrameHeader(const void *buffer, std::uint64_t bufSize, std::uint64_t &frameSize, std::uint32_t &nitems);
219 DeserializeFrameHeader(const void *buffer, std::uint64_t bufSize, std::uint64_t &frameSize);
220
221 // An empty flags vector will be serialized as a single, zero feature flag
222 // The most significant bit in every flag is reserved and must _not_ be set
223 static std::uint32_t SerializeFeatureFlags(const std::vector<std::uint64_t> &flags, void *buffer);
225 DeserializeFeatureFlags(const void *buffer, std::uint64_t bufSize, std::vector<std::uint64_t> &flags);
226
227 static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer);
228 static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer);
229 static RResult<std::uint32_t> DeserializeLocator(const void *buffer, std::uint64_t bufSize, RNTupleLocator &locator);
231 DeserializeEnvelopeLink(const void *buffer, std::uint64_t bufSize, REnvelopeLink &envelopeLink);
232
233 static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer);
234 static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer);
236 DeserializeClusterSummary(const void *buffer, std::uint64_t bufSize, RClusterSummary &clusterSummary);
238 DeserializeClusterGroup(const void *buffer, std::uint64_t bufSize, RClusterGroup &clusterGroup);
239
240 /// Serialize the schema description in `desc` into `buffer`. If `forHeaderExtension` is true, serialize only the
241 /// fields and columns tagged as part of the header extension (see `RNTupleDescriptorBuilder::BeginHeaderExtension`).
242 static std::uint32_t SerializeSchemaDescription(void *buffer, const RNTupleDescriptor &desc, const RContext &context,
243 bool forHeaderExtension = false);
245 DeserializeSchemaDescription(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder);
246
247 static RContext SerializeHeader(void *buffer, const RNTupleDescriptor &desc);
248 static std::uint32_t SerializePageList(void *buffer, const RNTupleDescriptor &desc,
249 std::span<DescriptorId_t> physClusterIDs, const RContext &context);
250 static std::uint32_t SerializeFooter(void *buffer, const RNTupleDescriptor &desc, const RContext &context);
251
252 static RResult<void>
253 DeserializeHeader(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder);
254 static RResult<void>
255 DeserializeFooter(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder);
256 // The clusters vector must be initialized with the cluster summaries corresponding to the page list
257 static RResult<void> DeserializePageList(const void *buffer, std::uint64_t bufSize, DescriptorId_t clusterGroupId,
258 RNTupleDescriptor &desc);
259}; // class RNTupleSerializer
260
261} // namespace Internal
262} // namespace Experimental
263} // namespace ROOT
264
265#endif // ROOT7_RNTupleSerialize
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t nitems
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
The available trivial, native content types of a column.
A helper class for piece-wise construction of an RNTupleDescriptor.
The serialization context is used for the piecewise serialization of a descriptor.
DescriptorId_t GetOnDiskColumnId(DescriptorId_t memId) const
const std::vector< DescriptorId_t > & GetOnDiskFieldList() const
Return a vector containing the in-memory field ID for each on-disk counterpart, in order,...
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskClusterIDs
DescriptorId_t GetOnDiskFieldId(DescriptorId_t memId) const
DescriptorId_t GetMemColumnId(DescriptorId_t onDiskId) const
DescriptorId_t MapColumnId(DescriptorId_t memId)
Map an in-memory column ID to its on-disk counterpart.
DescriptorId_t MapFieldId(DescriptorId_t memId)
Map an in-memory field ID to its on-disk counterpart.
DescriptorId_t GetMemFieldId(DescriptorId_t onDiskId) const
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskFieldIDs
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskColumnIDs
std::size_t GetHeaderExtensionOffset() const
Return the offset of the first element in fOnDisk2MemFieldIDs that is part of the schema extension.
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskClusterGroupIDs
DescriptorId_t GetMemClusterGroupId(DescriptorId_t onDiskId) const
DescriptorId_t GetMemClusterId(DescriptorId_t onDiskId) const
DescriptorId_t GetOnDiskClusterGroupId(DescriptorId_t memId) const
DescriptorId_t GetOnDiskClusterId(DescriptorId_t memId) const
void BeginHeaderExtension()
Mark the first on-disk field ID that is part of the schema extension.
void MapSchema(const RNTupleDescriptor &desc, bool forHeaderExtension)
Map in-memory field and column IDs to their on-disk counterparts.
A helper class for serialization and deserialization of the RNTuple binary format.
static std::uint32_t SerializeXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3, void *buffer)
Writes an XxHash-3 64-bit checksum of the byte range given by data and length.
static RResult< std::uint32_t > DeserializeClusterSummary(const void *buffer, std::uint64_t bufSize, RClusterSummary &clusterSummary)
static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer)
static constexpr std::uint16_t kEnvelopeTypeHeader
static RResult< std::uint32_t > DeserializeClusterGroup(const void *buffer, std::uint64_t bufSize, RClusterGroup &clusterGroup)
static std::uint32_t SerializeEnvelopePostscript(unsigned char *envelope, std::uint64_t size)
static RContext SerializeHeader(void *buffer, const RNTupleDescriptor &desc)
static std::uint32_t SerializeFeatureFlags(const std::vector< std::uint64_t > &flags, void *buffer)
static constexpr std::uint16_t kFlagRepetitiveField
static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer)
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< void > DeserializeHeader(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< void > DeserializeFooter(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static RResult< std::uint32_t > DeserializeFrameHeader(const void *buffer, std::uint64_t bufSize, std::uint64_t &frameSize, std::uint32_t &nitems)
static constexpr std::uint32_t kFlagNonNegativeColumn
static std::uint32_t SerializePageList(void *buffer, const RNTupleDescriptor &desc, std::span< DescriptorId_t > physClusterIDs, const RContext &context)
static constexpr std::uint32_t kFlagSortDesColumn
static constexpr std::uint32_t kFlagDeferredColumn
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t SerializeEnvelopePreamble(std::uint16_t envelopeType, void *buffer)
static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val)
static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer)
static std::uint32_t SerializeFramePostscript(void *frame, std::uint64_t size)
static std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
static RResult< void > DeserializePageList(const void *buffer, std::uint64_t bufSize, DescriptorId_t clusterGroupId, RNTupleDescriptor &desc)
static std::uint32_t SerializeSchemaDescription(void *buffer, const RNTupleDescriptor &desc, const RContext &context, bool forHeaderExtension=false)
Serialize the schema description in desc into buffer.
static RResult< std::uint32_t > DeserializeSchemaDescription(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer)
static std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
static constexpr std::uint16_t kEnvelopeTypePageList
static RResult< void > VerifyXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3)
Expects an xxhash3 checksum in the 8 bytes following data + length and verifies it.
static RResult< std::uint16_t > DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static RResult< std::uint32_t > DeserializeFeatureFlags(const void *buffer, std::uint64_t bufSize, std::vector< std::uint64_t > &flags)
static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val)
static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val)
static RResult< std::uint16_t > DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer)
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint64_t bufSize, std::string &val)
static std::uint32_t SerializeRecordFramePreamble(void *buffer)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static constexpr std::uint16_t kEnvelopeTypeFooter
static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer)
static RResult< std::uint32_t > DeserializeEnvelopeLink(const void *buffer, std::uint64_t bufSize, REnvelopeLink &envelopeLink)
static std::uint32_t SerializeFooter(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
static constexpr std::uint32_t kFlagSortAscColumn
static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer)
While we could just interpret the enums as ints, we make the translation explicit in order to avoid a...
static RResult< std::uint32_t > DeserializeEnvelope(const void *buffer, std::uint64_t bufSize, std::uint16_t expectedType)
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< std::uint32_t > DeserializeLocator(const void *buffer, std::uint64_t bufSize, RNTupleLocator &locator)
The on-storage meta-data of an ntuple.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Generic information about the physical location of data.