Logo ROOT  
Reference Guide
RNTupleSerialize.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleSerialize.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2021-08-02
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleSerialize
17#define ROOT7_RNTupleSerialize
18
19#include <ROOT/RError.hxx>
20#include <ROOT/RNTupleUtil.hxx>
21#include <ROOT/RSpan.hxx>
22
23#include <cstdint>
24#include <map>
25#include <string>
26#include <vector>
27
28namespace ROOT {
29namespace Experimental {
30
31enum class EColumnType;
32class RClusterDescriptor;
33class RClusterDescriptorBuilder;
34class RNTupleDescriptor;
35class RNTupleDescriptorBuilder;
36
37
38namespace Internal {
39
40// clang-format off
41/**
42\class ROOT::Experimental::Internal::RNTupleSerializer
43\ingroup NTuple
44\brief A helper class for serialization and deserialization of the RNTuple binary format
45
46All serialization and deserialization routines return the number of bytes processed (written or read).
47
48The serialization routines can be called with a nullptr buffer, in which case only the size required to perform
49a serialization is returned. Deserialization routines must be called with a buffer that is sufficiently large.
50
51Deserialization errors throw exceptions. Only when indicated or when passed as a parameter is the buffer size checked.
52*/
53// clang-format on
55public:
56 /// In order to handle changes to the serialization routine in future ntuple versions
57 static constexpr std::uint16_t kEnvelopeCurrentVersion = 1;
58 static constexpr std::uint16_t kEnvelopeMinVersion = 1;
59 static constexpr std::uint32_t kReleaseCandidateTag = 1;
60
61 static constexpr std::uint16_t kFlagRepetitiveField = 0x01;
62 static constexpr std::uint16_t kFlagAliasField = 0x02;
63
64 static constexpr std::uint32_t kFlagSortAscColumn = 0x01;
65 static constexpr std::uint32_t kFlagSortDesColumn = 0x02;
66 static constexpr std::uint32_t kFlagNonNegativeColumn = 0x04;
67
68 static constexpr DescriptorId_t kZeroFieldId = std::uint64_t(-2);
69
71 std::uint32_t fUnzippedSize = 0;
73 };
74
76 std::uint64_t fFirstEntry = 0;
77 std::uint64_t fNEntries = 0;
78 /// -1 for "all columns"
79 std::int32_t fColumnGroupID = -1;
80 };
81
83 std::uint32_t fNClusters = 0;
85 };
86
87 /// The serialization context is used for the piecewise serialization of a descriptor. During header serialization,
88 /// the mapping of in-memory field and column IDs to physical IDs is built so that it can be used for the
89 /// footer serialization in a second step.
90 class RContext {
91 private:
   // Size and CRC32 value of the header as recorded through the setters below; this class only stores and returns them.
92 std::uint32_t fHeaderSize = 0;
93 std::uint32_t fHeaderCrc32 = 0;
   // Forward lookup, one map per kind of descriptor element: in-memory ID -> physical ID.
94 std::map<DescriptorId_t, DescriptorId_t> fMem2PhysFieldIDs;
95 std::map<DescriptorId_t, DescriptorId_t> fMem2PhysColumnIDs;
96 std::map<DescriptorId_t, DescriptorId_t> fMem2PhysClusterIDs;
97 std::map<DescriptorId_t, DescriptorId_t> fMem2PhysClusterGroupIDs;
   // Reverse lookup: the element stored at index physId is the corresponding in-memory ID
   // (entries are appended by the mapping bodies further down).
98 std::vector<DescriptorId_t> fPhys2MemFieldIDs;
99 std::vector<DescriptorId_t> fPhys2MemColumnIDs;
100 std::vector<DescriptorId_t> fPhys2MemClusterIDs;
101 std::vector<DescriptorId_t> fPhys2MemClusterGroupIDs;
102
103 public:
   // Plain accessors for the stored header size and header CRC32.
104 void SetHeaderSize(std::uint32_t size) { fHeaderSize = size; }
105 std::uint32_t GetHeaderSize() const { return fHeaderSize; }
106 void SetHeaderCRC32(std::uint32_t crc32) { fHeaderCrc32 = crc32; }
107 std::uint32_t GetHeaderCRC32() const { return fHeaderCrc32; }
   // NOTE(review): the signature line (listing line 108) of this method is missing from this listing.
   // The body assigns the next free physical field ID (the current size of fPhys2MemFieldIDs) to memId,
   // records the mapping in both directions and returns the new physical ID.
   // No check for an already mapped memId is visible in the body.
109 auto physId = fPhys2MemFieldIDs.size();
110 fMem2PhysFieldIDs[memId] = physId;
111 fPhys2MemFieldIDs.push_back(memId);
112 return physId;
113 }
   // NOTE(review): signature line (listing line 114) missing from this listing.
   // Same scheme as above, applied to the column ID containers.
115 auto physId = fPhys2MemColumnIDs.size();
116 fMem2PhysColumnIDs[memId] = physId;
117 fPhys2MemColumnIDs.push_back(memId);
118 return physId;
119 }
   // NOTE(review): signature line (listing line 120) missing from this listing.
   // Same scheme as above, applied to the cluster ID containers.
121 auto physId = fPhys2MemClusterIDs.size();
122 fMem2PhysClusterIDs[memId] = physId;
123 fPhys2MemClusterIDs.push_back(memId);
124 return physId;
125 }
   // NOTE(review): signature line (listing line 126) missing from this listing.
   // Same scheme as above, applied to the cluster group ID containers.
127 {
128 auto physId = fPhys2MemClusterGroupIDs.size();
129 fMem2PhysClusterGroupIDs[memId] = physId;
130 fPhys2MemClusterGroupIDs.push_back(memId);
131 return physId;
132 }
   // NOTE(review): listing lines 133-140 are missing here; presumably the GetPhys*Id / GetMem*Id
   // lookup methods that read the containers above -- confirm against the original header.
141 };
142
143 /// Writes a CRC32 checksum of the byte range given by data and length.
144 static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length,
145 std::uint32_t &crc32, void *buffer);
146 /// Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
147 static RResult<void> VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32);
148 static RResult<void> VerifyCRC32(const unsigned char *data, std::uint32_t length);
149
150 static std::uint32_t SerializeInt16(std::int16_t val, void *buffer);
151 static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val);
152 static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer);
153 static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val);
154
155 static std::uint32_t SerializeInt32(std::int32_t val, void *buffer);
156 static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val);
157 static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer);
158 static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val);
159
160 static std::uint32_t SerializeInt64(std::int64_t val, void *buffer);
161 static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val);
162 static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer);
163 static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val);
164
165 static std::uint32_t SerializeString(const std::string &val, void *buffer);
166 static RResult<std::uint32_t> DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val);
167
168 /// While we could just interpret the enums as ints, we make the translation explicit
169 /// in order to avoid accidentally changing the on-disk numbers when adjusting the enum classes.
170 static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer);
171 static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer);
174
175 static std::uint32_t SerializeEnvelopePreamble(void *buffer);
176 static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer);
177 static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size,
178 std::uint32_t &crc32, void *buffer);
179 // The bufSize must include the 4 bytes for the final CRC32 checksum.
180 static RResult<std::uint32_t> DeserializeEnvelope(const void *buffer, std::uint32_t bufSize);
181 static RResult<std::uint32_t> DeserializeEnvelope(const void *buffer, std::uint32_t bufSize, std::uint32_t &crc32);
182
183 static std::uint32_t SerializeRecordFramePreamble(void *buffer);
184 static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer);
185 static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size);
186 static RResult<std::uint32_t> DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize,
187 std::uint32_t &frameSize, std::uint32_t &nitems);
188 static RResult<std::uint32_t> DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize,
189 std::uint32_t &frameSize);
190
191 // An empty flags vector will be serialized as a single, zero feature flag
192 // The most significant bit in every flag is reserved and must _not_ be set
193 static std::uint32_t SerializeFeatureFlags(const std::vector<std::int64_t> &flags, void *buffer);
194 static RResult<std::uint32_t> DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize,
195 std::vector<std::int64_t> &flags);
196
197 static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer);
198 static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer);
199 static RResult<std::uint32_t> DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator);
200 static RResult<std::uint32_t> DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize,
201 REnvelopeLink &envelopeLink);
202
203 static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer);
204 static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer);
205 static RResult<std::uint32_t> DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize,
206 RClusterSummary &clusterSummary);
207 static RResult<std::uint32_t> DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize,
208 RClusterGroup &clusterGroup);
209
210 static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc);
211 static std::uint32_t SerializePageListV1(void *buffer,
212 const RNTupleDescriptor &desc,
213 std::span<DescriptorId_t> physClusterIDs,
214 const RContext &context);
215 static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context);
216
217 static RResult<void> DeserializeHeaderV1(const void *buffer,
218 std::uint32_t bufSize,
219 RNTupleDescriptorBuilder &descBuilder);
220 static RResult<void> DeserializeFooterV1(const void *buffer,
221 std::uint32_t bufSize,
222 RNTupleDescriptorBuilder &descBuilder);
223 // The clusters vector must be initialized with the cluster summaries corresponding to the page list
224 static RResult<void> DeserializePageListV1(const void *buffer,
225 std::uint32_t bufSize,
226 std::vector<RClusterDescriptorBuilder> &clusters);
227}; // class RNTupleSerializer
228
229} // namespace Internal
230} // namespace Experimental
231} // namespace ROOT
232
233#endif // ROOT7_RNTupleSerialize
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t nitems
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
The available trivial, native content types of a column.
The serialization context is used for the piecewise serialization of a descriptor.
DescriptorId_t GetPhysClusterGroupId(DescriptorId_t memId) const
DescriptorId_t GetPhysClusterId(DescriptorId_t memId) const
std::map< DescriptorId_t, DescriptorId_t > fMem2PhysColumnIDs
std::map< DescriptorId_t, DescriptorId_t > fMem2PhysClusterGroupIDs
DescriptorId_t GetMemClusterId(DescriptorId_t physId) const
std::map< DescriptorId_t, DescriptorId_t > fMem2PhysClusterIDs
std::map< DescriptorId_t, DescriptorId_t > fMem2PhysFieldIDs
DescriptorId_t GetPhysColumnId(DescriptorId_t memId) const
DescriptorId_t GetMemFieldId(DescriptorId_t physId) const
DescriptorId_t GetPhysFieldId(DescriptorId_t memId) const
DescriptorId_t GetMemClusterGroupId(DescriptorId_t physId) const
DescriptorId_t GetMemColumnId(DescriptorId_t physId) const
A helper class for serialization and deserialization of the RNTuple binary format.
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val)
static std::uint32_t SerializeFeatureFlags(const std::vector< std::int64_t > &flags, void *buffer)
static constexpr std::uint32_t kReleaseCandidateTag
static std::uint32_t SerializePageListV1(void *buffer, const RNTupleDescriptor &desc, std::span< DescriptorId_t > physClusterIDs, const RContext &context)
static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer)
static RResult< std::uint32_t > DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
static constexpr std::uint16_t kFlagRepetitiveField
static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
Writes a CRC32 checksum of the byte range given by data and length.
static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer)
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< void > DeserializePageListV1(const void *buffer, std::uint32_t bufSize, std::vector< RClusterDescriptorBuilder > &clusters)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static constexpr std::uint16_t kEnvelopeCurrentVersion
In order to handle changes to the serialization routine in future ntuple versions.
static constexpr std::uint32_t kFlagNonNegativeColumn
static constexpr std::uint32_t kFlagSortDesColumn
static RResult< std::uint32_t > DeserializeEnvelope(const void *buffer, std::uint32_t bufSize)
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val)
static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer)
static constexpr std::uint16_t kFlagAliasField
static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc)
static RResult< void > DeserializeFooterV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer)
static std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
static std::uint32_t SerializeEnvelopePreamble(void *buffer)
Currently all envelopes have the same version number (1).
static RResult< std::uint16_t > DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type)
static RResult< std::uint32_t > DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static constexpr std::uint16_t kEnvelopeMinVersion
static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val)
static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val)
static RResult< std::uint32_t > DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer)
static RResult< std::uint16_t > DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer)
static std::uint32_t SerializeRecordFramePreamble(void *buffer)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static RResult< std::uint32_t > DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
static RResult< std::uint32_t > DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize, std::vector< std::int64_t > &flags)
static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer)
static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size)
static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
static constexpr std::uint32_t kFlagSortAscColumn
static RResult< void > VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer)
While we could just interpret the enums as ints, we make the translation explicit in order to avoid a...
static constexpr DescriptorId_t kZeroFieldId
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< void > DeserializeHeaderV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< std::uint32_t > DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
RResult<void> has no data member and no Inspect() method but instead a Success() factory method.
Definition: RError.hxx:257
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition: RError.hxx:195
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
Definition: RNTupleUtil.hxx:37
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:83
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Generic information about the physical location of data.