Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleSerialize.cxx
Go to the documentation of this file.
1/// \file RNTupleSerialize.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2021-08-02
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
18#include <ROOT/RColumnModel.hxx>
19#include <ROOT/RError.hxx>
22
23#include <RVersion.h>
24#include <TBufferFile.h>
25#include <TClass.h>
26#include <TList.h>
27#include <TStreamerInfo.h>
29#include <xxhash.h>
30
31#include <cassert>
32#include <cstring> // for memcpy
33#include <deque>
34#include <set>
35#include <unordered_map>
36
37template <typename T>
39
40
41namespace {
43
44std::uint32_t SerializeField(const ROOT::Experimental::RFieldDescriptor &fieldDesc,
45 ROOT::Experimental::DescriptorId_t onDiskParentId, void *buffer)
46{
47
48 auto base = reinterpret_cast<unsigned char *>(buffer);
49 auto pos = base;
50 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
51
52 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
53
54 pos += RNTupleSerializer::SerializeUInt32(fieldDesc.GetFieldVersion(), *where);
55 pos += RNTupleSerializer::SerializeUInt32(fieldDesc.GetTypeVersion(), *where);
56 pos += RNTupleSerializer::SerializeUInt32(onDiskParentId, *where);
57 pos += RNTupleSerializer::SerializeFieldStructure(fieldDesc.GetStructure(), *where);
58 if (fieldDesc.GetNRepetitions() > 0) {
59 pos += RNTupleSerializer::SerializeUInt16(RNTupleSerializer::kFlagRepetitiveField, *where);
60 pos += RNTupleSerializer::SerializeUInt64(fieldDesc.GetNRepetitions(), *where);
61 } else {
62 pos += RNTupleSerializer::SerializeUInt16(0, *where);
63 }
64 pos += RNTupleSerializer::SerializeString(fieldDesc.GetFieldName(), *where);
65 pos += RNTupleSerializer::SerializeString(fieldDesc.GetTypeName(), *where);
66 pos += RNTupleSerializer::SerializeString(fieldDesc.GetTypeAlias(), *where);
67 pos += RNTupleSerializer::SerializeString(fieldDesc.GetFieldDescription(), *where);
68
69 auto size = pos - base;
70 RNTupleSerializer::SerializeFramePostscript(base, size);
71
72 return size;
73}
74
75// clang-format off
76/// Serialize, in order, fields enumerated in `fieldList` to `buffer`. `firstOnDiskId` specifies the on-disk ID for the
77/// first element in the `fieldList` sequence. Before calling this function `RContext::MapSchema()` should have been
78/// called on `context` in order to map in-memory field IDs to their on-disk counterpart.
79/// \return The number of bytes written to the output buffer; if `buffer` is `nullptr` no data is serialized and the
80/// required buffer size is returned
81// clang-format on
82std::uint32_t SerializeFieldList(const ROOT::Experimental::RNTupleDescriptor &desc,
83 std::span<const ROOT::Experimental::DescriptorId_t> fieldList,
84 std::size_t firstOnDiskId,
86{
87 auto base = reinterpret_cast<unsigned char *>(buffer);
88 auto pos = base;
89 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
90
91 auto fieldZeroId = desc.GetFieldZeroId();
92 ROOT::Experimental::DescriptorId_t onDiskFieldId = firstOnDiskId;
93 for (auto fieldId : fieldList) {
94 const auto &f = desc.GetFieldDescriptor(fieldId);
95 auto onDiskParentId =
96 (f.GetParentId() == fieldZeroId) ? onDiskFieldId : context.GetOnDiskFieldId(f.GetParentId());
97 pos += SerializeField(f, onDiskParentId, *where);
98 ++onDiskFieldId;
99 }
100
101 return pos - base;
102}
103
// Parse one field record frame starting at `buffer` (at most `bufSize` bytes) and forward the decoded values to
// `fieldDesc`. NOTE(review): the signature line declaring `fieldDesc` is not visible in this listing.
// Returns the frame size taken from the frame header on success, or an error result if the frame header or the
// field structure cannot be decoded or the frame is too short for its fixed-size members.
104RResult<std::uint32_t> DeserializeField(const void *buffer, std::uint64_t bufSize,
106{
107 using ENTupleStructure = ROOT::Experimental::ENTupleStructure;
108
109 auto base = reinterpret_cast<const unsigned char *>(buffer);
110 auto bytes = base;
111 std::uint64_t frameSize;
// Number of bytes of the frame that have not been consumed yet (frame size minus the offset of `bytes`)
112 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - base); };
113 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
114 if (!result)
115 return R__FORWARD_ERROR(result);
116 bytes += result.Unwrap();
117
118 std::uint32_t fieldVersion;
119 std::uint32_t typeVersion;
120 std::uint32_t parentId;
121 // initialize properly for call to SerializeFieldStructure()
122 ENTupleStructure structure{ENTupleStructure::kLeaf};
123 std::uint16_t flags;
// Fixed-size part of the record: three 32-bit integers, the structure value and the 16-bit flag word.
// SerializeFieldStructure() is called with a null buffer only to obtain the encoded size of the structure value.
124 if (fnFrameSizeLeft() < 3 * sizeof(std::uint32_t) +
125 RNTupleSerializer::SerializeFieldStructure(structure, nullptr) +
126 sizeof(std::uint16_t))
127 {
128 return R__FAIL("field record frame too short");
129 }
130 bytes += RNTupleSerializer::DeserializeUInt32(bytes, fieldVersion);
131 bytes += RNTupleSerializer::DeserializeUInt32(bytes, typeVersion);
132 bytes += RNTupleSerializer::DeserializeUInt32(bytes, parentId);
133 result = RNTupleSerializer::DeserializeFieldStructure(bytes, structure);
134 if (!result)
135 return R__FORWARD_ERROR(result);
136 bytes += result.Unwrap();
137 bytes += RNTupleSerializer::DeserializeUInt16(bytes, flags);
138 fieldDesc.FieldVersion(fieldVersion).TypeVersion(typeVersion).ParentId(parentId).Structure(structure);
139
// The 64-bit repetition count is only present when the repetitive-field flag is set
140 if (flags & RNTupleSerializer::kFlagRepetitiveField) {
141 if (fnFrameSizeLeft() < sizeof(std::uint64_t))
142 return R__FAIL("field record frame too short");
143 std::uint64_t nRepetitions;
144 bytes += RNTupleSerializer::DeserializeUInt64(bytes, nRepetitions);
145 fieldDesc.NRepetitions(nRepetitions);
146 }
147
148 std::string fieldName;
149 std::string typeName;
150 std::string aliasName;
151 std::string description;
// NOTE(review): in the four string reads below, .Unwrap() is applied to the result of DeserializeString() before
// the `if (!result)` check. If Unwrap() throws on a failed result (presumably it does; confirm against RResult),
// a too-short string buffer surfaces as an exception here and the R__FORWARD_ERROR branches are never taken.
152 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), fieldName).Unwrap();
153 if (!result)
154 return R__FORWARD_ERROR(result);
155 bytes += result.Unwrap();
156 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), typeName).Unwrap();
157 if (!result)
158 return R__FORWARD_ERROR(result);
159 bytes += result.Unwrap();
160 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), aliasName).Unwrap();
161 if (!result)
162 return R__FORWARD_ERROR(result);
163 bytes += result.Unwrap();
164 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), description).Unwrap();
165 if (!result)
166 return R__FORWARD_ERROR(result);
167 bytes += result.Unwrap();
168 fieldDesc.FieldName(fieldName).TypeName(typeName).TypeAlias(aliasName).FieldDescription(description);
169
// The frame size from the header is returned, not the number of bytes actually parsed above
170 return frameSize;
171}
172
173std::uint32_t SerializeColumnList(const ROOT::Experimental::RNTupleDescriptor &desc,
174 std::span<const ROOT::Experimental::DescriptorId_t> fieldList,
176 void *buffer)
177{
178 using RColumnElementBase = ROOT::Experimental::Internal::RColumnElementBase;
179
180 auto base = reinterpret_cast<unsigned char *>(buffer);
181 auto pos = base;
182 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
183
184 for (auto parentId : fieldList) {
185 for (const auto &c : desc.GetColumnIterable(parentId)) {
186 if (c.IsAliasColumn())
187 continue;
188
189 auto frame = pos;
190 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
191
192 auto type = c.GetModel().GetType();
193 pos += RNTupleSerializer::SerializeColumnType(type, *where);
194 pos += RNTupleSerializer::SerializeUInt16(RColumnElementBase::GetBitsOnStorage(type), *where);
195 pos += RNTupleSerializer::SerializeUInt32(context.GetOnDiskFieldId(c.GetFieldId()), *where);
196 std::uint32_t flags = 0;
197 // TODO(jblomer): add support for descending columns in the column model
198 if (c.GetModel().GetIsSorted())
199 flags |= RNTupleSerializer::kFlagSortAscColumn;
200 // TODO(jblomer): fix for unsigned integer types
202 flags |= RNTupleSerializer::kFlagNonNegativeColumn;
203 const std::uint64_t firstElementIdx = c.GetFirstElementIndex();
204 if (firstElementIdx > 0)
205 flags |= RNTupleSerializer::kFlagDeferredColumn;
206 pos += RNTupleSerializer::SerializeUInt32(flags, *where);
207 if (flags & RNTupleSerializer::kFlagDeferredColumn)
208 pos += RNTupleSerializer::SerializeUInt64(firstElementIdx, *where);
209
210 pos += RNTupleSerializer::SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
211 }
212 }
213
214 return pos - base;
215}
216
217RResult<std::uint32_t> DeserializeColumn(const void *buffer, std::uint64_t bufSize,
219{
221
222 auto base = reinterpret_cast<const unsigned char *>(buffer);
223 auto bytes = base;
224 std::uint64_t frameSize;
225 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - base); };
226 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
227 if (!result)
228 return R__FORWARD_ERROR(result);
229 bytes += result.Unwrap();
230
231 // Initialize properly for SerializeColumnType
232 EColumnType type{EColumnType::kIndex32};
233 std::uint16_t bitsOnStorage;
234 std::uint32_t fieldId;
235 std::uint32_t flags;
236 std::uint64_t firstElementIdx = 0;
237 if (fnFrameSizeLeft() < RNTupleSerializer::SerializeColumnType(type, nullptr) +
238 sizeof(std::uint16_t) + 2 * sizeof(std::uint32_t))
239 {
240 return R__FAIL("column record frame too short");
241 }
242 result = RNTupleSerializer::DeserializeColumnType(bytes, type);
243 if (!result)
244 return R__FORWARD_ERROR(result);
245 bytes += result.Unwrap();
246 bytes += RNTupleSerializer::DeserializeUInt16(bytes, bitsOnStorage);
247 bytes += RNTupleSerializer::DeserializeUInt32(bytes, fieldId);
248 bytes += RNTupleSerializer::DeserializeUInt32(bytes, flags);
249 if (flags & RNTupleSerializer::kFlagDeferredColumn) {
250 if (fnFrameSizeLeft() < sizeof(std::uint64_t))
251 return R__FAIL("column record frame too short");
252 bytes += RNTupleSerializer::DeserializeUInt64(bytes, firstElementIdx);
253 }
254
256 return R__FAIL("column element size mismatch");
257
258 const bool isSorted = (flags & (RNTupleSerializer::kFlagSortAscColumn | RNTupleSerializer::kFlagSortDesColumn));
259 columnDesc.FieldId(fieldId).Model({type, isSorted}).FirstElementIndex(firstElementIdx);
260
261 return frameSize;
262}
263
264std::uint32_t SerializeExtraTypeInfo(const ROOT::Experimental::RExtraTypeInfoDescriptor &desc, void *buffer)
265{
266 auto base = reinterpret_cast<unsigned char *>(buffer);
267 auto pos = base;
268 void **where = (buffer == nullptr) ? &buffer : reinterpret_cast<void **>(&pos);
269
270 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
271
272 pos += RNTupleSerializer::SerializeExtraTypeInfoId(desc.GetContentId(), *where);
273 pos += RNTupleSerializer::SerializeUInt32(desc.GetTypeVersionFrom(), *where);
274 pos += RNTupleSerializer::SerializeUInt32(desc.GetTypeVersionTo(), *where);
275 pos += RNTupleSerializer::SerializeString(desc.GetTypeName(), *where);
276 pos += RNTupleSerializer::SerializeString(desc.GetContent(), *where);
277
278 auto size = pos - base;
279 RNTupleSerializer::SerializeFramePostscript(base, size);
280
281 return size;
282}
283
284std::uint32_t SerializeExtraTypeInfoList(const ROOT::Experimental::RNTupleDescriptor &ntplDesc, void *buffer)
285{
286 auto base = reinterpret_cast<unsigned char *>(buffer);
287 auto pos = base;
288 void **where = (buffer == nullptr) ? &buffer : reinterpret_cast<void **>(&pos);
289
290 for (const auto &extraTypeInfoDesc : ntplDesc.GetExtraTypeInfoIterable()) {
291 pos += SerializeExtraTypeInfo(extraTypeInfoDesc, *where);
292 }
293
294 return pos - base;
295}
296
// Parse one extra type info record frame starting at `buffer` (at most `bufSize` bytes) and forward the decoded
// values to `desc`. NOTE(review): the signature line declaring `desc` and the alias for `EExtraTypeInfoIds` are not
// visible in this listing.
// Returns the frame size taken from the frame header on success, or an error result if the frame header or the
// content ID cannot be decoded or the frame is too short for its fixed-size members.
297RResult<std::uint32_t> DeserializeExtraTypeInfo(const void *buffer, std::uint64_t bufSize,
299{
301
302 auto base = reinterpret_cast<const unsigned char *>(buffer);
303 auto bytes = base;
304 std::uint64_t frameSize;
// Number of bytes of the frame that have not been consumed yet (frame size minus the offset of `bytes`)
305 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - base); };
306 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
307 if (!result)
308 return R__FORWARD_ERROR(result);
309 bytes += result.Unwrap();
310
311 EExtraTypeInfoIds contentId{EExtraTypeInfoIds::kInvalid};
312 std::uint32_t typeVersionFrom;
313 std::uint32_t typeVersionTo;
// Fixed-size part of the record: content ID and the two type versions, 32 bit each
314 if (fnFrameSizeLeft() < 3 * sizeof(std::uint32_t)) {
315 return R__FAIL("extra type info record frame too short");
316 }
317 result = RNTupleSerializer::DeserializeExtraTypeInfoId(bytes, contentId);
318 if (!result)
319 return R__FORWARD_ERROR(result);
320 bytes += result.Unwrap();
321 bytes += RNTupleSerializer::DeserializeUInt32(bytes, typeVersionFrom);
322 bytes += RNTupleSerializer::DeserializeUInt32(bytes, typeVersionTo);
323
324 std::string typeName;
325 std::string content;
// NOTE(review): in the two string reads below, .Unwrap() is applied to the result of DeserializeString() before
// the `if (!result)` check. If Unwrap() throws on a failed result (presumably it does; confirm against RResult),
// a too-short string buffer surfaces as an exception here and the R__FORWARD_ERROR branches are never taken.
326 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), typeName).Unwrap();
327 if (!result)
328 return R__FORWARD_ERROR(result);
329 bytes += result.Unwrap();
330 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), content).Unwrap();
331 if (!result)
332 return R__FORWARD_ERROR(result);
333 bytes += result.Unwrap();
334
335 desc.ContentId(contentId)
336 .TypeVersionFrom(typeVersionFrom)
337 .TypeVersionTo(typeVersionTo)
338 .TypeName(typeName)
339 .Content(content);
340
// The frame size from the header is returned, not the number of bytes actually parsed above
341 return frameSize;
342}
343
344std::uint32_t SerializeLocatorPayloadURI(const ROOT::Experimental::RNTupleLocator &locator, unsigned char *buffer)
345{
346 const auto &uri = locator.GetPosition<std::string>();
347 if (uri.length() >= (1 << 16))
348 throw ROOT::Experimental::RException(R__FAIL("locator too large"));
349 if (buffer)
350 memcpy(buffer, uri.data(), uri.length());
351 return uri.length();
352}
353
354void DeserializeLocatorPayloadURI(const unsigned char *buffer, std::uint32_t payloadSize,
356{
357 locator.fBytesOnStorage = 0;
358 auto &uri = locator.fPosition.emplace<std::string>();
359 uri.resize(payloadSize);
360 memcpy(uri.data(), buffer, payloadSize);
361}
362
363std::uint32_t SerializeLocatorPayloadObject64(const ROOT::Experimental::RNTupleLocator &locator, unsigned char *buffer)
364{
366 if (buffer) {
367 RNTupleSerializer::SerializeUInt32(locator.fBytesOnStorage, buffer);
368 RNTupleSerializer::SerializeUInt64(data.fLocation, buffer + sizeof(std::uint32_t));
369 }
370 return sizeof(std::uint32_t) + sizeof(std::uint64_t);
371}
372
373void DeserializeLocatorPayloadObject64(const unsigned char *buffer, ROOT::Experimental::RNTupleLocator &locator)
374{
376 RNTupleSerializer::DeserializeUInt32(buffer, locator.fBytesOnStorage);
377 RNTupleSerializer::DeserializeUInt64(buffer + sizeof(std::uint32_t), data.fLocation);
378}
379
380std::uint32_t SerializeAliasColumnList(const ROOT::Experimental::RNTupleDescriptor &desc,
381 std::span<const ROOT::Experimental::DescriptorId_t> fieldList,
383 void *buffer)
384{
385 auto base = reinterpret_cast<unsigned char *>(buffer);
386 auto pos = base;
387 void **where = (buffer == nullptr) ? &buffer : reinterpret_cast<void **>(&pos);
388
389 for (auto parentId : fieldList) {
390 for (const auto &c : desc.GetColumnIterable(parentId)) {
391 if (!c.IsAliasColumn())
392 continue;
393
394 auto frame = pos;
395 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
396
397 pos += RNTupleSerializer::SerializeUInt32(context.GetOnDiskColumnId(c.GetPhysicalId()), *where);
398 pos += RNTupleSerializer::SerializeUInt32(context.GetOnDiskFieldId(c.GetFieldId()), *where);
399
400 pos += RNTupleSerializer::SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
401 }
402 }
403
404 return pos - base;
405}
406
407RResult<std::uint32_t> DeserializeAliasColumn(const void *buffer, std::uint64_t bufSize,
408 std::uint32_t &physicalColumnId, std::uint32_t &fieldId)
409{
410 auto base = reinterpret_cast<const unsigned char *>(buffer);
411 auto bytes = base;
412 std::uint64_t frameSize;
413 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - base); };
414 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
415 if (!result)
416 return R__FORWARD_ERROR(result);
417 bytes += result.Unwrap();
418
419 if (fnFrameSizeLeft() < 2 * sizeof(std::uint32_t)) {
420 return R__FAIL("alias column record frame too short");
421 }
422
423 bytes += RNTupleSerializer::DeserializeUInt32(bytes, physicalColumnId);
424 bytes += RNTupleSerializer::DeserializeUInt32(bytes, fieldId);
425
426 return frameSize;
427}
428
429} // anonymous namespace
430
432 std::uint64_t length,
433 std::uint64_t &xxhash3, void *buffer)
434{
435 if (buffer != nullptr) {
436 xxhash3 = XXH3_64bits(data, length);
437 SerializeUInt64(xxhash3, buffer);
438 }
439 return 8;
440}
441
443 std::uint64_t length,
444 std::uint64_t &xxhash3)
445{
446 auto checksumReal = XXH3_64bits(data, length);
447 DeserializeUInt64(data + length, xxhash3);
448 if (xxhash3 != checksumReal)
449 return R__FAIL("XxHash-3 checksum mismatch");
450 return RResult<void>::Success();
451}
452
455{
456 std::uint64_t xxhash3;
457 return R__FORWARD_RESULT(VerifyXxHash3(data, length, xxhash3));
458}
459
460
461std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt16(std::int16_t val, void *buffer)
462{
463 if (buffer != nullptr) {
464 auto bytes = reinterpret_cast<unsigned char *>(buffer);
465 bytes[0] = (val & 0x00FF);
466 bytes[1] = (val & 0xFF00) >> 8;
467 }
468 return 2;
469}
470
471std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt16(const void *buffer, std::int16_t &val)
472{
473 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
474 val = std::int16_t(bytes[0]) + (std::int16_t(bytes[1]) << 8);
475 return 2;
476}
477
478std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt16(std::uint16_t val, void *buffer)
479{
480 return SerializeInt16(val, buffer);
481}
482
483std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt16(const void *buffer, std::uint16_t &val)
484{
485 return DeserializeInt16(buffer, *reinterpret_cast<std::int16_t *>(&val));
486}
487
488std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt32(std::int32_t val, void *buffer)
489{
490 if (buffer != nullptr) {
491 auto bytes = reinterpret_cast<unsigned char *>(buffer);
492 bytes[0] = (val & 0x000000FF);
493 bytes[1] = (val & 0x0000FF00) >> 8;
494 bytes[2] = (val & 0x00FF0000) >> 16;
495 bytes[3] = (val & 0xFF000000) >> 24;
496 }
497 return 4;
498}
499
500std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt32(const void *buffer, std::int32_t &val)
501{
502 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
503 val = std::int32_t(bytes[0]) + (std::int32_t(bytes[1]) << 8) +
504 (std::int32_t(bytes[2]) << 16) + (std::int32_t(bytes[3]) << 24);
505 return 4;
506}
507
508std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt32(std::uint32_t val, void *buffer)
509{
510 return SerializeInt32(val, buffer);
511}
512
513std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt32(const void *buffer, std::uint32_t &val)
514{
515 return DeserializeInt32(buffer, *reinterpret_cast<std::int32_t *>(&val));
516}
517
518std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt64(std::int64_t val, void *buffer)
519{
520 if (buffer != nullptr) {
521 auto bytes = reinterpret_cast<unsigned char *>(buffer);
522 bytes[0] = (val & 0x00000000000000FF);
523 bytes[1] = (val & 0x000000000000FF00) >> 8;
524 bytes[2] = (val & 0x0000000000FF0000) >> 16;
525 bytes[3] = (val & 0x00000000FF000000) >> 24;
526 bytes[4] = (val & 0x000000FF00000000) >> 32;
527 bytes[5] = (val & 0x0000FF0000000000) >> 40;
528 bytes[6] = (val & 0x00FF000000000000) >> 48;
529 bytes[7] = (val & 0xFF00000000000000) >> 56;
530 }
531 return 8;
532}
533
534std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt64(const void *buffer, std::int64_t &val)
535{
536 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
537 val = std::int64_t(bytes[0]) + (std::int64_t(bytes[1]) << 8) +
538 (std::int64_t(bytes[2]) << 16) + (std::int64_t(bytes[3]) << 24) +
539 (std::int64_t(bytes[4]) << 32) + (std::int64_t(bytes[5]) << 40) +
540 (std::int64_t(bytes[6]) << 48) + (std::int64_t(bytes[7]) << 56);
541 return 8;
542}
543
544std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt64(std::uint64_t val, void *buffer)
545{
546 return SerializeInt64(val, buffer);
547}
548
549std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt64(const void *buffer, std::uint64_t &val)
550{
551 return DeserializeInt64(buffer, *reinterpret_cast<std::int64_t *>(&val));
552}
553
554std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeString(const std::string &val, void *buffer)
555{
556 if (buffer) {
557 auto pos = reinterpret_cast<unsigned char *>(buffer);
558 pos += SerializeUInt32(val.length(), pos);
559 memcpy(pos, val.data(), val.length());
560 }
561 return sizeof(std::uint32_t) + val.length();
562}
563
565 std::uint64_t bufSize,
566 std::string &val)
567{
568 if (bufSize < sizeof(std::uint32_t))
569 return R__FAIL("string buffer too short");
570 bufSize -= sizeof(std::uint32_t);
571
572 auto base = reinterpret_cast<const unsigned char *>(buffer);
573 auto bytes = base;
574 std::uint32_t length;
575 bytes += DeserializeUInt32(buffer, length);
576 if (bufSize < length)
577 return R__FAIL("string buffer too short");
578
579 val.resize(length);
580 memcpy(&val[0], bytes, length);
581 return sizeof(std::uint32_t) + length;
582}
583
584std::uint32_t
586{
588 switch (type) {
589 case EColumnType::kIndex64: return SerializeUInt16(0x01, buffer);
590 case EColumnType::kIndex32: return SerializeUInt16(0x02, buffer);
591 case EColumnType::kSwitch: return SerializeUInt16(0x03, buffer);
592 case EColumnType::kByte: return SerializeUInt16(0x04, buffer);
593 case EColumnType::kChar: return SerializeUInt16(0x05, buffer);
594 case EColumnType::kBit: return SerializeUInt16(0x06, buffer);
595 case EColumnType::kReal64: return SerializeUInt16(0x07, buffer);
596 case EColumnType::kReal32: return SerializeUInt16(0x08, buffer);
597 case EColumnType::kReal16: return SerializeUInt16(0x09, buffer);
598 case EColumnType::kInt64: return SerializeUInt16(0x16, buffer);
599 case EColumnType::kUInt64: return SerializeUInt16(0x0A, buffer);
600 case EColumnType::kInt32: return SerializeUInt16(0x17, buffer);
601 case EColumnType::kUInt32: return SerializeUInt16(0x0B, buffer);
602 case EColumnType::kInt16: return SerializeUInt16(0x18, buffer);
603 case EColumnType::kUInt16: return SerializeUInt16(0x0C, buffer);
604 case EColumnType::kInt8: return SerializeUInt16(0x19, buffer);
605 case EColumnType::kUInt8: return SerializeUInt16(0x0D, buffer);
606 case EColumnType::kSplitIndex64: return SerializeUInt16(0x0E, buffer);
607 case EColumnType::kSplitIndex32: return SerializeUInt16(0x0F, buffer);
608 case EColumnType::kSplitReal64: return SerializeUInt16(0x10, buffer);
609 case EColumnType::kSplitReal32: return SerializeUInt16(0x11, buffer);
610 case EColumnType::kSplitInt64: return SerializeUInt16(0x1A, buffer);
611 case EColumnType::kSplitUInt64: return SerializeUInt16(0x13, buffer);
612 case EColumnType::kSplitInt32: return SerializeUInt16(0x1B, buffer);
613 case EColumnType::kSplitUInt32: return SerializeUInt16(0x14, buffer);
614 case EColumnType::kSplitInt16: return SerializeUInt16(0x1C, buffer);
615 case EColumnType::kSplitUInt16: return SerializeUInt16(0x15, buffer);
616 default: throw RException(R__FAIL("ROOT bug: unexpected column type"));
617 }
618}
619
623{
625 std::uint16_t onDiskType;
626 auto result = DeserializeUInt16(buffer, onDiskType);
627 switch (onDiskType) {
628 case 0x01: type = EColumnType::kIndex64; break;
629 case 0x02: type = EColumnType::kIndex32; break;
630 case 0x03: type = EColumnType::kSwitch; break;
631 case 0x04: type = EColumnType::kByte; break;
632 case 0x05: type = EColumnType::kChar; break;
633 case 0x06: type = EColumnType::kBit; break;
634 case 0x07: type = EColumnType::kReal64; break;
635 case 0x08: type = EColumnType::kReal32; break;
636 case 0x09: type = EColumnType::kReal16; break;
637 case 0x16: type = EColumnType::kInt64; break;
638 case 0x0A: type = EColumnType::kUInt64; break;
639 case 0x17: type = EColumnType::kInt32; break;
640 case 0x0B: type = EColumnType::kUInt32; break;
641 case 0x18: type = EColumnType::kInt16; break;
642 case 0x0C: type = EColumnType::kUInt16; break;
643 case 0x19: type = EColumnType::kInt8; break;
644 case 0x0D: type = EColumnType::kUInt8; break;
645 case 0x0E: type = EColumnType::kSplitIndex64; break;
646 case 0x0F: type = EColumnType::kSplitIndex32; break;
647 case 0x10: type = EColumnType::kSplitReal64; break;
648 case 0x11: type = EColumnType::kSplitReal32; break;
649 case 0x1A: type = EColumnType::kSplitInt64; break;
650 case 0x13: type = EColumnType::kSplitUInt64; break;
651 case 0x1B: type = EColumnType::kSplitInt32; break;
652 case 0x14: type = EColumnType::kSplitUInt32; break;
653 case 0x1C: type = EColumnType::kSplitInt16; break;
654 case 0x15: type = EColumnType::kSplitUInt16; break;
655 default: return R__FAIL("unexpected on-disk column type");
656 }
657 return result;
658}
659
660std::uint32_t
662 void *buffer)
663{
665 switch (structure) {
666 case ENTupleStructure::kLeaf: return SerializeUInt16(0x00, buffer);
667 case ENTupleStructure::kCollection: return SerializeUInt16(0x01, buffer);
668 case ENTupleStructure::kRecord: return SerializeUInt16(0x02, buffer);
669 case ENTupleStructure::kVariant: return SerializeUInt16(0x03, buffer);
670 case ENTupleStructure::kUnsplit: return SerializeUInt16(0x04, buffer);
671 default: throw RException(R__FAIL("ROOT bug: unexpected field structure type"));
672 }
673}
674
676 const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
677{
679 std::uint16_t onDiskValue;
680 auto result = DeserializeUInt16(buffer, onDiskValue);
681 switch (onDiskValue) {
682 case 0x00: structure = ENTupleStructure::kLeaf; break;
683 case 0x01: structure = ENTupleStructure::kCollection; break;
684 case 0x02: structure = ENTupleStructure::kRecord; break;
685 case 0x03: structure = ENTupleStructure::kVariant; break;
686 case 0x04: structure = ENTupleStructure::kUnsplit; break;
687 default: return R__FAIL("unexpected on-disk field structure value");
688 }
689 return result;
690}
691
692std::uint32_t
694 void *buffer)
695{
697 switch (id) {
698 case EExtraTypeInfoIds::kStreamerInfo: return SerializeUInt32(0x00, buffer);
699 default: throw RException(R__FAIL("ROOT bug: unexpected extra type info id"));
700 }
701}
702
706{
708 std::uint32_t onDiskValue;
709 auto result = DeserializeUInt32(buffer, onDiskValue);
710 switch (onDiskValue) {
711 case 0x00: id = EExtraTypeInfoIds::kStreamerInfo; break;
712 default:
714 R__LOG_DEBUG(0, NTupleLog()) << "Unknown extra type info id: " << onDiskValue;
715 }
716 return result;
717}
718
719std::uint32_t
721{
722 auto base = reinterpret_cast<unsigned char *>(buffer);
723 auto pos = base;
724 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
725
726 pos += SerializeUInt64(envelopeType, *where);
727 // The 48bits size information is filled in the postscript
728 return pos - base;
729}
730
732 std::uint64_t size,
733 std::uint64_t &xxhash3)
734{
735 if (size < sizeof(std::uint64_t))
736 throw RException(R__FAIL("envelope size too small"));
737 if (size >= static_cast<uint64_t>(1) << 48)
738 throw RException(R__FAIL("envelope size too big"));
739 if (envelope) {
740 std::uint64_t typeAndSize;
741 DeserializeUInt64(envelope, typeAndSize);
742 typeAndSize |= (size + 8) << 16;
743 SerializeUInt64(typeAndSize, envelope);
744 }
745 return SerializeXxHash3(envelope, size, xxhash3, envelope ? (envelope + size) : nullptr);
746}
747
749 std::uint64_t size)
750{
751 std::uint64_t xxhash3;
752 return SerializeEnvelopePostscript(envelope, size, xxhash3);
753}
754
757 std::uint16_t expectedType, std::uint64_t &xxhash3)
758{
759 const std::uint64_t minEnvelopeSize = sizeof(std::uint64_t) + sizeof(std::uint64_t);
760 if (bufSize < minEnvelopeSize)
761 return R__FAIL("invalid envelope buffer, too short");
762
763 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
764 auto base = bytes;
765
766 std::uint64_t typeAndSize;
767 bytes += DeserializeUInt64(bytes, typeAndSize);
768
769 std::uint16_t envelopeType = typeAndSize & 0xFFFF;
770 if (envelopeType != expectedType) {
771 return R__FAIL("envelope type mismatch: expected " + std::to_string(expectedType) + ", found " +
772 std::to_string(envelopeType));
773 }
774
775 std::uint64_t envelopeSize = typeAndSize >> 16;
776 if (bufSize < envelopeSize)
777 return R__FAIL("envelope buffer size too small");
778 if (envelopeSize < minEnvelopeSize)
779 return R__FAIL("invalid envelope, too short");
780
781 auto result = VerifyXxHash3(base, envelopeSize - 8, xxhash3);
782 if (!result)
783 return R__FORWARD_ERROR(result);
784
785 return sizeof(typeAndSize);
786}
787
789 std::uint64_t bufSize,
790 std::uint16_t expectedType)
791{
792 std::uint64_t xxhash3;
793 return R__FORWARD_RESULT(DeserializeEnvelope(buffer, bufSize, expectedType, xxhash3));
794}
795
796
798{
799 // Marker: multiply the final size with 1
800 return SerializeInt64(1, buffer);
801}
802
803
805 std::uint32_t nitems, void *buffer)
806{
807 auto base = reinterpret_cast<unsigned char *>(buffer);
808 auto pos = base;
809 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
810
811 // Marker: multiply the final size with -1
812 pos += SerializeInt64(-1, *where);
813 pos += SerializeUInt32(nitems, *where);
814 return pos - base;
815}
816
818{
819 auto preambleSize = sizeof(std::int64_t);
820 if (size < preambleSize)
821 throw RException(R__FAIL("frame too short: " + std::to_string(size)));
822 if (frame) {
823 std::int64_t marker;
824 DeserializeInt64(frame, marker);
825 if ((marker < 0) && (size < (sizeof(std::uint32_t) + preambleSize)))
826 throw RException(R__FAIL("frame too short: " + std::to_string(size)));
827 SerializeInt64(marker * static_cast<int64_t>(size), frame);
828 }
829 return 0;
830}
831
834 std::uint64_t &frameSize, std::uint32_t &nitems)
835{
836 std::uint64_t minSize = sizeof(std::int64_t);
837 if (bufSize < minSize)
838 return R__FAIL("frame too short");
839
840 std::int64_t *ssize = reinterpret_cast<std::int64_t *>(&frameSize);
841 DeserializeInt64(buffer, *ssize);
842
843 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
844 bytes += minSize;
845
846 if (*ssize >= 0) {
847 // Record frame
848 nitems = 1;
849 } else {
850 // List frame
851 minSize += sizeof(std::uint32_t);
852 if (bufSize < minSize)
853 return R__FAIL("frame too short");
854 bytes += DeserializeUInt32(bytes, nitems);
855 *ssize = -(*ssize);
856 }
857
858 if (frameSize < minSize)
859 return R__FAIL("corrupt frame size");
860 if (bufSize < frameSize)
861 return R__FAIL("frame too short");
862
863 return bytes - reinterpret_cast<const unsigned char *>(buffer);
864}
865
868 std::uint64_t &frameSize)
869{
870 std::uint32_t nitems;
871 return R__FORWARD_RESULT(DeserializeFrameHeader(buffer, bufSize, frameSize, nitems));
872}
873
874std::uint32_t
876 void *buffer)
877{
878 if (flags.empty())
879 return SerializeUInt64(0, buffer);
880
881 if (buffer) {
882 auto bytes = reinterpret_cast<unsigned char *>(buffer);
883
884 for (unsigned i = 0; i < flags.size(); ++i) {
885 if (flags[i] & 0x8000000000000000)
886 throw RException(R__FAIL("feature flag out of bounds"));
887
888 // The MSb indicates that another Int64 follows; set this bit to 1 for all except the last element
889 if (i == (flags.size() - 1))
890 SerializeUInt64(flags[i], bytes);
891 else
892 bytes += SerializeUInt64(flags[i] | 0x8000000000000000, bytes);
893 }
894 }
895 return (flags.size() * sizeof(std::int64_t));
896}
897
900 std::vector<std::uint64_t> &flags)
901{
// Reads a chain of 64-bit feature flag words from `buffer` into `flags` (previous contents are
// discarded). A word with its most significant bit set announces that another word follows; the
// bit is stripped from the stored values. Returns the number of bytes consumed, or an error if the
// buffer ends before the chain does.
// NOTE(review): the first line(s) of the signature are not visible in this listing; the name
// DeserializeFeatureFlags is inferred from the call sites further down in the file -- confirm.
902 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
903
904 flags.clear();
905 std::uint64_t f;
906 do {
907 if (bufSize < sizeof(std::uint64_t))
908 return R__FAIL("feature flag buffer too short");
909 bytes += DeserializeUInt64(bytes, f);
910 bufSize -= sizeof(std::uint64_t);
// Store the flag word without the continuation bit
911 flags.emplace_back(f & ~0x8000000000000000);
912 } while (f & 0x8000000000000000);
913
914 return (flags.size() * sizeof(std::uint64_t));
915}
916
918 const RNTupleLocator &locator, void *buffer)
919{
// Serializes `locator` into `buffer`; a null buffer is passed on as null to the nested Serialize*
// calls. Two layouts are produced:
//  - file locators: the 32-bit bytes-on-storage value followed by the 64-bit position;
//  - other locators: a 32-bit head followed by a type-specific payload. The head is stored negated
//    and combines the total size (low 16 bits), fReserved (shifted by 16) and the type (7 bits,
//    shifted by 24).
// Throws RException for locators that cannot be serialized.
// NOTE(review): listing line 920 (the condition guarding the following throw) is missing from this
// extract; as shown here the throw would be unconditional -- check against the full source.
921 throw RException(R__FAIL("locator is not serializable"));
922
923 std::uint32_t size = 0;
924 if (locator.fType == RNTupleLocator::kTypeFile) {
// A negative first word is what DeserializeLocator() uses to recognize non-file locators, so the
// size of a file locator must fit into a non-negative 32-bit integer
925 if (static_cast<std::int32_t>(locator.fBytesOnStorage) < 0)
926 throw RException(R__FAIL("locator too large"));
927 size += SerializeUInt32(locator.fBytesOnStorage, buffer);
928 size += SerializeUInt64(locator.GetPosition<std::uint64_t>(),
929 buffer ? reinterpret_cast<unsigned char *>(buffer) + size : nullptr);
930 return size;
931 }
932
// The payload is placed right after the 32-bit head
933 auto payloadp = buffer ? reinterpret_cast<unsigned char *>(buffer) + sizeof(std::int32_t) : nullptr;
934 switch (locator.fType) {
935 case RNTupleLocator::kTypeURI: size += SerializeLocatorPayloadURI(locator, payloadp); break;
936 case RNTupleLocator::kTypeDAOS: size += SerializeLocatorPayloadObject64(locator, payloadp); break;
937 default: throw RException(R__FAIL("locator has unknown type"));
938 }
939 std::int32_t head = sizeof(std::int32_t) + size;
940 head |= locator.fReserved << 16;
941 head |= static_cast<int>(locator.fType & 0x7F) << 24;
942 head = -head;
// NOTE(review): listing line 943 is missing from this extract; presumably `head` is written to the
// start of the buffer (and accounted for in `size`) there -- confirm against the full source.
944 return size;
945}
946
948 std::uint64_t bufSize,
949 RNTupleLocator &locator)
950{
// Reads a locator from `buffer` into `locator` and returns the number of bytes consumed. The sign
// of the leading 32-bit word selects the layout: a negative word is a head (type, reserved bits,
// total size) followed by a type-specific payload; a non-negative word is the bytes-on-storage
// value of a file locator, followed by a 64-bit offset.
// NOTE(review): the first line(s) of the signature are not visible in this listing.
951 if (bufSize < sizeof(std::int32_t))
952 return R__FAIL("too short locator");
953
954 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
955 std::int32_t head;
956
957 bytes += DeserializeInt32(bytes, head);
958 bufSize -= sizeof(std::int32_t);
959 if (head < 0) {
960 head = -head;
961 const int type = head >> 24;
// The low 16 bits hold the size including the head itself.
// NOTE(review): if the encoded size is smaller than sizeof(std::int32_t), this subtraction wraps around.
962 const std::uint32_t payloadSize = (static_cast<std::uint32_t>(head) & 0x0000FFFF) - sizeof(std::int32_t);
963 if (bufSize < payloadSize)
964 return R__FAIL("too short locator");
965 locator.fType = static_cast<RNTupleLocator::ELocatorType>(type);
966 locator.fReserved = static_cast<std::uint32_t>(head >> 16) & 0xFF;
// NOTE(review): whatever the payload helpers return is not inspected here.
967 switch (type) {
968 case RNTupleLocator::kTypeURI: DeserializeLocatorPayloadURI(bytes, payloadSize, locator); break;
969 case RNTupleLocator::kTypeDAOS: DeserializeLocatorPayloadObject64(bytes, locator); break;
970 default: return R__FAIL("unsupported locator type: " + std::to_string(type));
971 }
972 bytes += payloadSize;
973 } else {
974 if (bufSize < sizeof(std::uint64_t))
975 return R__FAIL("too short locator");
976 auto &offset = locator.fPosition.emplace<std::uint64_t>();
// NOTE(review): listing line 977 is missing from this extract.
978 bytes += DeserializeUInt64(bytes, offset);
979 locator.fBytesOnStorage = head;
980 }
981
982 return bytes - reinterpret_cast<const unsigned char *>(buffer);
983}
984
986 const REnvelopeLink &envelopeLink, void *buffer)
987{
// Serializes an envelope link as the 64-bit fLength followed by the locator and returns the total
// number of bytes. A null buffer is passed on as null to both nested calls.
// NOTE(review): the first line of the signature is not visible in this listing; the name
// SerializeEnvelopeLink is inferred from its use in the cluster group serialization -- confirm.
988 auto size = SerializeUInt64(envelopeLink.fLength, buffer);
989 size += SerializeLocator(envelopeLink.fLocator,
990 buffer ? reinterpret_cast<unsigned char *>(buffer) + size : nullptr);
991 return size;
992}
993
996 REnvelopeLink &envelopeLink)
997{
// Reads an envelope link (64-bit length followed by a locator) from `buffer` into `envelopeLink`.
// Returns the number of bytes consumed, or the error reported by DeserializeLocator() / a
// "too short" error if fewer than 8 bytes are available.
// NOTE(review): the first line(s) of the signature are not visible in this listing; the name
// DeserializeEnvelopeLink is inferred from its use in the cluster group deserialization -- confirm.
998 if (bufSize < sizeof(std::int64_t))
999 return R__FAIL("too short envelope link");
1000
1001 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
1002 bytes += DeserializeUInt64(bytes, envelopeLink.fLength);
1003 bufSize -= sizeof(std::uint64_t);
1004 auto result = DeserializeLocator(bytes, bufSize, envelopeLink.fLocator);
1005 if (!result)
1006 return R__FORWARD_ERROR(result);
1007 bytes += result.Unwrap();
1008 return bytes - reinterpret_cast<const unsigned char *>(buffer);
1009}
1010
1011
1013 const RClusterSummary &clusterSummary, void *buffer)
1014{
// Serializes a cluster summary as a record frame: first entry, number of entries and, only if
// fColumnGroupID is non-negative, the column group ID. In that case the number of entries is
// written negated, which is how DeserializeClusterSummary() detects the presence of the ID.
// Returns the number of bytes between the frame start and the end of the last field.
// NOTE(review): the first line of the signature is not visible in this listing; the name
// SerializeClusterSummary is inferred from the call in the page list serialization -- confirm.
1015 auto base = reinterpret_cast<unsigned char *>(buffer);
1016 auto pos = base;
// If no buffer is given, `*where` is always null (size calculation only); otherwise `where` aliases
// `pos`, so that every Serialize* call writes at the current position
1017 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
1018
1019 auto frame = pos;
1020 pos += SerializeRecordFramePreamble(*where);
1021 pos += SerializeUInt64(clusterSummary.fFirstEntry, *where);
1022 if (clusterSummary.fColumnGroupID >= 0) {
1023 pos += SerializeInt64(-static_cast<int64_t>(clusterSummary.fNEntries), *where);
1024 pos += SerializeUInt32(clusterSummary.fColumnGroupID, *where);
1025 } else {
1026 pos += SerializeInt64(static_cast<int64_t>(clusterSummary.fNEntries), *where);
1027 }
1028 auto size = pos - frame;
// The postscript receives the frame start and the final frame size; its return value is not part
// of the returned size
1029 pos += SerializeFramePostscript(frame, size);
1030 return size;
1031}
1032
1035 RClusterSummary &clusterSummary)
1036{
// Reads a cluster summary frame from `buffer` into `clusterSummary`. A negative entry count on disk
// signals that a 32-bit column group ID follows; otherwise fColumnGroupID is set to -1.
// Returns the frame size as read from the frame header (not the number of bytes actually parsed),
// or an error if the frame is too short.
// NOTE(review): the first line(s) of the signature are not visible in this listing; the name
// DeserializeClusterSummary is inferred from the call in the page list deserialization -- confirm.
1037 auto base = reinterpret_cast<const unsigned char *>(buffer);
1038 auto bytes = base;
1039 std::uint64_t frameSize;
1040 auto result = DeserializeFrameHeader(bytes, bufSize, frameSize);
1041 if (!result)
1042 return R__FORWARD_ERROR(result);
1043 bytes += result.Unwrap();
1044
// Number of bytes of the frame that have not been consumed yet
1045 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - base); };
1046 if (fnFrameSizeLeft() < 2 * sizeof(std::uint64_t))
1047 return R__FAIL("too short cluster summary");
1048
1049 bytes += DeserializeUInt64(bytes, clusterSummary.fFirstEntry);
1050 std::int64_t nEntries;
1051 bytes += DeserializeInt64(bytes, nEntries);
1052
1053 if (nEntries < 0) {
1054 if (fnFrameSizeLeft() < sizeof(std::uint32_t))
1055 return R__FAIL("too short cluster summary");
1056 clusterSummary.fNEntries = -nEntries;
1057 std::uint32_t columnGroupID;
1058 bytes += DeserializeUInt32(bytes, columnGroupID);
1059 clusterSummary.fColumnGroupID = columnGroupID;
1060 } else {
1061 clusterSummary.fNEntries = nEntries;
1062 clusterSummary.fColumnGroupID = -1;
1063 }
1064
1065 return frameSize;
1066}
1067
1068
1070 const RClusterGroup &clusterGroup, void *buffer)
1071{
// Serializes a cluster group as a record frame consisting of fMinEntry, fEntrySpan, fNClusters and
// the envelope link of the page list. Returns the number of bytes between the frame start and the
// end of the envelope link.
// NOTE(review): the first line of the signature is not visible in this listing; the name
// SerializeClusterGroup is inferred from the call in the footer serialization -- confirm.
1072 auto base = reinterpret_cast<unsigned char *>(buffer);
1073 auto pos = base;
// If no buffer is given, `*where` is always null (size calculation only); otherwise `where` aliases
// `pos`, so that every Serialize* call writes at the current position
1074 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
1075
1076 auto frame = pos;
1077 pos += SerializeRecordFramePreamble(*where);
1078 pos += SerializeUInt64(clusterGroup.fMinEntry, *where);
1079 pos += SerializeUInt64(clusterGroup.fEntrySpan, *where);
1080 pos += SerializeUInt32(clusterGroup.fNClusters, *where);
1081 pos += SerializeEnvelopeLink(clusterGroup.fPageListEnvelopeLink, *where);
1082 auto size = pos - frame;
// The postscript receives the frame start and the final frame size; its return value is not part
// of the returned size
1083 pos += SerializeFramePostscript(frame, size);
1084 return size;
1085}
1086
1089 RClusterGroup &clusterGroup)
1090{
// Reads a cluster group frame (min entry, entry span, number of clusters, page list envelope link)
// from `buffer` into `clusterGroup`. Returns the frame size as read from the frame header, or an
// error if the frame is too short or the envelope link cannot be read.
// NOTE(review): the first line(s) of the signature are not visible in this listing; the name
// DeserializeClusterGroup is inferred from the call in the footer deserialization -- confirm.
1091 auto base = reinterpret_cast<const unsigned char *>(buffer);
1092 auto bytes = base;
1093
1094 std::uint64_t frameSize;
1095 auto result = DeserializeFrameHeader(bytes, bufSize, frameSize);
1096 if (!result)
1097 return R__FORWARD_ERROR(result);
1098 bytes += result.Unwrap();
1099
// Number of bytes of the frame that have not been consumed yet
1100 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - base); };
1101 if (fnFrameSizeLeft() < sizeof(std::uint32_t) + 2 * sizeof(std::uint64_t))
1102 return R__FAIL("too short cluster group");
1103
1104 bytes += DeserializeUInt64(bytes, clusterGroup.fMinEntry);
1105 bytes += DeserializeUInt64(bytes, clusterGroup.fEntrySpan);
1106 bytes += DeserializeUInt32(bytes, clusterGroup.fNClusters);
1107 result = DeserializeEnvelopeLink(bytes, fnFrameSizeLeft(), clusterGroup.fPageListEnvelopeLink);
1108 if (!result)
1109 return R__FORWARD_ERROR(result);
1110
1111 return frameSize;
1112}
1113
1115 bool forHeaderExtension)
1116{
// Registers the fields and columns of `desc` with this context by calling MapFieldId() and
// MapColumnId(). For the header (forHeaderExtension == false) the traversal starts at the zero
// field; for the header extension it starts at the top-level fields reported by the descriptor's
// header extension, if there is one. Three passes are made in this order: all fields, then all
// non-alias columns, then all alias columns.
// NOTE(review): the first line of the signature is not visible in this listing; the name MapSchema
// is inferred from the call `context.MapSchema(desc, ...)` further down in the file -- confirm.
1117 auto fieldZeroId = desc.GetFieldZeroId();
// Visits every field of the given trees; the subfields of a visited field are put at the front of
// the queue (in iteration order), so they are processed before any remaining sibling
1118 auto depthFirstTraversal = [&](std::span<DescriptorId_t> fieldTrees, auto doForEachField) {
1119 std::deque<DescriptorId_t> idQueue{fieldTrees.begin(), fieldTrees.end()};
1120 while (!idQueue.empty()) {
1121 auto fieldId = idQueue.front();
1122 idQueue.pop_front();
1123 // Field zero has no physical representation nor columns of its own; recurse over its subfields only
1124 if (fieldId != fieldZeroId)
1125 doForEachField(fieldId);
1126 unsigned i = 0;
1127 for (const auto &f : desc.GetFieldIterable(fieldId))
1128 idQueue.insert(idQueue.begin() + i++, f.GetId());
1129 }
1130 };
1131
1132 R__ASSERT(desc.GetNFields() > 0); // we must have at least a zero field
// Mapping the header is only expected before a header extension has been started
1133 if (!forHeaderExtension)
1134 R__ASSERT(GetHeaderExtensionOffset() == -1U);
1135
1136 std::vector<DescriptorId_t> fieldTrees;
1137 if (!forHeaderExtension) {
1138 fieldTrees.emplace_back(fieldZeroId);
1139 } else if (auto xHeader = desc.GetHeaderExtension()) {
1140 fieldTrees = xHeader->GetTopLevelFields(desc);
1141 }
1142 depthFirstTraversal(fieldTrees, [&](DescriptorId_t fieldId) { MapFieldId(fieldId); });
// Non-alias columns are mapped in a separate pass before the alias columns
1143 depthFirstTraversal(fieldTrees, [&](DescriptorId_t fieldId) {
1144 for (const auto &c : desc.GetColumnIterable(fieldId))
1145 if (!c.IsAliasColumn())
1146 MapColumnId(c.GetLogicalId());
1147 });
1148 depthFirstTraversal(fieldTrees, [&](DescriptorId_t fieldId) {
1149 for (const auto &c : desc.GetColumnIterable(fieldId))
1150 if (c.IsAliasColumn())
1151 MapColumnId(c.GetLogicalId());
1152 });
1153
1154 // Anything added after this point is accounted for the header extension
1155 if (!forHeaderExtension)
1156 BeginHeaderExtension();
1157}
1158
1160 const RNTupleDescriptor &desc,
1161 const RContext &context,
1162 bool forHeaderExtension)
1163{
// Serializes the schema of `desc` as four consecutive list frames: fields, columns, alias columns
// and extra type information. For the header extension, only the part of the context's on-disk
// field list starting at the header extension offset is passed to the field/column serializers.
// Returns the number of bytes between the start of the first frame and the end of the last one.
// NOTE(review): the first line of the signature is not visible in this listing; the name
// SerializeSchemaDescription is inferred from the call sites further down in the file -- confirm.
1164 auto base = reinterpret_cast<unsigned char *>(buffer);
1165 auto pos = base;
// If no buffer is given, `*where` is always null (size calculation only); otherwise `where` aliases
// `pos`, so that every Serialize* call writes at the current position
1166 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
1167
1168 std::size_t nFields = 0, nColumns = 0, nAliasColumns = 0, fieldListOffset = 0;
1169 if (forHeaderExtension) {
1170 // A call to `RNTupleDescriptorBuilder::BeginHeaderExtension()` is not strictly required after serializing the
1171 // header, which may happen, e.g., in unit tests. Ensure an empty schema extension is serialized in this case
1172 if (auto xHeader = desc.GetHeaderExtension()) {
1173 nFields = xHeader->GetNFields();
1174 nColumns = xHeader->GetNPhysicalColumns();
1175 nAliasColumns = xHeader->GetNLogicalColumns() - xHeader->GetNPhysicalColumns();
1176 fieldListOffset = context.GetHeaderExtensionOffset();
1177 }
1178 } else {
// The zero field is not serialized, hence it is not counted
1179 nFields = desc.GetNFields() - 1;
1180 nColumns = desc.GetNPhysicalColumns();
1181 nAliasColumns = desc.GetNLogicalColumns() - desc.GetNPhysicalColumns();
1182 }
1183 const auto nExtraTypeInfos = desc.GetNExtraTypeInfos();
1184 const auto &onDiskFields = context.GetOnDiskFieldList();
1185 R__ASSERT(onDiskFields.size() >= fieldListOffset);
1186 std::span<const DescriptorId_t> fieldList{onDiskFields.data() + fieldListOffset,
1187 onDiskFields.size() - fieldListOffset};
1188
// Each list frame follows the same pattern: preamble, items, postscript. The postscript gets a
// null frame pointer in size-only mode.
1189 auto frame = pos;
1190 pos += SerializeListFramePreamble(nFields, *where);
1191 pos += SerializeFieldList(desc, fieldList, /*firstOnDiskId=*/fieldListOffset, context, *where);
1192 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1193
1194 frame = pos;
1195 pos += SerializeListFramePreamble(nColumns, *where);
1196 pos += SerializeColumnList(desc, fieldList, context, *where);
1197 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1198
1199 frame = pos;
1200 pos += SerializeListFramePreamble(nAliasColumns, *where);
1201 pos += SerializeAliasColumnList(desc, fieldList, context, *where);
1202 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1203
1204 frame = pos;
1205 pos += SerializeListFramePreamble(nExtraTypeInfos, *where);
1206 pos += SerializeExtraTypeInfoList(desc, *where);
1207 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1208
1209 return static_cast<std::uint32_t>(pos - base);
1210}
1211
1214 RNTupleDescriptorBuilder &descBuilder)
1215{
// Reads the four schema list frames (fields, columns, alias columns, extra type information) from
// `buffer` and adds their contents to `descBuilder`. Field and column IDs are assigned
// consecutively, continuing after what the builder's descriptor already contains. Returns the
// number of bytes covered by the four frames, or the first error encountered.
// NOTE(review): the first line(s) of the signature are not visible in this listing; the name
// DeserializeSchemaDescription is inferred from the call sites further down in the file -- confirm.
1216 auto base = reinterpret_cast<const unsigned char *>(buffer);
1217 auto bytes = base;
1218 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
// NOTE(review): listing line 1219 is missing from this extract; presumably it declares `result`,
// which is assigned below -- confirm against the full source.
1220
1221 std::uint64_t frameSize;
1222 auto frame = bytes;
// `frame` and `frameSize` are captured by reference, so this lambda always refers to the frame
// that is currently being parsed
1223 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - frame); };
1224
1225 std::uint32_t nFields;
1226 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nFields);
1227 if (!result)
1228 return R__FORWARD_ERROR(result);
1229 bytes += result.Unwrap();
1230 // The zero field is always added before `DeserializeSchemaDescription()` is called
1231 const std::uint32_t fieldIdRangeBegin = descBuilder.GetDescriptor().GetNFields() - 1;
1232 for (unsigned i = 0; i < nFields; ++i) {
1233 std::uint32_t fieldId = fieldIdRangeBegin + i;
1234 RFieldDescriptorBuilder fieldBuilder;
1235 result = DeserializeField(bytes, fnFrameSizeLeft(), fieldBuilder);
1236 if (!result)
1237 return R__FORWARD_ERROR(result);
1238 bytes += result.Unwrap();
// A field that names itself as its parent is attached to the zero field
1239 if (fieldId == fieldBuilder.GetParentId())
1240 fieldBuilder.ParentId(kZeroFieldId);
1241 auto fieldDesc = fieldBuilder.FieldId(fieldId).MakeDescriptor();
1242 if (!fieldDesc)
1243 return R__FORWARD_ERROR(fieldDesc);
1244 auto parentId = fieldDesc.Inspect().GetParentId();
1245 descBuilder.AddField(fieldDesc.Unwrap());
1246 auto resVoid = descBuilder.AddFieldLink(parentId, fieldId);
1247 if (!resVoid)
1248 return R__FORWARD_ERROR(resVoid);
1249 }
// Continue at the end of the frame as announced by its header, regardless of how many bytes were parsed
1250 bytes = frame + frameSize;
1251
1252 std::uint32_t nColumns;
1253 frame = bytes;
1254 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nColumns);
1255 if (!result)
1256 return R__FORWARD_ERROR(result);
1257 bytes += result.Unwrap();
1258 const std::uint32_t columnIdRangeBegin = descBuilder.GetDescriptor().GetNLogicalColumns();
// Highest column index handed out so far, per field; shared between the column and alias column loops
1259 std::unordered_map<DescriptorId_t, std::uint32_t> maxIndexes;
1260 for (unsigned i = 0; i < nColumns; ++i) {
1261 std::uint32_t columnId = columnIdRangeBegin + i;
1262 RColumnDescriptorBuilder columnBuilder;
1263 result = DeserializeColumn(bytes, fnFrameSizeLeft(), columnBuilder);
1264 if (!result)
1265 return R__FORWARD_ERROR(result);
1266 bytes += result.Unwrap();
1267
// The index of a column within its field follows the order of appearance in the frame
1268 std::uint32_t idx = 0;
1269 const auto fieldId = columnBuilder.GetFieldId();
1270 auto maxIdx = maxIndexes.find(fieldId);
1271 if (maxIdx != maxIndexes.end())
1272 idx = maxIdx->second + 1;
1273 maxIndexes[fieldId] = idx;
1274
// For columns of this frame, the logical and the physical ID are identical
1275 auto columnDesc = columnBuilder.Index(idx).LogicalColumnId(columnId).PhysicalColumnId(columnId).MakeDescriptor();
1276 if (!columnDesc)
1277 return R__FORWARD_ERROR(columnDesc);
1278 auto resVoid = descBuilder.AddColumn(columnDesc.Unwrap());
1279 if (!resVoid)
1280 return R__FORWARD_ERROR(resVoid);
1281 }
1282 bytes = frame + frameSize;
1283
1284 std::uint32_t nAliasColumns;
1285 frame = bytes;
1286 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nAliasColumns);
1287 if (!result)
1288 return R__FORWARD_ERROR(result);
1289 bytes += result.Unwrap();
// Logical IDs of alias columns continue right after the IDs of the columns read above
1290 const std::uint32_t aliasColumnIdRangeBegin = columnIdRangeBegin + nColumns;
1291 for (unsigned i = 0; i < nAliasColumns; ++i) {
1292 std::uint32_t physicalId;
1293 std::uint32_t fieldId;
1294 result = DeserializeAliasColumn(bytes, fnFrameSizeLeft(), physicalId, fieldId);
1295 if (!result)
1296 return R__FORWARD_ERROR(result);
1297 bytes += result.Unwrap();
1298
1299 RColumnDescriptorBuilder columnBuilder;
1300 columnBuilder.LogicalColumnId(aliasColumnIdRangeBegin + i).PhysicalColumnId(physicalId).FieldId(fieldId);
// The alias column takes over the model of the column it refers to
1301 columnBuilder.Model(descBuilder.GetDescriptor().GetColumnDescriptor(physicalId).GetModel());
1302
1303 std::uint32_t idx = 0;
1304 auto maxIdx = maxIndexes.find(fieldId);
1305 if (maxIdx != maxIndexes.end())
1306 idx = maxIdx->second + 1;
1307 maxIndexes[fieldId] = idx;
1308
1309 auto aliasColumnDesc = columnBuilder.Index(idx).MakeDescriptor();
1310 if (!aliasColumnDesc)
1311 return R__FORWARD_ERROR(aliasColumnDesc);
1312 auto resVoid = descBuilder.AddColumn(aliasColumnDesc.Unwrap());
1313 if (!resVoid)
1314 return R__FORWARD_ERROR(resVoid);
1315 }
1316 bytes = frame + frameSize;
1317
1318 std::uint32_t nExtraTypeInfos;
1319 frame = bytes;
1320 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nExtraTypeInfos);
1321 if (!result)
1322 return R__FORWARD_ERROR(result);
1323 bytes += result.Unwrap();
1324 for (unsigned i = 0; i < nExtraTypeInfos; ++i) {
1325 RExtraTypeInfoDescriptorBuilder extraTypeInfoBuilder;
1326 result = DeserializeExtraTypeInfo(bytes, fnFrameSizeLeft(), extraTypeInfoBuilder);
1327 if (!result)
1328 return R__FORWARD_ERROR(result);
1329 bytes += result.Unwrap();
1330
1331 auto extraTypeInfoDesc = extraTypeInfoBuilder.MoveDescriptor();
1332 // We ignore unknown extra type information
1333 if (extraTypeInfoDesc)
1334 descBuilder.AddExtraTypeInfo(extraTypeInfoDesc.Unwrap());
1335 }
1336 bytes = frame + frameSize;
1337
1338 return bytes - base;
1339}
1340
1344{
// Serializes the header envelope for `desc`: envelope preamble, feature flags, name, description,
// writer identification ("ROOT v" followed by ROOT_RELEASE), the schema description and the
// envelope postscript. Returns a context that carries the field/column mapping established by
// MapSchema() together with the header size and the XxHash-3 value filled in by the postscript.
// NOTE(review): the signature (listing lines 1341-1343) is not visible in this extract; judging
// from the envelope type this is presumably RNTupleSerializer::SerializeHeader -- confirm.
1345 RContext context;
1346
1347 auto base = reinterpret_cast<unsigned char *>(buffer);
1348 auto pos = base;
// If no buffer is given, `*where` is always null (size calculation only); otherwise `where` aliases
// `pos`, so that every Serialize* call writes at the current position
1349 void **where = (buffer == nullptr) ? &buffer : reinterpret_cast<void **>(&pos);
1350
1351 pos += SerializeEnvelopePreamble(kEnvelopeTypeHeader, *where);
1352 // So far we don't make use of feature flags
1353 pos += SerializeFeatureFlags(desc.GetFeatureFlags(), *where);
1354 pos += SerializeString(desc.GetName(), *where);
1355 pos += SerializeString(desc.GetDescription(), *where);
1356 pos += SerializeString(std::string("ROOT v") + ROOT_RELEASE, *where);
1357
// The mapping must exist before the schema description is written, because the latter reads it
// from the context
1358 context.MapSchema(desc, /*forHeaderExtension=*/false);
1359 pos += SerializeSchemaDescription(*where, desc, context);
1360
1361 std::uint64_t size = pos - base;
1362 std::uint64_t xxhash3 = 0;
1363 size += SerializeEnvelopePostscript(base, size, xxhash3);
1364
1365 context.SetHeaderSize(size);
1366 context.SetHeaderXxHash3(xxhash3);
1367 return context;
1368}
1369
1370std::uint32_t
1372 std::span<DescriptorId_t> physClusterIDs,
1373 const RContext &context)
1374{
// Serializes a page list envelope for the clusters given by `physClusterIDs`: the header's
// XxHash-3 value, a list frame of cluster summaries, and a list frame of page locations with one
// nested list frame per cluster, containing one list frame per column. Returns the size of the
// envelope in bytes, including what SerializeEnvelopePostscript() reports.
// NOTE(review): the line carrying the function name is not visible in this listing; judging from
// the envelope type this is presumably RNTupleSerializer::SerializePageList -- confirm.
1375 auto base = reinterpret_cast<unsigned char *>(buffer);
1376 auto pos = base;
// If no buffer is given, `*where` is always null (size calculation only); otherwise `where` aliases
// `pos`, so that every Serialize* call writes at the current position
1377 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
1378
1379 pos += SerializeEnvelopePreamble(kEnvelopeTypePageList, *where);
1380
1381 pos += SerializeUInt64(context.GetHeaderXxHash3(), *where);
1382
1383 // Cluster summaries
1384 const auto nClusters = physClusterIDs.size();
1385 auto clusterSummaryFrame = pos;
1386 pos += SerializeListFramePreamble(nClusters, *where);
1387 for (auto clusterId : physClusterIDs) {
1388 const auto &clusterDesc = desc.GetClusterDescriptor(context.GetMemClusterId(clusterId));
// The column group ID is always -1 here, i.e. no column group ID is written
1389 RClusterSummary summary{clusterDesc.GetFirstEntryIndex(), clusterDesc.GetNEntries(), -1};
1390 pos += SerializeClusterSummary(summary, *where);
1391 }
1392 pos += SerializeFramePostscript(buffer ? clusterSummaryFrame : nullptr, pos - clusterSummaryFrame);
1393
1394 // Page locations
1395 auto topMostFrame = pos;
1396 pos += SerializeListFramePreamble(nClusters, *where);
1397
1398 for (auto clusterId : physClusterIDs) {
1399 const auto &clusterDesc = desc.GetClusterDescriptor(context.GetMemClusterId(clusterId));
1400 // Get an ordered set of physical column ids
1401 std::set<DescriptorId_t> onDiskColumnIds;
1402 for (auto column : clusterDesc.GetColumnIds())
1403 onDiskColumnIds.insert(context.GetOnDiskColumnId(column));
1404
1405 auto outerFrame = pos;
1406 pos += SerializeListFramePreamble(onDiskColumnIds.size(), *where);
// Iterating the std::set writes the column frames in ascending order of on-disk column ID
1407 for (auto onDiskId : onDiskColumnIds) {
1408 auto memId = context.GetMemColumnId(onDiskId);
1409 const auto &columnRange = clusterDesc.GetColumnRange(memId);
1410 const auto &pageRange = clusterDesc.GetPageRange(memId);
1411
1412 auto innerFrame = pos;
1413 pos += SerializeListFramePreamble(pageRange.fPageInfos.size(), *where);
1414
1415 for (const auto &pi : pageRange.fPageInfos) {
1416 pos += SerializeUInt32(pi.fNElements, *where);
1417 pos += SerializeLocator(pi.fLocator, *where);
1418 }
// The first element index and the compression settings follow the page entries inside the same frame
1419 pos += SerializeUInt64(columnRange.fFirstElementIndex, *where);
1420 pos += SerializeUInt32(columnRange.fCompressionSettings, *where);
1421
1422 pos += SerializeFramePostscript(buffer ? innerFrame : nullptr, pos - innerFrame);
1423 }
1424 pos += SerializeFramePostscript(buffer ? outerFrame : nullptr, pos - outerFrame);
1425 }
1426
1427 pos += SerializeFramePostscript(buffer ? topMostFrame : nullptr, pos - topMostFrame);
1428 std::uint64_t size = pos - base;
1429 size += SerializeEnvelopePostscript(base, size);
1430 return size;
1431}
1432
1433std::uint32_t
1436 const RContext &context)
1437{
// Serializes the footer envelope: (empty) feature flags, the header's XxHash-3 value, a record frame
// with the schema extension, an empty list frame for column groups, the list frame of cluster
// groups and an empty list frame for meta-data. Returns the size of the envelope in bytes,
// including what SerializeEnvelopePostscript() reports.
// NOTE(review): the lines carrying the function name and the first parameters are not visible in
// this listing; judging from the envelope type this is presumably
// RNTupleSerializer::SerializeFooter -- confirm.
1438 auto base = reinterpret_cast<unsigned char *>(buffer);
1439 auto pos = base;
// If no buffer is given, `*where` is always null (size calculation only); otherwise `where` aliases
// `pos`, so that every Serialize* call writes at the current position
1440 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
1441
1442 pos += SerializeEnvelopePreamble(kEnvelopeTypeFooter, *where);
1443
1444 // So far we don't make use of footer feature flags
1445 pos += SerializeFeatureFlags(std::vector<std::uint64_t>(), *where);
1446 pos += SerializeUInt64(context.GetHeaderXxHash3(), *where);
1447
1448 // Schema extension, i.e. incremental changes with respect to the header
1449 auto frame = pos;
1450 pos += SerializeRecordFramePreamble(*where);
1451 pos += SerializeSchemaDescription(*where, desc, context, /*forHeaderExtension=*/true);
1452 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1453
1454 // So far no support for shared clusters (no column groups)
1455 frame = pos;
1456 pos += SerializeListFramePreamble(0, *where);
1457 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1458
1459 // Cluster groups
1460 frame = pos;
1461 const auto nClusterGroups = desc.GetNClusterGroups();
1462 pos += SerializeListFramePreamble(nClusterGroups, *where);
// The loop index is translated to an in-memory cluster group ID through the context
1463 for (unsigned int i = 0; i < nClusterGroups; ++i) {
1464 const auto &cgDesc = desc.GetClusterGroupDescriptor(context.GetMemClusterGroupId(i));
1465 RClusterGroup clusterGroup;
1466 clusterGroup.fMinEntry = cgDesc.GetMinEntry();
1467 clusterGroup.fEntrySpan = cgDesc.GetEntrySpan();
1468 clusterGroup.fNClusters = cgDesc.GetNClusters();
1469 clusterGroup.fPageListEnvelopeLink.fLength = cgDesc.GetPageListLength();
1470 clusterGroup.fPageListEnvelopeLink.fLocator = cgDesc.GetPageListLocator();
1471 pos += SerializeClusterGroup(clusterGroup, *where);
1472 }
1473 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1474
1475 // So far no support for meta-data
1476 frame = pos;
1477 pos += SerializeListFramePreamble(0, *where);
1478 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1479
1480 std::uint32_t size = pos - base;
1481 size += SerializeEnvelopePostscript(base, size);
1482 return size;
1483}
1484
1487 RNTupleDescriptorBuilder &descBuilder)
1488{
// Reads a header envelope from `buffer` and fills `descBuilder` with the on-disk header hash, the
// ntuple name and description, the zero field and the schema description. Any set feature flag
// bit makes the call fail, because no format feature is supported by this reader. The writer
// string is read but not used further.
// NOTE(review): the first line(s) of the signature are not visible in this listing; judging from
// the envelope type this is presumably RNTupleSerializer::DeserializeHeader -- confirm.
1489 auto base = reinterpret_cast<const unsigned char *>(buffer);
1490 auto bytes = base;
1491 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
// NOTE(review): listing line 1492 is missing from this extract; presumably it declares `result`,
// which is assigned below -- confirm against the full source.
1493
1494 std::uint64_t xxhash3{0};
1495 result = DeserializeEnvelope(bytes, fnBufSizeLeft(), kEnvelopeTypeHeader, xxhash3);
1496 if (!result)
1497 return R__FORWARD_ERROR(result);
1498 bytes += result.Unwrap();
1499 descBuilder.SetOnDiskHeaderXxHash3(xxhash3);
1500
1501 std::vector<std::uint64_t> featureFlags;
1502 result = DeserializeFeatureFlags(bytes, fnBufSizeLeft(), featureFlags);
1503 if (!result)
1504 return R__FORWARD_ERROR(result);
1505 bytes += result.Unwrap();
// Fail on the first non-zero flag word, reporting the global index of its lowest set bit
1506 for (std::size_t i = 0; i < featureFlags.size(); ++i) {
1507 if (!featureFlags[i])
1508 continue;
1509 unsigned int bit = 0;
1510 while (!(featureFlags[i] & (static_cast<uint64_t>(1) << bit)))
1511 bit++;
1512 return R__FAIL("unsupported format feature: " + std::to_string(i * 64 + bit));
1513 }
1514
1515 std::string name;
1516 std::string description;
1517 std::string writer;
1518 result = DeserializeString(bytes, fnBufSizeLeft(), name);
1519 if (!result)
1520 return R__FORWARD_ERROR(result);
1521 bytes += result.Unwrap();
1522 result = DeserializeString(bytes, fnBufSizeLeft(), description);
1523 if (!result)
1524 return R__FORWARD_ERROR(result);
1525 bytes += result.Unwrap();
1526 result = DeserializeString(bytes, fnBufSizeLeft(), writer);
1527 if (!result)
1528 return R__FORWARD_ERROR(result);
1529 bytes += result.Unwrap();
1530 descBuilder.SetNTuple(name, description);
1531
1532 // Zero field
1533 descBuilder.AddField(
1534 RFieldDescriptorBuilder().FieldId(kZeroFieldId).Structure(ENTupleStructure::kRecord).MakeDescriptor().Unwrap());
1535 result = DeserializeSchemaDescription(bytes, fnBufSizeLeft(), descBuilder);
1536 if (!result)
1537 return R__FORWARD_ERROR(result);
1538
1539 return RResult<void>::Success();
1540}
1541
1544 RNTupleDescriptorBuilder &descBuilder)
1545{
// Reads a footer envelope from `buffer` and adds its contents to `descBuilder`: the schema
// extension (if the corresponding frame is not empty) and the cluster groups. The XxHash-3 value
// stored in the footer must match the header hash already known to the builder's descriptor.
// Set feature flags and meta-data blocks only trigger a warning, while a non-empty column group
// list is an error.
// NOTE(review): the first line(s) of the signature are not visible in this listing; judging from
// the envelope type this is presumably RNTupleSerializer::DeserializeFooter -- confirm.
1546 auto base = reinterpret_cast<const unsigned char *>(buffer);
1547 auto bytes = base;
1548 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
// NOTE(review): listing line 1549 is missing from this extract; presumably it declares `result`,
// which is assigned below -- confirm against the full source.
1550
1551 result = DeserializeEnvelope(bytes, fnBufSizeLeft(), kEnvelopeTypeFooter);
1552 if (!result)
1553 return R__FORWARD_ERROR(result);
1554 bytes += result.Unwrap();
1555
1556 std::vector<std::uint64_t> featureFlags;
1557 result = DeserializeFeatureFlags(bytes, fnBufSizeLeft(), featureFlags);
1558 if (!result)
1559 return R__FORWARD_ERROR(result);
1560 bytes += result.Unwrap();
1561 for (auto f: featureFlags) {
1562 if (f)
1563 R__LOG_WARNING(NTupleLog()) << "Unsupported feature flag! " << f;
1564 }
1565
1566 std::uint64_t xxhash3{0};
1567 if (fnBufSizeLeft() < static_cast<int>(sizeof(std::uint64_t)))
1568 return R__FAIL("footer too short");
1569 bytes += DeserializeUInt64(bytes, xxhash3);
1570 if (xxhash3 != descBuilder.GetDescriptor().GetOnDiskHeaderXxHash3())
1571 return R__FAIL("XxHash-3 mismatch between header and footer");
1572
1573 std::uint64_t frameSize;
1574 auto frame = bytes;
// `frame` and `frameSize` are captured by reference, so this lambda always refers to the frame
// that is currently being parsed
1575 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - frame); };
1576
1577 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize);
1578 if (!result)
1579 return R__FORWARD_ERROR(result);
1580 bytes += result.Unwrap();
// A frame that holds nothing besides its header means that there is no schema extension
1581 if (fnFrameSizeLeft() > 0) {
1582 descBuilder.BeginHeaderExtension();
1583 result = DeserializeSchemaDescription(bytes, fnFrameSizeLeft(), descBuilder);
1584 if (!result)
1585 return R__FORWARD_ERROR(result);
1586 }
1587 bytes = frame + frameSize;
1588
1589 std::uint32_t nColumnGroups;
1590 frame = bytes;
1591 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nColumnGroups);
1592 if (!result)
1593 return R__FORWARD_ERROR(result);
1594 if (nColumnGroups > 0)
1595 return R__FAIL("sharded clusters are still unsupported");
// The frame content is skipped as a whole, hence the header length is not added to `bytes` first
1596 bytes = frame + frameSize;
1597
1598 std::uint32_t nClusterGroups;
1599 frame = bytes;
1600 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups);
1601 if (!result)
1602 return R__FORWARD_ERROR(result);
1603 bytes += result.Unwrap();
// Cluster group IDs are assigned according to the position in the list
1604 for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) {
1605 RClusterGroup clusterGroup;
1606 result = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup);
1607 if (!result)
1608 return R__FORWARD_ERROR(result);
1609 bytes += result.Unwrap();
1610
// NOTE(review): listing lines 1611 and 1614-1615 are missing from this extract; presumably the
// page list envelope link read above is handed to the builder there -- confirm against the
// full source.
1612 RClusterGroupDescriptorBuilder clusterGroupBuilder;
1613 clusterGroupBuilder.ClusterGroupId(groupId)
1616 .MinEntry(clusterGroup.fMinEntry)
1617 .EntrySpan(clusterGroup.fEntrySpan)
1618 .NClusters(clusterGroup.fNClusters);
1619 descBuilder.AddClusterGroup(clusterGroupBuilder.MoveDescriptor().Unwrap());
1620 }
1621 bytes = frame + frameSize;
1622
1623 std::uint32_t nMDBlocks;
1624 frame = bytes;
1625 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nMDBlocks);
1626 if (!result)
1627 return R__FORWARD_ERROR(result);
1628 if (nMDBlocks > 0)
1629 R__LOG_WARNING(NTupleLog()) << "meta-data blocks are still unsupported";
1630 bytes = frame + frameSize;
1631
1632 return RResult<void>::Success();
1633}
1634
1637 DescriptorId_t clusterGroupId,
1638 RNTupleDescriptor &desc)
1639{
// Reads a page list envelope from `buffer` and attaches the resulting cluster descriptors to
// cluster group `clusterGroupId` of `desc`. The envelope contains the header's XxHash-3 value
// (which must match the one stored in `desc`), a list frame of cluster summaries and a list frame
// of page locations with one nested frame per cluster and, within it, one frame per column.
// NOTE(review): the first line(s) of the signature are not visible in this listing; judging from
// the envelope type this is presumably RNTupleSerializer::DeserializePageList -- confirm.
1640 auto base = reinterpret_cast<const unsigned char *>(buffer);
1641 auto bytes = base;
1642 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
// NOTE(review): listing line 1643 is missing from this extract; presumably it declares `result`,
// which is assigned below -- confirm against the full source.
1644
1645 result = DeserializeEnvelope(bytes, fnBufSizeLeft(), kEnvelopeTypePageList);
1646 if (!result)
1647 return R__FORWARD_ERROR(result);
1648 bytes += result.Unwrap();
1649
1650 std::uint64_t xxhash3{0};
1651 if (fnBufSizeLeft() < static_cast<int>(sizeof(std::uint64_t)))
1652 return R__FAIL("page list too short");
1653 bytes += DeserializeUInt64(bytes, xxhash3);
1654 if (xxhash3 != desc.GetOnDiskHeaderXxHash3())
1655 return R__FAIL("XxHash-3 mismatch between header and page list");
1656
1657 std::vector<RClusterDescriptorBuilder> clusterBuilders;
// The first cluster ID of this group is the total number of clusters in all preceding cluster groups
1658 DescriptorId_t firstClusterId{0};
1659 for (DescriptorId_t i = 0; i < clusterGroupId; ++i) {
1660 firstClusterId = firstClusterId + desc.GetClusterGroupDescriptor(i).GetNClusters();
1661 }
1662
1663 std::uint64_t clusterSummaryFrameSize;
1664 auto clusterSummaryFrame = bytes;
1665 auto fnClusterSummaryFrameSizeLeft = [&]() { return clusterSummaryFrameSize - (bytes - clusterSummaryFrame); };
1666
1667 std::uint32_t nClusterSummaries;
1668 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), clusterSummaryFrameSize, nClusterSummaries);
1669 if (!result)
1670 return R__FORWARD_ERROR(result);
1671 bytes += result.Unwrap();
1672 for (auto clusterId = firstClusterId; clusterId < firstClusterId + nClusterSummaries; ++clusterId) {
1673 RClusterSummary clusterSummary;
1674 result = DeserializeClusterSummary(bytes, fnClusterSummaryFrameSizeLeft(), clusterSummary);
1675 if (!result)
1676 return R__FORWARD_ERROR(result);
1677 bytes += result.Unwrap();
1678 if (clusterSummary.fColumnGroupID >= 0)
1679 return R__FAIL("sharded clusters are still unsupported");
1680
// NOTE(review): listing line 1681 is missing from this extract; presumably it declares `builder`
// -- confirm against the full source.
1682 builder.ClusterId(clusterId).FirstEntryIndex(clusterSummary.fFirstEntry).NEntries(clusterSummary.fNEntries);
1683 clusterBuilders.emplace_back(std::move(builder));
1684 }
1685 bytes = clusterSummaryFrame + clusterSummaryFrameSize;
1686
1687 std::uint64_t topMostFrameSize;
1688 auto topMostFrame = bytes;
1689 auto fnTopMostFrameSizeLeft = [&]() { return topMostFrameSize - (bytes - topMostFrame); };
1690
1691 std::uint32_t nClusters;
1692 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), topMostFrameSize, nClusters);
1693 if (!result)
1694 return R__FORWARD_ERROR(result);
1695 bytes += result.Unwrap();
1696
// This check also guarantees that clusterBuilders[i] below is a valid access
1697 if (nClusters != nClusterSummaries)
1698 return R__FAIL("mismatch between number of clusters and number of cluster summaries");
1699
1700 std::vector<RClusterDescriptor> clusters;
1701 for (std::uint32_t i = 0; i < nClusters; ++i) {
1702 std::uint64_t outerFrameSize;
1703 auto outerFrame = bytes;
1704 auto fnOuterFrameSizeLeft = [&]() { return outerFrameSize - (bytes - outerFrame); };
1705
1706 std::uint32_t nColumns;
1707 result = DeserializeFrameHeader(bytes, fnTopMostFrameSizeLeft(), outerFrameSize, nColumns);
1708 if (!result)
1709 return R__FORWARD_ERROR(result);
1710 bytes += result.Unwrap();
1711
1712 for (std::uint32_t j = 0; j < nColumns; ++j) {
1713 std::uint64_t innerFrameSize;
1714 auto innerFrame = bytes;
1715 auto fnInnerFrameSizeLeft = [&]() { return innerFrameSize - (bytes - innerFrame); };
1716
1717 std::uint32_t nPages;
1718 result = DeserializeFrameHeader(bytes, fnOuterFrameSizeLeft(), innerFrameSize, nPages);
1719 if (!result)
1720 return R__FORWARD_ERROR(result);
1721 bytes += result.Unwrap();
1722
// NOTE(review): listing line 1723 is missing from this extract; presumably it declares `pageRange`.
// The position of the column frame within the cluster frame is used as the physical column ID.
1724 pageRange.fPhysicalColumnId = j;
1725 for (std::uint32_t k = 0; k < nPages; ++k) {
1726 if (fnInnerFrameSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1727 return R__FAIL("inner frame too short");
1728 std::int32_t nElements;
1729 RNTupleLocator locator;
1730 bytes += DeserializeInt32(bytes, nElements);
// Only the magnitude of the element count is kept; the sign is not evaluated yet (see TODO)
1731 if (nElements < 0) {
1732 // TODO(jblomer): page with checksum
1733 nElements = -nElements;
1734 }
1735 result = DeserializeLocator(bytes, fnInnerFrameSizeLeft(), locator);
1736 if (!result)
1737 return R__FORWARD_ERROR(result);
1738 pageRange.fPageInfos.push_back({static_cast<std::uint32_t>(nElements), locator});
1739 bytes += result.Unwrap();
1740 }
1741
// The page entries are followed by the 64-bit column offset and the 32-bit compression settings
1742 if (fnInnerFrameSizeLeft() < static_cast<int>(sizeof(std::uint32_t) + sizeof(std::uint64_t)))
1743 return R__FAIL("page list frame too short");
1744 std::uint64_t columnOffset;
1745 bytes += DeserializeUInt64(bytes, columnOffset);
1746 std::uint32_t compressionSettings;
1747 bytes += DeserializeUInt32(bytes, compressionSettings);
1748
1749 clusterBuilders[i].CommitColumnRange(j, columnOffset, compressionSettings, pageRange);
1750 bytes = innerFrame + innerFrameSize;
1751 } // loop over columns
1752
1753 bytes = outerFrame + outerFrameSize;
1754
1755 clusterBuilders[i].AddExtendedColumnRanges(desc);
1756 clusters.emplace_back(clusterBuilders[i].MoveDescriptor().Unwrap());
1757 } // loop over clusters
1758 desc.AddClusterGroupDetails(clusterGroupId, clusters);
1759
1760 return RResult<void>::Success();
1761}
1762
1764{
// Collects the streamer info objects of `infos` in a TList, writes that list into `buffer` and
// returns the buffer contents as a string. In debug builds it is asserted that every key of
// `infos` equals the number reported by the associated streamer info.
// NOTE(review): the signature (listing line 1763) and the declaration of `buffer` (listing line
// 1770) are not visible in this extract; the function is presumably the counterpart of the
// streamer info deserialization that follows -- confirm against the full source.
1765 TList streamerInfos;
1766 for (auto si : infos) {
1767 assert(si.first == si.second->GetNumber());
1768 streamerInfos.Add(si.second);
1769 }
1771 buffer.WriteObject(&streamerInfos);
1772 assert(buffer.Length() > 0);
1773 return std::string{buffer.Buffer(), static_cast<UInt_t>(buffer.Length())};
1774}
1775
1778{
// Reads a TList of TStreamerInfo objects from `extraTypeInfoContent` and returns a map from the
// streamer info number to the streamer info obtained from the corresponding class after
// BuildCheck() has been called on the object that was read.
// NOTE(review): the signature (listing lines 1776-1777) is not visible in this extract.
1779 StreamerInfoMap_t infoMap;
1780
// The buffer reads directly from the string's memory and does not take ownership of it
1781 TBufferFile buffer(TBuffer::kRead, extraTypeInfoContent.length(), const_cast<char *>(extraTypeInfoContent.data()),
1782 false /* adopt */);
// NOTE(review): the result of ReadObject() is used without a null check -- confirm that it cannot
// be null for malformed input.
1783 auto infoList = reinterpret_cast<TList *>(buffer.ReadObject(TList::Class()));
1784 infoList->SetOwner(); // delete the TStreamerInfo items of the list
1785
1786 TObjLink *lnk = infoList->FirstLink();
1787 while (lnk) {
1788 auto info = reinterpret_cast<TStreamerInfo *>(lnk->GetObject());
1789 info->BuildCheck();
// The map stores the pointer returned by the class, not the list item, which is deleted together
// with the list below
1790 infoMap[info->GetNumber()] = info->GetClass()->GetStreamerInfo();
1791 assert(info->GetNumber() == infoMap[info->GetNumber()]->GetNumber());
1792 lnk = lnk->Next();
1793 }
1794
1795 delete infoList;
1796
1797 return infoMap;
1798}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:294
#define R__FORWARD_RESULT(res)
Short-hand to return an RResult<T> value from a subroutine to the calling stack frame.
Definition RError.hxx:292
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
#define R__LOG_WARNING(...)
Definition RLogger.hxx:363
#define R__LOG_DEBUG(DEBUGLEVEL,...)
Definition RLogger.hxx:365
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define ROOT_RELEASE
Definition RVersion.hxx:29
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t nitems
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t bytes
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
The available trivial, native content types of a column.
A helper class for piece-wise construction of an RClusterDescriptor.
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersionTo(std::uint32_t typeVersionTo)
RExtraTypeInfoDescriptorBuilder & TypeVersionFrom(std::uint32_t typeVersionFrom)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
A helper class for piece-wise construction of an RNTupleDescriptor.
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx=0U)
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
The serialization context is used for the piecewise serialization of a descriptor.
DescriptorId_t GetOnDiskColumnId(DescriptorId_t memId) const
const std::vector< DescriptorId_t > & GetOnDiskFieldList() const
Return a vector containing the in-memory field ID for each on-disk counterpart, in order,...
DescriptorId_t GetOnDiskFieldId(DescriptorId_t memId) const
DescriptorId_t GetMemColumnId(DescriptorId_t onDiskId) const
std::size_t GetHeaderExtensionOffset() const
Return the offset of the first element in fOnDisk2MemFieldIDs that is part of the schema extension.
DescriptorId_t GetMemClusterGroupId(DescriptorId_t onDiskId) const
DescriptorId_t GetMemClusterId(DescriptorId_t onDiskId) const
void MapSchema(const RNTupleDescriptor &desc, bool forHeaderExtension)
Map in-memory field and column IDs to their on-disk counterparts.
A helper class for serializing and deserialization of the RNTuple binary format.
static std::uint32_t SerializeXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3, void *buffer)
Writes a XxHash-3 64bit checksum of the byte range given by data and length.
static RResult< std::uint32_t > DeserializeClusterSummary(const void *buffer, std::uint64_t bufSize, RClusterSummary &clusterSummary)
static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer)
static RResult< std::uint32_t > DeserializeClusterGroup(const void *buffer, std::uint64_t bufSize, RClusterGroup &clusterGroup)
static std::uint32_t SerializeEnvelopePostscript(unsigned char *envelope, std::uint64_t size)
static std::uint32_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer)
static RContext SerializeHeader(void *buffer, const RNTupleDescriptor &desc)
static RResult< std::uint32_t > DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type)
static std::uint32_t SerializeFeatureFlags(const std::vector< std::uint64_t > &flags, void *buffer)
static RResult< std::uint32_t > DeserializeExtraTypeInfoId(const void *buffer, ROOT::Experimental::EExtraTypeInfoIds &id)
std::unordered_map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< void > DeserializeHeader(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< void > DeserializeFooter(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static std::string SerializeStreamerInfos(const StreamerInfoMap_t &infos)
static RResult< std::uint32_t > DeserializeFrameHeader(const void *buffer, std::uint64_t bufSize, std::uint64_t &frameSize, std::uint32_t &nitems)
static std::uint32_t SerializePageList(void *buffer, const RNTupleDescriptor &desc, std::span< DescriptorId_t > physClusterIDs, const RContext &context)
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t SerializeEnvelopePreamble(std::uint16_t envelopeType, void *buffer)
static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val)
static RResult< std::uint32_t > DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer)
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
static std::uint32_t SerializeFramePostscript(void *frame, std::uint64_t size)
static std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
static RResult< void > DeserializePageList(const void *buffer, std::uint64_t bufSize, DescriptorId_t clusterGroupId, RNTupleDescriptor &desc)
static std::uint32_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer)
While we could just interpret the enums as ints, we make the translation explicit in order to avoid a...
static std::uint32_t SerializeSchemaDescription(void *buffer, const RNTupleDescriptor &desc, const RContext &context, bool forHeaderExtension=false)
Serialize the schema description in desc into buffer.
static RResult< std::uint32_t > DeserializeSchemaDescription(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer)
static std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
static RResult< void > VerifyXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3)
Expects an xxhash3 checksum in the 8 bytes following data + length and verifies it.
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static RResult< std::uint32_t > DeserializeFeatureFlags(const void *buffer, std::uint64_t bufSize, std::vector< std::uint64_t > &flags)
static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val)
static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val)
static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer)
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint64_t bufSize, std::string &val)
static std::uint32_t SerializeRecordFramePreamble(void *buffer)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer)
static RResult< std::uint32_t > DeserializeEnvelopeLink(const void *buffer, std::uint64_t bufSize, REnvelopeLink &envelopeLink)
static std::uint32_t SerializeFooter(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
static RResult< std::uint32_t > DeserializeEnvelope(const void *buffer, std::uint64_t bufSize, std::uint16_t expectedType)
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< std::uint32_t > DeserializeLocator(const void *buffer, std::uint64_t bufSize, RNTupleLocator &locator)
static std::uint32_t SerializeExtraTypeInfoId(ROOT::Experimental::EExtraTypeInfoIds id, void *buffer)
Records the partition of data into pages for a particular column in a particular cluster.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Field specific extra type information from the header / extension header.
Meta-data stored for every field of an ntuple.
The on-storage meta-data of an ntuple.
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RColumnDescriptorIterable GetColumnIterable() const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::vector< std::uint64_t > GetFeatureFlags() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
The concrete implementation of TBuffer for writing/reading to/from a ROOT file or socket.
Definition TBufferFile.h:47
TObject * ReadObject(const TClass *cl) override
Read object from I/O buffer.
void WriteObject(const TObject *obj, Bool_t cacheReuse=kTRUE) override
Write object to I/O buffer.
@ kWrite
Definition TBuffer.h:73
@ kRead
Definition TBuffer.h:73
Int_t Length() const
Definition TBuffer.h:100
char * Buffer() const
Definition TBuffer.h:96
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
A doubly linked list.
Definition TList.h:38
static TClass * Class()
void Add(TObject *obj) override
Definition TList.h:83
Describes a persistent version of a class.
void BuildCheck(TFile *file=nullptr, Bool_t load=kTRUE) override
Check if built and consistent with the class dictionary.
RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
RNTupleLocator payload that is common for object stores using 64bit location information.
Generic information about the physical location of data.
ELocatorType
Values for the Type field in non-disk locators.
std::uint8_t fReserved
Reserved for use by concrete storage backends.
ELocatorType fType
For non-disk locators, the value for the Type field.
std::variant< std::uint64_t, std::string, RNTupleLocatorObject64 > fPosition
Simple on-disk locators consisting of a 64-bit offset use variant type uint64_t; extended locators ha...