Logo ROOT  
Reference Guide
RNTupleSerialize.cxx
Go to the documentation of this file.
1/// \file RNTupleSerialize.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2021-08-02
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
17#include <ROOT/RColumnModel.hxx>
18#include <ROOT/RError.hxx>
21
22#include <RVersion.h>
23#include <RZip.h> // for R__crc32
24
25#include <cstring> // for memcpy
26#include <deque>
27#include <set>
28#include <unordered_map>
29
30template <typename T>
32
33
34namespace {
35
36std::uint32_t SerializeFieldV1(
37 const ROOT::Experimental::RFieldDescriptor &fieldDesc, ROOT::Experimental::DescriptorId_t physParentId, void *buffer)
38{
40
41 auto base = reinterpret_cast<unsigned char *>(buffer);
42 auto pos = base;
43 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
44
45 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
46
47 pos += RNTupleSerializer::SerializeUInt32(fieldDesc.GetFieldVersion(), *where);
48 pos += RNTupleSerializer::SerializeUInt32(fieldDesc.GetTypeVersion(), *where);
49 pos += RNTupleSerializer::SerializeUInt32(physParentId, *where);
50 pos += RNTupleSerializer::SerializeFieldStructure(fieldDesc.GetStructure(), *where);
51 if (fieldDesc.GetNRepetitions() > 0) {
52 pos += RNTupleSerializer::SerializeUInt16(RNTupleSerializer::kFlagRepetitiveField, *where);
53 pos += RNTupleSerializer::SerializeUInt64(fieldDesc.GetNRepetitions(), *where);
54 } else {
55 pos += RNTupleSerializer::SerializeUInt16(0, *where);
56 }
57 pos += RNTupleSerializer::SerializeString(fieldDesc.GetFieldName(), *where);
58 pos += RNTupleSerializer::SerializeString(fieldDesc.GetTypeName(), *where);
59 pos += RNTupleSerializer::SerializeString("" /* type alias */, *where);
60 pos += RNTupleSerializer::SerializeString(fieldDesc.GetFieldDescription(), *where);
61
62 auto size = pos - base;
63 RNTupleSerializer::SerializeFramePostscript(base, size);
64
65 return size;
66}
67
68
/// Serializes a version-1 record for every field reachable from the zero field of `desc`.
/// Fields are visited level by level using a queue of field ids; each visited field is registered with
/// context.MapFieldId() before its record is written. Returns the number of bytes produced.
/// NOTE(review): the listing numbering jumps from 69 to 72, so the `desc` and `context` parameter lines
/// are not visible here — confirm their types against the upstream file.
69std::uint32_t SerializeFieldTree(
72 void *buffer)
73{
74 auto base = reinterpret_cast<unsigned char *>(buffer);
75 auto pos = base;
// With a null buffer *where stays nullptr (size-only mode); otherwise it aliases the write cursor `pos`
76 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
77
78 std::deque<ROOT::Experimental::DescriptorId_t> idQueue{desc.GetFieldZeroId()};
79
80 while (!idQueue.empty()) {
81 auto parentId = idQueue.front();
82 idQueue.pop_front();
83
84 for (const auto &f : desc.GetFieldIterable(parentId)) {
85 auto physFieldId = context.MapFieldId(f.GetId());
// Direct children of the zero field are written with their own physical id as parent id
86 auto physParentId = (parentId == desc.GetFieldZeroId()) ? physFieldId : context.GetPhysFieldId(parentId);
87 pos += SerializeFieldV1(f, physParentId, *where);
// Queue the field so that its sub fields are serialized in a later iteration
88 idQueue.push_back(f.GetId());
89 }
90 }
91
92 return pos - base;
93}
94
95RResult<std::uint32_t> DeserializeFieldV1(
96 const void *buffer,
97 std::uint32_t bufSize,
99{
100 using RNTupleSerializer = ROOT::Experimental::Internal::RNTupleSerializer;
102
103 auto base = reinterpret_cast<const unsigned char *>(buffer);
104 auto bytes = base;
105 std::uint32_t frameSize;
106 auto fnFrameSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
107 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
108 if (!result)
109 return R__FORWARD_ERROR(result);
110 bytes += result.Unwrap();
111
112 std::uint32_t fieldVersion;
113 std::uint32_t typeVersion;
114 std::uint32_t parentId;
115 // initialize properly for call to SerializeFieldStructure()
117 std::uint16_t flags;
118 if (fnFrameSizeLeft() < 3 * sizeof(std::uint32_t) +
119 RNTupleSerializer::SerializeFieldStructure(structure, nullptr) +
120 sizeof(std::uint16_t))
121 {
122 return R__FAIL("field record frame too short");
123 }
124 bytes += RNTupleSerializer::DeserializeUInt32(bytes, fieldVersion);
125 bytes += RNTupleSerializer::DeserializeUInt32(bytes, typeVersion);
126 bytes += RNTupleSerializer::DeserializeUInt32(bytes, parentId);
127 auto res16 = RNTupleSerializer::DeserializeFieldStructure(bytes, structure);
128 if (!res16)
129 return R__FORWARD_ERROR(res16);
130 bytes += res16.Unwrap();
131 bytes += RNTupleSerializer::DeserializeUInt16(bytes, flags);
132 fieldDesc.FieldVersion(fieldVersion).TypeVersion(typeVersion).ParentId(parentId).Structure(structure);
133
134 if (flags & RNTupleSerializer::kFlagRepetitiveField) {
135 if (fnFrameSizeLeft() < sizeof(std::uint64_t))
136 return R__FAIL("field record frame too short");
137 std::uint64_t nRepetitions;
138 bytes += RNTupleSerializer::DeserializeUInt64(bytes, nRepetitions);
139 fieldDesc.NRepetitions(nRepetitions);
140 }
141
142 std::string fieldName;
143 std::string typeName;
144 std::string aliasName; // so far unused
145 std::string description;
146 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), fieldName).Unwrap();
147 if (!result)
148 return R__FORWARD_ERROR(result);
149 bytes += result.Unwrap();
150 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), typeName).Unwrap();
151 if (!result)
152 return R__FORWARD_ERROR(result);
153 bytes += result.Unwrap();
154 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), aliasName).Unwrap();
155 if (!result)
156 return R__FORWARD_ERROR(result);
157 bytes += result.Unwrap();
158 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), description).Unwrap();
159 if (!result)
160 return R__FORWARD_ERROR(result);
161 bytes += result.Unwrap();
162 fieldDesc.FieldName(fieldName).TypeName(typeName).FieldDescription(description);
163
164 return frameSize;
165}
166
/// Serializes one version-1 record frame per column of `desc`, walking the field tree level by level
/// from the zero field and emitting the columns of each visited field. Every serialized column is
/// registered with context.MapColumnId(). Returns the number of bytes produced.
/// NOTE(review): the listing numbering jumps from 167 to 170, so the `desc` and `context` parameter
/// lines are not visible here — confirm against the upstream file.
167std::uint32_t SerializeColumnListV1(
170 void *buffer)
171{
172 using RNTupleSerializer = ROOT::Experimental::Internal::RNTupleSerializer;
173 using RColumnElementBase = ROOT::Experimental::Detail::RColumnElementBase;
174
175 auto base = reinterpret_cast<unsigned char *>(buffer);
176 auto pos = base;
// With a null buffer *where stays nullptr (size-only mode); otherwise it aliases the write cursor `pos`
177 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
178
179 std::deque<ROOT::Experimental::DescriptorId_t> idQueue{desc.GetFieldZeroId()};
180
181 while (!idQueue.empty()) {
182 auto parentId = idQueue.front();
183 idQueue.pop_front();
184
185 for (const auto &c : desc.GetColumnIterable(parentId)) {
// Remember the frame start so the postscript can patch the frame size
186 auto frame = pos;
187 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
188
189 auto type = c.GetModel().GetType();
190 pos += RNTupleSerializer::SerializeColumnType(type, *where);
191 pos += RNTupleSerializer::SerializeUInt16(RColumnElementBase::GetBitsOnStorage(type), *where);
192 pos += RNTupleSerializer::SerializeUInt32(context.GetPhysFieldId(c.GetFieldId()), *where);
193 std::uint32_t flags = 0;
194 // TODO(jblomer): add support for descending columns in the column model
195 if (c.GetModel().GetIsSorted())
196 flags |= RNTupleSerializer::kFlagSortAscColumn;
197 // TODO(jblomer): fix for unsigned integer types
// NOTE(review): the listing numbering skips line 198 here; a condition guarding the next statement
// may be missing from this listing — confirm against the upstream file.
199 flags |= RNTupleSerializer::kFlagNonNegativeColumn;
200 pos += RNTupleSerializer::SerializeUInt32(flags, *where);
201
202 pos += RNTupleSerializer::SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
203
204 context.MapColumnId(c.GetId());
205 }
206
// Queue the sub fields so that their columns are serialized in later iterations
207 for (const auto &f : desc.GetFieldIterable(parentId))
208 idQueue.push_back(f.GetId());
209 }
210
211 return pos - base;
212}
213
/// Deserializes one version-1 column record frame (column type, bits on storage, field id, flags) and
/// stores field id and column model in `columnDesc`. Returns the frame size on success, or an error if
/// the frame header or the record is too short or the column type is unknown.
/// NOTE(review): the listing numbering skips lines 217, 220 and 249 (the `columnDesc` parameter, a
/// presumably local alias for EColumnType and the guard of the size-mismatch failure) — confirm
/// against the upstream file.
214RResult<std::uint32_t> DeserializeColumnV1(
215 const void *buffer,
216 std::uint32_t bufSize,
218{
219 using RNTupleSerializer = ROOT::Experimental::Internal::RNTupleSerializer;
221
222 auto base = reinterpret_cast<const unsigned char *>(buffer);
223 auto bytes = base;
224 std::uint32_t frameSize;
// Number of bytes of the frame that have not been consumed yet
225 auto fnFrameSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
226 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
227 if (!result)
228 return R__FORWARD_ERROR(result);
229 bytes += result.Unwrap();
230
231 // Initialize properly for SerializeColumnType
232 EColumnType type{EColumnType::kIndex};
233 std::uint16_t bitsOnStorage;
234 std::uint32_t fieldId;
235 std::uint32_t flags;
// The size-only call of SerializeColumnType() yields the on-disk size of the column type value
236 if (fnFrameSizeLeft() < RNTupleSerializer::SerializeColumnType(type, nullptr) +
237 sizeof(std::uint16_t) + 2 * sizeof(std::uint32_t))
238 {
239 return R__FAIL("column record frame too short");
240 }
241 auto res16 = RNTupleSerializer::DeserializeColumnType(bytes, type);
242 if (!res16)
243 return R__FORWARD_ERROR(res16);
244 bytes += res16.Unwrap();
245 bytes += RNTupleSerializer::DeserializeUInt16(bytes, bitsOnStorage);
246 bytes += RNTupleSerializer::DeserializeUInt32(bytes, fieldId);
247 bytes += RNTupleSerializer::DeserializeUInt32(bytes, flags);
248
250 return R__FAIL("column element size mismatch");
251
// A column counts as sorted if either the ascending or the descending flag is set
252 const bool isSorted = (flags & (RNTupleSerializer::kFlagSortAscColumn | RNTupleSerializer::kFlagSortDesColumn));
253 columnDesc.FieldId(fieldId).Model({type, isSorted});
254
255 return frameSize;
256}
257
258} // anonymous namespace
259
260
/// Computes the CRC32 of `length` bytes at `data`, stores it in `crc32` and writes it to `buffer` as a
/// 32 bit integer. With a null buffer nothing is computed and `crc32` is left untouched.
/// Always returns 4, the serialized size of the checksum.
262 const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
263{
264 if (buffer != nullptr) {
// First call obtains the initial checksum value, second call folds in the data
265 crc32 = R__crc32(0, nullptr, 0);
266 crc32 = R__crc32(crc32, data, length);
267 SerializeUInt32(crc32, buffer);
268 }
269 return 4;
270}
271
/// Recomputes the CRC32 of `length` bytes at `data` and compares it with the 32 bit value stored
/// directly behind the data (at data + length). The stored value is returned in `crc32`.
/// Fails with "CRC32 checksum mismatch" if the two values differ.
273 const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
274{
275 auto checksumReal = R__crc32(0, nullptr, 0);
276 checksumReal = R__crc32(checksumReal, data, length);
// Reads 4 bytes past the checksummed range; the caller must provide them
277 DeserializeUInt32(data + length, crc32);
278 if (crc32 != checksumReal)
279 return R__FAIL("CRC32 checksum mismatch");
280 return RResult<void>::Success();
281}
282
283
/// Convenience overload of VerifyCRC32() for callers that do not need the stored checksum value.
285 const unsigned char *data, std::uint32_t length)
286{
287 std::uint32_t crc32;
288 return R__FORWARD_RESULT(VerifyCRC32(data, length, crc32));
289}
290
291
292std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt16(std::int16_t val, void *buffer)
293{
294 if (buffer != nullptr) {
295 auto bytes = reinterpret_cast<unsigned char *>(buffer);
296 bytes[0] = (val & 0x00FF);
297 bytes[1] = (val & 0xFF00) >> 8;
298 }
299 return 2;
300}
301
302std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt16(const void *buffer, std::int16_t &val)
303{
304 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
305 val = std::int16_t(bytes[0]) + (std::int16_t(bytes[1]) << 8);
306 return 2;
307}
308
309std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt16(std::uint16_t val, void *buffer)
310{
311 return SerializeInt16(val, buffer);
312}
313
314std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt16(const void *buffer, std::uint16_t &val)
315{
316 return DeserializeInt16(buffer, *reinterpret_cast<std::int16_t *>(&val));
317}
318
319std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt32(std::int32_t val, void *buffer)
320{
321 if (buffer != nullptr) {
322 auto bytes = reinterpret_cast<unsigned char *>(buffer);
323 bytes[0] = (val & 0x000000FF);
324 bytes[1] = (val & 0x0000FF00) >> 8;
325 bytes[2] = (val & 0x00FF0000) >> 16;
326 bytes[3] = (val & 0xFF000000) >> 24;
327 }
328 return 4;
329}
330
331std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt32(const void *buffer, std::int32_t &val)
332{
333 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
334 val = std::int32_t(bytes[0]) + (std::int32_t(bytes[1]) << 8) +
335 (std::int32_t(bytes[2]) << 16) + (std::int32_t(bytes[3]) << 24);
336 return 4;
337}
338
339std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt32(std::uint32_t val, void *buffer)
340{
341 return SerializeInt32(val, buffer);
342}
343
344std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt32(const void *buffer, std::uint32_t &val)
345{
346 return DeserializeInt32(buffer, *reinterpret_cast<std::int32_t *>(&val));
347}
348
349std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt64(std::int64_t val, void *buffer)
350{
351 if (buffer != nullptr) {
352 auto bytes = reinterpret_cast<unsigned char *>(buffer);
353 bytes[0] = (val & 0x00000000000000FF);
354 bytes[1] = (val & 0x000000000000FF00) >> 8;
355 bytes[2] = (val & 0x0000000000FF0000) >> 16;
356 bytes[3] = (val & 0x00000000FF000000) >> 24;
357 bytes[4] = (val & 0x000000FF00000000) >> 32;
358 bytes[5] = (val & 0x0000FF0000000000) >> 40;
359 bytes[6] = (val & 0x00FF000000000000) >> 48;
360 bytes[7] = (val & 0xFF00000000000000) >> 56;
361 }
362 return 8;
363}
364
365std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt64(const void *buffer, std::int64_t &val)
366{
367 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
368 val = std::int64_t(bytes[0]) + (std::int64_t(bytes[1]) << 8) +
369 (std::int64_t(bytes[2]) << 16) + (std::int64_t(bytes[3]) << 24) +
370 (std::int64_t(bytes[4]) << 32) + (std::int64_t(bytes[5]) << 40) +
371 (std::int64_t(bytes[6]) << 48) + (std::int64_t(bytes[7]) << 56);
372 return 8;
373}
374
375std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt64(std::uint64_t val, void *buffer)
376{
377 return SerializeInt64(val, buffer);
378}
379
380std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt64(const void *buffer, std::uint64_t &val)
381{
382 return DeserializeInt64(buffer, *reinterpret_cast<std::int64_t *>(&val));
383}
384
385std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeString(const std::string &val, void *buffer)
386{
387 if (buffer) {
388 auto pos = reinterpret_cast<unsigned char *>(buffer);
389 pos += SerializeUInt32(val.length(), pos);
390 memcpy(pos, val.data(), val.length());
391 }
392 return sizeof(std::uint32_t) + val.length();
393}
394
/// Reads a string stored as a 32 bit length followed by the raw characters from `buffer` into `val`.
/// Fails with "string buffer too short" if `bufSize` cannot hold the length prefix or the announced
/// number of characters. On success returns the number of bytes consumed (4 + string length).
396 const void *buffer, std::uint32_t bufSize, std::string &val)
397{
398 if (bufSize < sizeof(std::uint32_t))
399 return R__FAIL("string buffer too short");
// From here on bufSize is the space available for the characters
400 bufSize -= sizeof(std::uint32_t);
401
402 auto base = reinterpret_cast<const unsigned char *>(buffer);
403 auto bytes = base;
404 std::uint32_t length;
405 bytes += DeserializeUInt32(buffer, length);
406 if (bufSize < length)
407 return R__FAIL("string buffer too short");
408
409 val.resize(length);
410 memcpy(&val[0], bytes, length);
411 return sizeof(std::uint32_t) + length;
412}
413
414
// Serializes a column type as a 16 bit on-disk code in the range 0x02 to 0x0D and returns the
// serialized size; an unexpected type throws an RException.
// NOTE(review): the signature and the `case` labels are not visible in this listing (the numbering
// skips them), so the mapping from enum values to codes must be checked against the upstream file.
417{
419 switch (type) {
421 return SerializeUInt16(0x02, buffer);
423 return SerializeUInt16(0x03, buffer);
425 return SerializeUInt16(0x04, buffer);
427 return SerializeUInt16(0x05, buffer);
429 return SerializeUInt16(0x06, buffer);
431 return SerializeUInt16(0x07, buffer);
433 return SerializeUInt16(0x08, buffer);
435 return SerializeUInt16(0x09, buffer);
437 return SerializeUInt16(0x0A, buffer);
439 return SerializeUInt16(0x0B, buffer);
441 return SerializeUInt16(0x0C, buffer);
443 return SerializeUInt16(0x0D, buffer);
444 default:
445 throw RException(R__FAIL("ROOT bug: unexpected column type"));
446 }
447}
448
449
/// Reads a 16 bit on-disk column type code from `buffer` and translates it into `type`.
/// Codes 0x02 to 0x0D are accepted; any other code fails with "unexpected on-disk column type".
/// On success the number of bytes consumed is returned.
/// NOTE(review): the assignments to `type` inside the cases are not visible in this listing (the
/// numbering skips them) — confirm the mapping against the upstream file.
451 const void *buffer, ROOT::Experimental::EColumnType &type)
452{
454 std::uint16_t onDiskType;
455 auto result = DeserializeUInt16(buffer, onDiskType);
456 switch (onDiskType) {
457 case 0x02:
459 break;
460 case 0x03:
462 break;
463 case 0x04:
465 break;
466 case 0x05:
468 break;
469 case 0x06:
471 break;
472 case 0x07:
474 break;
475 case 0x08:
477 break;
478 case 0x09:
480 break;
481 case 0x0A:
483 break;
484 case 0x0B:
486 break;
487 case 0x0C:
489 break;
490 case 0x0D:
492 break;
493 default:
494 return R__FAIL("unexpected on-disk column type");
495 }
496 return result;
497}
498
499
/// Serializes a field structure value as a 16 bit on-disk code in the range 0x00 to 0x04 and returns
/// the serialized size; an unexpected value throws an RException.
/// NOTE(review): the `case` labels are not visible in this listing (the numbering skips them) —
/// confirm the mapping against the upstream file.
501 ROOT::Experimental::ENTupleStructure structure, void *buffer)
502{
504 switch (structure) {
506 return SerializeUInt16(0x00, buffer);
508 return SerializeUInt16(0x01, buffer);
510 return SerializeUInt16(0x02, buffer);
512 return SerializeUInt16(0x03, buffer);
514 return SerializeUInt16(0x04, buffer);
515 default:
516 throw RException(R__FAIL("ROOT bug: unexpected field structure type"));
517 }
518}
519
520
/// Reads a 16 bit on-disk field structure code from `buffer` and translates it into `structure`.
/// Codes 0x00 to 0x04 are accepted; any other code fails with "unexpected on-disk field structure value".
/// On success the number of bytes consumed is returned.
/// NOTE(review): the assignments for codes 0x01 and 0x04 are not visible in this listing (the
/// numbering skips them) — confirm against the upstream file.
522 const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
523{
525 std::uint16_t onDiskValue;
526 auto result = DeserializeUInt16(buffer, onDiskValue);
527 switch (onDiskValue) {
528 case 0x00:
529 structure = ENTupleStructure::kLeaf;
530 break;
531 case 0x01:
533 break;
534 case 0x02:
535 structure = ENTupleStructure::kRecord;
536 break;
537 case 0x03:
538 structure = ENTupleStructure::kVariant;
539 break;
540 case 0x04:
542 break;
543 default:
544 return R__FAIL("unexpected on-disk field structure value");
545 }
546 return result;
547}
548
549
550/// Currently all envelopes have the same version number (1). At a later point, different envelope types
551/// may have different version numbers
/// Writes the envelope preamble: kEnvelopeCurrentVersion followed by kEnvelopeMinVersion, each as a
/// 16 bit integer. Returns the number of bytes produced; a null buffer requests the size only.
553{
554 auto base = reinterpret_cast<unsigned char *>(buffer);
555 auto pos = base;
// With a null buffer *where stays nullptr (size-only mode); otherwise it aliases the write cursor `pos`
556 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
557
558 pos += SerializeUInt16(kEnvelopeCurrentVersion, *where);
559 pos += SerializeUInt16(kEnvelopeMinVersion, *where);
560 return pos - base;
561}
562
563
/// Finishes an envelope by appending the CRC32 of its first `size` bytes; the checksum is also
/// returned in `crc32` (only when `buffer` is non-null, see SerializeCRC32()). Returns the
/// serialized size of the checksum.
565 const unsigned char *envelope, std::uint32_t size, std::uint32_t &crc32, void *buffer)
566{
567 return SerializeCRC32(envelope, size, crc32, buffer);
568}
569
/// Convenience overload of SerializeEnvelopePostscript() for callers that do not need the checksum value.
571 const unsigned char *envelope, std::uint32_t size, void *buffer)
572{
573 std::uint32_t crc32;
574 return SerializeEnvelopePostscript(envelope, size, crc32, buffer);
575}
576
577/// Currently all envelopes have the same version number (1). At a later point, different envelope types
578/// may have different version numbers
/// Validates an envelope of `bufSize` bytes: checks the minimum size, the two 16 bit version numbers of
/// the preamble and finally the CRC32 stored in the last 4 bytes (returned in `crc32`).
/// On success returns the size of the preamble, i.e. the number of bytes consumed at the start.
580 const void *buffer, std::uint32_t bufSize, std::uint32_t &crc32)
581{
// Smallest valid envelope: two version numbers plus the checksum
582 if (bufSize < (2 * sizeof(std::uint16_t) + sizeof(std::uint32_t)))
583 return R__FAIL("invalid envelope, too short");
584
585 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
586 auto base = bytes;
587
588 std::uint16_t protocolVersionAtWrite;
589 std::uint16_t protocolVersionMinRequired;
590 bytes += DeserializeUInt16(bytes, protocolVersionAtWrite);
591 // RNTuple compatible back to version 1 (but not to version 0)
592 if (protocolVersionAtWrite < 1)
593 return R__FAIL("The RNTuple format is too old (version 0)");
594
595 bytes += DeserializeUInt16(bytes, protocolVersionMinRequired);
596 if (protocolVersionMinRequired > kEnvelopeCurrentVersion) {
597 return R__FAIL(std::string("The RNTuple format is too new (version ") +
598 std::to_string(protocolVersionMinRequired) + ")");
599 }
600
601 // We defer the CRC32 check to the end to facilitate testing of forward/backward incompatibilities
602 auto result = VerifyCRC32(base, bufSize - 4, crc32);
603 if (!result)
604 return R__FORWARD_ERROR(result);
605
606 return sizeof(protocolVersionAtWrite) + sizeof(protocolVersionMinRequired);
607}
608
609
/// Convenience overload of DeserializeEnvelope() for callers that do not need the checksum value.
611 const void *buffer, std::uint32_t bufSize)
612{
613 std::uint32_t crc32;
614 return R__FORWARD_RESULT(DeserializeEnvelope(buffer, bufSize, crc32));
615}
616
617
// Writes the placeholder for the size of a record frame and returns its serialized size.
// SerializeFramePostscript() later multiplies the stored marker with the final frame size.
619{
620 // Marker: multiply the final size with 1
621 return SerializeInt32(1, buffer);
622}
623
624
/// Writes the preamble of a list frame: a size placeholder (marker -1) followed by the number of
/// items. Throws an RException if `nitems` is 2^28 or larger. Returns the number of bytes produced;
/// a null buffer requests the size only.
626 std::uint32_t nitems, void *buffer)
627{
628 if (nitems >= (1 << 28))
629 throw RException(R__FAIL("list frame too large: " + std::to_string(nitems)));
630
631 auto base = reinterpret_cast<unsigned char *>(buffer);
632 auto pos = base;
// With a null buffer *where stays nullptr (size-only mode); otherwise it aliases the write cursor `pos`
633 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
634
635 // Marker: multiply the final size with -1
636 pos += RNTupleSerializer::SerializeInt32(-1, *where);
637 pos += SerializeUInt32(nitems, *where);
638 return pos - base;
639}
640
/// Finalizes a frame by replacing the marker written by the preamble with marker * size, so that
/// record frames store a positive and list frames a negative size. `frame` points to the start of
/// the frame; with a null frame only the size checks are performed. Throws an RException for a
/// negative size or a size smaller than the preamble. Always returns 0 (no bytes are appended).
642 void *frame, std::int32_t size)
643{
// A negative value here means the caller's size did not fit into 31 bits
644 if (size < 0)
645 throw RException(R__FAIL("frame too large: " + std::to_string(size)));
646 if (size < static_cast<std::int32_t>(sizeof(std::int32_t)))
647 throw RException(R__FAIL("frame too short: " + std::to_string(size)));
648 if (frame) {
649 std::int32_t marker;
650 DeserializeInt32(frame, marker);
// A list frame (negative marker) has a preamble of two 32 bit integers
651 if ((marker < 0) && (size < static_cast<std::int32_t>(2 * sizeof(std::int32_t))))
652 throw RException(R__FAIL("frame too short: " + std::to_string(size)));
653
654 SerializeInt32(marker * size, frame);
655 }
656 return 0;
657}
658
/// Reads a frame header from `buffer`. A non-negative stored size denotes a record frame (`nitems`
/// is set to 1); a negative stored size denotes a list frame whose item count follows the size.
/// `frameSize` receives the absolute frame size. Fails if the buffer or the frame is smaller than
/// the header or if the frame does not fit into `bufSize`. Returns the size of the header in bytes.
660 const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
661{
662 if (bufSize < sizeof(std::int32_t))
663 return R__FAIL("frame too short");
664
// Signed view of frameSize so the sign of the stored size can be inspected
665 std::int32_t *ssize = reinterpret_cast<std::int32_t *>(&frameSize);
666 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
667 bytes += DeserializeInt32(bytes, *ssize);
668 if (*ssize >= 0) {
669 // Record frame
670 nitems = 1;
671 if (frameSize < sizeof(std::int32_t))
672 return R__FAIL("corrupt record frame size");
673 } else {
674 // List frame
675 if (bufSize < 2 * sizeof(std::int32_t))
676 return R__FAIL("frame too short");
677 bytes += DeserializeUInt32(bytes, nitems);
// NOTE(review): (2 << 28) - 1 keeps the lower 29 bits, whereas SerializeListFramePreamble() limits
// nitems to values below (1 << 28); the mask may have been meant as (1 << 28) - 1 — confirm.
678 nitems &= (2 << 28) - 1;
679 *ssize = -(*ssize);
680 if (frameSize < 2 * sizeof(std::int32_t))
681 return R__FAIL("corrupt list frame size");
682 }
683
684 if (bufSize < frameSize)
685 return R__FAIL("frame too short");
686
687 return bytes - reinterpret_cast<const unsigned char *>(buffer);
688}
689
/// Convenience overload of DeserializeFrameHeader() for callers that do not need the item count.
691 const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize)
692{
693 std::uint32_t nitems;
694 return R__FORWARD_RESULT(DeserializeFrameHeader(buffer, bufSize, frameSize, nitems));
695}
696
/// Serializes the feature flags as a chain of 64 bit integers. An empty vector is written as a single
/// zero. Returns the serialized size in bytes. Throws an RException for a negative flag value; note
/// that the values are only inspected when `buffer` is non-null.
698 const std::vector<std::int64_t> &flags, void *buffer)
699{
700 if (flags.empty())
701 return SerializeInt64(0, buffer);
702
703 if (buffer) {
704 auto bytes = reinterpret_cast<unsigned char *>(buffer);
705
706 for (unsigned i = 0; i < flags.size(); ++i) {
// The sign bit is reserved as continuation marker, so user values must be non-negative
707 if (flags[i] < 0)
708 throw RException(R__FAIL("feature flag out of bounds"));
709
710 // The MSb indicates that another Int64 follows; set this bit to 1 for all except the last element
711 if (i == (flags.size() - 1))
712 SerializeInt64(flags[i], bytes);
713 else
714 bytes += SerializeInt64(flags[i] | 0x8000000000000000, bytes);
715 }
716 }
717 return (flags.size() * sizeof(std::int64_t));
718}
719
/// Reads a chain of 64 bit feature flag words from `buffer` into `flags` (cleared first). Reading
/// continues as long as the most significant bit of the word just read is set; that bit is removed
/// from the stored value. Fails with "feature flag buffer too short" if the chain exceeds `bufSize`.
/// Returns the number of bytes consumed.
721 const void *buffer, std::uint32_t bufSize, std::vector<std::int64_t> &flags)
722{
723 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
724
725 flags.clear();
726 std::int64_t f;
727 do {
728 if (bufSize < sizeof(std::int64_t))
729 return R__FAIL("feature flag buffer too short");
730 bytes += DeserializeInt64(bytes, f);
731 bufSize -= sizeof(std::int64_t);
732 flags.emplace_back(f & ~0x8000000000000000);
// A set sign bit (negative value) announces another word
733 } while (f < 0);
734
735 return (flags.size() * sizeof(std::int64_t));
736}
737
/// Serializes a locator. If the locator has a non-empty URL, a negative 32 bit head is written whose
/// absolute value combines the type 0x02 (bits 24 and up) with the URL length (lower 24 bits),
/// followed by the URL characters. Otherwise the bytes-on-storage value (32 bit) and the position
/// (64 bit) are written. Throws an RException ("locator too large") if the URL length needs more
/// than 24 bits or the bytes-on-storage value is negative when viewed as a 32 bit signed integer.
/// Returns the serialized size; a null buffer requests the size only.
739 const RNTupleLocator &locator, void *buffer)
740{
741 std::uint32_t size = 0;
742 if (!locator.fUrl.empty()) {
743 if (locator.fUrl.length() >= (1 << 24))
744 throw RException(R__FAIL("locator too large"));
745 std::int32_t head = locator.fUrl.length();
746 head |= 0x02 << 24;
// The negative sign distinguishes this form from the size/position form below
747 head = -head;
748 size += SerializeInt32(head, buffer);
749 if (buffer)
750 memcpy(reinterpret_cast<unsigned char *>(buffer) + size, locator.fUrl.data(), locator.fUrl.length());
751 size += locator.fUrl.length();
752 return size;
753 }
754
755 if (static_cast<std::int32_t>(locator.fBytesOnStorage) < 0)
756 throw RException(R__FAIL("locator too large"));
757 size += SerializeUInt32(locator.fBytesOnStorage, buffer);
758 size += SerializeUInt64(locator.fPosition, buffer ? reinterpret_cast<unsigned char *>(buffer) + size : nullptr);
759 return size;
760}
761
/// Reads a locator from `buffer`. A negative head denotes a typed locator: only type 0x02 is
/// accepted, in which case the following bytes are stored in locator.fUrl and size/position are
/// zeroed. A non-negative head is the bytes-on-storage value and is followed by a 64 bit position;
/// the URL is cleared. Fails if the buffer is too short or the type is unsupported.
/// Returns the number of bytes consumed.
763 const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
764{
765 if (bufSize < sizeof(std::int32_t))
766 return R__FAIL("too short locator");
767
768 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
769 std::int32_t head;
770
771 bytes += DeserializeInt32(bytes, head);
// From here on bufSize is the space left behind the head
772 bufSize -= sizeof(std::int32_t);
773 if (head < 0) {
774 head = -head;
// Type in the bits from 24 upwards, payload length in the lower 24 bits
775 int type = head >> 24;
776 if (type != 0x02)
777 return R__FAIL("unsupported locator type: " + std::to_string(type));
778 std::uint32_t locatorSize = static_cast<std::uint32_t>(head) & 0x00FFFFFF;
779 if (bufSize < locatorSize)
780 return R__FAIL("too short locator");
781 locator.fBytesOnStorage = 0;
782 locator.fPosition = 0;
783 locator.fUrl.resize(locatorSize);
784 memcpy(&locator.fUrl[0], bytes, locatorSize);
785 bytes += locatorSize;
786 } else {
787 if (bufSize < sizeof(std::uint64_t))
788 return R__FAIL("too short locator");
789 std::uint64_t offset;
790 bytes += DeserializeUInt64(bytes, offset);
791 locator.fUrl.clear();
792 locator.fBytesOnStorage = head;
793 locator.fPosition = offset;
794 }
795
796 return bytes - reinterpret_cast<const unsigned char *>(buffer);
797}
798
/// Serializes an envelope link: the unzipped size (32 bit) followed by the locator.
/// Returns the serialized size; a null buffer requests the size only.
800 const REnvelopeLink &envelopeLink, void *buffer)
801{
802 auto size = SerializeUInt32(envelopeLink.fUnzippedSize, buffer);
// The locator goes right behind the size field (or nowhere in size-only mode)
803 size += SerializeLocator(envelopeLink.fLocator,
804 buffer ? reinterpret_cast<unsigned char *>(buffer) + size : nullptr);
805 return size;
806}
807
/// Reads an envelope link (32 bit unzipped size followed by a locator) from `buffer`.
/// Fails with "too short envelope link" if the size field does not fit; locator errors are forwarded.
/// Returns the number of bytes consumed.
809 const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
810{
811 if (bufSize < sizeof(std::int32_t))
812 return R__FAIL("too short envelope link");
813
814 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
815 bytes += DeserializeUInt32(bytes, envelopeLink.fUnzippedSize);
816 bufSize -= sizeof(std::uint32_t);
817 auto result = DeserializeLocator(bytes, bufSize, envelopeLink.fLocator);
818 if (!result)
819 return R__FORWARD_ERROR(result);
820 bytes += result.Unwrap();
821 return bytes - reinterpret_cast<const unsigned char *>(buffer);
822}
823
824
/// Serializes a cluster summary as a record frame: first entry (64 bit) and number of entries
/// (64 bit). If the summary carries a non-negative column group id, the number of entries is stored
/// negated and the column group id (32 bit) is appended. Returns the frame size; a null buffer
/// requests the size only.
826 const RClusterSummary &clusterSummary, void *buffer)
827{
828 auto base = reinterpret_cast<unsigned char *>(buffer);
829 auto pos = base;
// With a null buffer *where stays nullptr (size-only mode); otherwise it aliases the write cursor `pos`
830 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
831
832 auto frame = pos;
833 pos += SerializeRecordFramePreamble(*where);
834 pos += SerializeUInt64(clusterSummary.fFirstEntry, *where);
835 if (clusterSummary.fColumnGroupID >= 0) {
// The negative sign tells the reader that a column group id follows
836 pos += SerializeInt64(-static_cast<int64_t>(clusterSummary.fNEntries), *where);
837 pos += SerializeUInt32(clusterSummary.fColumnGroupID, *where);
838 } else {
839 pos += SerializeInt64(static_cast<int64_t>(clusterSummary.fNEntries), *where);
840 }
841 auto size = pos - frame;
842 pos += SerializeFramePostscript(frame, size);
843 return size;
844}
845
846
/// Reads a cluster summary record frame from `buffer`. A negative stored number of entries means
/// that a 32 bit column group id follows; otherwise fColumnGroupID is set to -1.
/// Fails with "too short cluster summary" if the frame cannot hold the expected fields; frame header
/// errors are forwarded. Returns the frame size.
848 const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
849{
850 auto base = reinterpret_cast<const unsigned char *>(buffer);
851 auto bytes = base;
852 std::uint32_t frameSize;
853 auto result = DeserializeFrameHeader(bytes, bufSize, frameSize);
854 if (!result)
855 return R__FORWARD_ERROR(result);
856 bytes += result.Unwrap();
857
// Number of bytes of the frame that have not been consumed yet
858 auto fnBufSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
859 if (fnBufSizeLeft() < 2 * sizeof(std::uint64_t))
860 return R__FAIL("too short cluster summary");
861
862 bytes += DeserializeUInt64(bytes, clusterSummary.fFirstEntry);
863 std::int64_t nEntries;
864 bytes += DeserializeInt64(bytes, nEntries);
865
866 if (nEntries < 0) {
867 if (fnBufSizeLeft() < sizeof(std::uint32_t))
868 return R__FAIL("too short cluster summary");
869 clusterSummary.fNEntries = -nEntries;
870 std::uint32_t columnGroupID;
871 bytes += DeserializeUInt32(bytes, columnGroupID);
872 clusterSummary.fColumnGroupID = columnGroupID;
873 } else {
874 clusterSummary.fNEntries = nEntries;
875 clusterSummary.fColumnGroupID = -1;
876 }
877
878 return frameSize;
879}
880
881
/// Serializes a cluster group as a record frame: number of clusters (32 bit) followed by the
/// envelope link of the page list. Returns the frame size; a null buffer requests the size only.
883 const RClusterGroup &clusterGroup, void *buffer)
884{
885 auto base = reinterpret_cast<unsigned char *>(buffer);
886 auto pos = base;
// With a null buffer *where stays nullptr (size-only mode); otherwise it aliases the write cursor `pos`
887 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
888
889 auto frame = pos;
890 pos += SerializeRecordFramePreamble(*where);
891 pos += SerializeUInt32(clusterGroup.fNClusters, *where);
892 pos += SerializeEnvelopeLink(clusterGroup.fPageListEnvelopeLink, *where);
893 auto size = pos - frame;
894 pos += SerializeFramePostscript(frame, size);
895 return size;
896}
897
898
/// Reads a cluster group record frame (number of clusters and page list envelope link) from `buffer`.
/// Fails with "too short cluster group" if the cluster count does not fit into the frame; frame
/// header and envelope link errors are forwarded. Returns the frame size.
900 const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
901{
902 auto base = reinterpret_cast<const unsigned char *>(buffer);
903 auto bytes = base;
904
905 std::uint32_t frameSize;
906 auto result = DeserializeFrameHeader(bytes, bufSize, frameSize);
907 if (!result)
908 return R__FORWARD_ERROR(result);
909 bytes += result.Unwrap();
910
// Number of bytes of the frame that have not been consumed yet
911 auto fnFrameSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
912 if (fnFrameSizeLeft() < sizeof(std::uint32_t))
913 return R__FAIL("too short cluster group");
914
915 bytes += DeserializeUInt32(bytes, clusterGroup.fNClusters);
916 result = DeserializeEnvelopeLink(bytes, fnFrameSizeLeft(), clusterGroup.fPageListEnvelopeLink);
917 if (!result)
918 return R__FORWARD_ERROR(result);
919
920 return frameSize;
921}
922
923
/// Serializes the header envelope for `desc` and returns the context that records the id mappings
/// made during serialization together with the header size and header CRC32.
/// Layout as written below: envelope preamble, feature flags, release candidate tag, ntuple name,
/// description, writer string, list frame of fields, list frame of columns, two empty list frames
/// (alias columns, extra type information) and the envelope postscript.
/// A null buffer requests the size only.
/// NOTE(review): the first lines of the signature are not visible in this listing.
926 void *buffer, const ROOT::Experimental::RNTupleDescriptor &desc)
927{
928 RContext context;
929
930 auto base = reinterpret_cast<unsigned char *>(buffer);
931 auto pos = base;
// With a null buffer *where stays nullptr (size-only mode); otherwise it aliases the write cursor `pos`
932 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
933
934 pos += SerializeEnvelopePreamble(*where);
935 // So far we don't make use of feature flags
936 pos += SerializeFeatureFlags(std::vector<std::int64_t>(), *where);
937 pos += SerializeUInt32(kReleaseCandidateTag, *where);
938 pos += SerializeString(desc.GetName(), *where);
939 pos += SerializeString(desc.GetDescription(), *where);
940 pos += SerializeString(std::string("ROOT v") + ROOT_RELEASE, *where);
941
942 auto frame = pos;
943 R__ASSERT(desc.GetNFields() > 0); // we must have at least a zero field
// The zero field itself is not serialized, hence one item less
944 pos += SerializeListFramePreamble(desc.GetNFields() - 1, *where);
945 pos += SerializeFieldTree(desc, context, *where);
946 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
947
948 frame = pos;
949 pos += SerializeListFramePreamble(desc.GetNColumns(), *where);
950 pos += SerializeColumnListV1(desc, context, *where);
951 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
952
953 // We don't use alias columns yet
954 frame = pos;
955 pos += SerializeListFramePreamble(0, *where);
956 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
957
958 // We don't use extra type information yet
959 frame = pos;
960 pos += SerializeListFramePreamble(0, *where);
961 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
962
963 std::uint32_t size = pos - base;
964 std::uint32_t crc32 = 0;
// In size-only mode crc32 keeps its initial value 0 (the checksum is only computed when writing)
965 size += SerializeEnvelopePostscript(base, size, crc32, *where);
966
967 context.SetHeaderSize(size);
968 context.SetHeaderCRC32(crc32);
969 return context;
970}
971
/// Serializes the page list envelope for the clusters in `physClusterIDs`.
/// Layout as written below: envelope preamble, then a list frame with one item per cluster; each
/// cluster item is a list frame with one item per column (ordered by physical column id); each
/// column item is a list frame holding, per page, the number of elements and the locator, followed
/// by the first element index and the compression settings of the column range. The envelope is
/// closed with the postscript. Returns the envelope size; a null buffer requests the size only.
/// NOTE(review): the first line of the signature is not visible in this listing.
973 void *buffer, const RNTupleDescriptor &desc, std::span<DescriptorId_t> physClusterIDs, const RContext &context)
974{
975 auto base = reinterpret_cast<unsigned char *>(buffer);
976 auto pos = base;
// With a null buffer *where stays nullptr (size-only mode); otherwise it aliases the write cursor `pos`
977 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
978
979 pos += SerializeEnvelopePreamble(*where);
980 auto topMostFrame = pos;
981 pos += SerializeListFramePreamble(physClusterIDs.size(), *where);
982
983 for (auto clusterId : physClusterIDs) {
984 const auto &clusterDesc = desc.GetClusterDescriptor(context.GetMemClusterId(clusterId));
985 // Get an ordered set of physical column ids
986 std::set<DescriptorId_t> physColumnIds;
987 for (auto column : clusterDesc.GetColumnIds())
988 physColumnIds.insert(context.GetPhysColumnId(column));
989
990 auto outerFrame = pos;
991 pos += SerializeListFramePreamble(physColumnIds.size(), *where);
992 for (auto physId : physColumnIds) {
// Translate back to the in-memory id to look up the ranges in the cluster descriptor
993 auto memId = context.GetMemColumnId(physId);
994 const auto &columnRange = clusterDesc.GetColumnRange(memId);
995 const auto &pageRange = clusterDesc.GetPageRange(memId);
996
997 auto innerFrame = pos;
998 pos += SerializeListFramePreamble(pageRange.fPageInfos.size(), *where);
999
1000 for (const auto &pi : pageRange.fPageInfos) {
1001 pos += SerializeUInt32(pi.fNElements, *where);
1002 pos += SerializeLocator(pi.fLocator, *where);
1003 }
1004 pos += SerializeUInt64(columnRange.fFirstElementIndex, *where);
1005 pos += SerializeUInt32(columnRange.fCompressionSettings, *where);
1006
1007 pos += SerializeFramePostscript(buffer ? innerFrame : nullptr, pos - innerFrame);
1008 }
1009 pos += SerializeFramePostscript(buffer ? outerFrame : nullptr, pos - outerFrame);
1010 }
1011
1012 pos += SerializeFramePostscript(buffer ? topMostFrame : nullptr, pos - topMostFrame);
1013 std::uint32_t size = pos - base;
1014 size += SerializeEnvelopePostscript(base, size, *where);
1015 return size;
1016}
1017
// Serializes the footer envelope for desc and returns its size in bytes, including the envelope postscript.
// Content order as written below: envelope preamble, feature flags (none set), the header CRC32 taken from the
// context, an empty extension header list, an empty column group list, the cluster summaries, the cluster groups
// and an empty meta-data list.
// When buffer is nullptr, every helper is handed nullptr while pos still advances by the returned sizes
// (presumably a size-only dry run -- confirm against the Serialize* helpers).
// NOTE(review): the line with the return type and function name (listing line 1018) is absent from this extract.
1019 void *buffer, const ROOT::Experimental::RNTupleDescriptor &desc, const RContext &context)
1020{
1021 auto base = reinterpret_cast<unsigned char *>(buffer);
1022 auto pos = base;
// With a null buffer, *where stays nullptr; otherwise where aliases pos, so *where is always the current position
1023 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
1024
1025 pos += SerializeEnvelopePreamble(*where);
1026
1027 // So far we don't make use of feature flags
1028 pos += SerializeFeatureFlags(std::vector<std::int64_t>(), *where);
// The footer repeats the header checksum; the footer deserializer compares it with the header's value
1029 pos += SerializeUInt32(context.GetHeaderCRC32(), *where);
1030
1031 // So far no support for extension headers
1032 auto frame = pos;
1033 pos += SerializeListFramePreamble(0, *where);
1034 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1035
1036 // So far no support for sharded clusters (no column groups)
1037 frame = pos;
1038 pos += SerializeListFramePreamble(0, *where);
1039 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1040
1041 // Cluster summaries
1042 const auto nClusterGroups = desc.GetNClusterGroups();
// The list frame preamble takes the item count up front, so total the clusters over all groups first
1043 unsigned int nClusters = 0;
1044 for (const auto &cgDesc : desc.GetClusterGroupIterable())
1045 nClusters += cgDesc.GetNClusters();
1046 frame = pos;
1047 pos += SerializeListFramePreamble(nClusters, *where);
// Summaries are emitted group by group, for group ids 0..nClusterGroups-1 as mapped by the context
1048 for (unsigned int i = 0; i < nClusterGroups; ++i) {
1049 const auto &cgDesc = desc.GetClusterGroupDescriptor(context.GetMemClusterGroupId(i));
1050 const auto nClustersInGroup = cgDesc.GetNClusters();
1051 const auto &clusterIds = cgDesc.GetClusterIds();
1052 for (unsigned int j = 0; j < nClustersInGroup; ++j) {
1053 const auto &clusterDesc = desc.GetClusterDescriptor(clusterIds[j]);
// Third initializer -1: presumably the column group id, i.e. "no column group" -- confirm the member order
1054 RClusterSummary summary{clusterDesc.GetFirstEntryIndex(), clusterDesc.GetNEntries(), -1};
1055 pos += SerializeClusterSummary(summary, *where);
1056 }
1057 }
1058 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1059
1060 // Cluster groups
1061 frame = pos;
1062 pos += SerializeListFramePreamble(nClusterGroups, *where);
1063 for (unsigned int i = 0; i < nClusterGroups; ++i) {
1064 const auto &cgDesc = desc.GetClusterGroupDescriptor(context.GetMemClusterGroupId(i));
// Per group: the number of clusters plus a link (unzipped size and locator) to the group's page list
1065 RClusterGroup clusterGroup;
1066 clusterGroup.fNClusters = cgDesc.GetNClusters();
1067 clusterGroup.fPageListEnvelopeLink.fUnzippedSize = cgDesc.GetPageListLength();
1068 clusterGroup.fPageListEnvelopeLink.fLocator = cgDesc.GetPageListLocator();
1069 pos += SerializeClusterGroup(clusterGroup, *where);
1070 }
1071 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1072
1073 // So far no support for meta-data
1074 frame = pos;
1075 pos += SerializeListFramePreamble(0, *where);
1076 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1077
// Size so far excludes the envelope postscript; its length is added on the next line
1078 std::uint32_t size = pos - base;
1079 size += SerializeEnvelopePostscript(base, size, *where);
1080 return size;
1081}
1082
// Deserializes the header envelope found in buffer (bufSize bytes) into descBuilder: the header CRC32, the ntuple
// name and description, the fields (below an artificial zero field) and the columns.
// Returns RResult<void>::Success() at the end; any failing step returns early with a forwarded or new error.
// Non-zero feature flags, a release candidate tag > 0, alias columns and extra type information only log warnings.
// NOTE(review): the signature line (listing line 1083) and the declaration of `result` (listing line 1089) are
// absent from this extract.
1084 const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
1085{
1086 auto base = reinterpret_cast<const unsigned char *>(buffer);
1087 auto bytes = base;
// Number of bytes between the read cursor and the end of the buffer
1088 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
1090
1091 std::uint32_t crc32{0};
1092 result = DeserializeEnvelope(bytes, fnBufSizeLeft(), crc32);
1093 if (!result)
1094 return R__FORWARD_ERROR(result);
1095 bytes += result.Unwrap();
// Stored on the builder; the footer deserializer compares its own copy against this value
1096 descBuilder.SetHeaderCRC32(crc32);
1097
1098 std::vector<std::int64_t> featureFlags;
1099 result = DeserializeFeatureFlags(bytes, fnBufSizeLeft(), featureFlags);
1100 if (!result)
1101 return R__FORWARD_ERROR(result);
1102 bytes += result.Unwrap();
// Any non-zero flag word is reported but does not abort the deserialization
1103 for (auto f: featureFlags) {
1104 if (f)
1105 R__LOG_WARNING(NTupleLog()) << "Unsupported feature flag! " << f;
1106 }
1107
1108 std::uint32_t rcTag;
// DeserializeUInt32 takes no size argument, so the remaining space is checked by hand before the read
1109 if (fnBufSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1110 return R__FAIL("header too short");
1111 bytes += DeserializeUInt32(bytes, rcTag);
1112 if (rcTag > 0) {
1113 R__LOG_WARNING(NTupleLog()) << "Pre-release format version: RC " << rcTag;
1114 }
1115
1116 std::string name;
1117 std::string description;
1118 std::string writer;
1119 result = DeserializeString(bytes, fnBufSizeLeft(), name);
1120 if (!result)
1121 return R__FORWARD_ERROR(result);
1122 bytes += result.Unwrap();
1123 result = DeserializeString(bytes, fnBufSizeLeft(), description);
1124 if (!result)
1125 return R__FORWARD_ERROR(result);
1126 bytes += result.Unwrap();
1127 result = DeserializeString(bytes, fnBufSizeLeft(), writer);
1128 if (!result)
1129 return R__FORWARD_ERROR(result);
1130 bytes += result.Unwrap();
// The writer string is consumed to advance the cursor but is not handed to the builder
1131 descBuilder.SetNTuple(name, description);
1132
// frameSize is filled in by each DeserializeFrameHeader call below; the lambda reads it by reference
1133 std::uint32_t frameSize;
1134 auto frame = bytes;
1135 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - frame); };
1136
1137 std::uint32_t nFields;
1138 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nFields);
1139 if (!result)
1140 return R__FORWARD_ERROR(result);
1141 bytes += result.Unwrap();
1142 // Zero field
1143 descBuilder.AddField(RFieldDescriptorBuilder()
1144 .FieldId(kZeroFieldId)
1145 .Structure(ENTupleStructure::kRecord)
1146 .MakeDescriptor()
1147 .Unwrap());
// Field ids are positional: the n-th field record of the list gets id n
1148 for (std::uint32_t fieldId = 0; fieldId < nFields; ++fieldId) {
1149 RFieldDescriptorBuilder fieldBuilder;
1150 result = DeserializeFieldV1(bytes, fnFrameSizeLeft(), fieldBuilder);
1151 if (!result)
1152 return R__FORWARD_ERROR(result);
1153 bytes += result.Unwrap();
// A field that names itself as parent is attached to the zero field
1154 if (fieldId == fieldBuilder.GetParentId())
1155 fieldBuilder.ParentId(kZeroFieldId);
1156 auto fieldDesc = fieldBuilder.FieldId(fieldId).MakeDescriptor();
1157 if (!fieldDesc)
1158 return R__FORWARD_ERROR(fieldDesc);
// Read the parent id before Unwrap() hands the descriptor over to the builder
1159 auto parentId = fieldDesc.Inspect().GetParentId();
1160 descBuilder.AddField(fieldDesc.Unwrap());
1161 auto resVoid = descBuilder.AddFieldLink(parentId, fieldId);
1162 if (!resVoid)
1163 return R__FORWARD_ERROR(resVoid);
1164 }
// Continue at the recorded frame end, whatever number of bytes the records above consumed
1165 bytes = frame + frameSize;
1166
1167 std::uint32_t nColumns;
1168 frame = bytes;
1169 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nColumns);
1170 if (!result)
1171 return R__FORWARD_ERROR(result);
1172 bytes += result.Unwrap();
// Per field id: the highest column index handed out so far
1173 std::unordered_map<DescriptorId_t, std::uint32_t> maxIndexes;
// Column ids are positional as well: the n-th column record gets id n
1174 for (std::uint32_t columnId = 0; columnId < nColumns; ++columnId) {
1175 RColumnDescriptorBuilder columnBuilder;
1176 result = DeserializeColumnV1(bytes, fnFrameSizeLeft(), columnBuilder);
1177 if (!result)
1178 return R__FORWARD_ERROR(result);
1179 bytes += result.Unwrap();
1180
// The first column seen for a field gets index 0, each further column of that field the next index
1181 std::uint32_t idx = 0;
1182 const auto fieldId = columnBuilder.GetFieldId();
1183 auto maxIdx = maxIndexes.find(fieldId);
1184 if (maxIdx != maxIndexes.end())
1185 idx = maxIdx->second + 1;
1186 maxIndexes[fieldId] = idx;
1187
1188 auto columnDesc = columnBuilder.Index(idx).ColumnId(columnId).MakeDescriptor();
1189 if (!columnDesc)
1190 return R__FORWARD_ERROR(columnDesc);
1191 auto resVoid = descBuilder.AddColumn(columnDesc.Unwrap());
1192 if (!resVoid)
1193 return R__FORWARD_ERROR(resVoid);
1194 }
1195 bytes = frame + frameSize;
1196
// Alias columns: only the frame header is read; a non-empty list is reported, its items are not parsed
1197 std::uint32_t nAliasColumns;
1198 frame = bytes;
1199 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nAliasColumns);
1200 if (!result)
1201 return R__FORWARD_ERROR(result);
1202 bytes += result.Unwrap();
1203 if (nAliasColumns > 0)
1204 R__LOG_WARNING(NTupleLog()) << "Alias columns are still unsupported! ";
1205
// Extra type information: same treatment as the alias column list above
1206 std::uint32_t nTypeInfo;
1207 frame = bytes;
1208 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nTypeInfo);
1209 if (!result)
1210 return R__FORWARD_ERROR(result);
1211 bytes += result.Unwrap();
1212 if (nTypeInfo > 0)
1213 R__LOG_WARNING(NTupleLog()) << "Extra type information is still unsupported! ";
1214
1215 return RResult<void>::Success();
1216}
1217
1218
// Deserializes the footer envelope found in buffer (bufSize bytes) into descBuilder.
// It checks that the header CRC32 recorded in the footer equals descBuilder.GetHeaderCRC32(), then adds the
// cluster summaries and the cluster groups to the builder.
// Extension headers and meta-data blocks only log a warning; a non-empty column group list or a cluster summary
// with a column group id >= 0 is an error ("sharded clusters are still unsupported").
// Returns RResult<void>::Success() at the end; any failing step returns early with a forwarded or new error.
// NOTE(review): the signature line (listing line 1219), the declaration of `result` (1225) and listing lines
// 1302, 1305 and 1306 are absent from this extract.
1220 const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
1221{
1222 auto base = reinterpret_cast<const unsigned char *>(buffer);
1223 auto bytes = base;
// Number of bytes between the read cursor and the end of the buffer
1224 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
1226
1227 result = DeserializeEnvelope(bytes, fnBufSizeLeft());
1228 if (!result)
1229 return R__FORWARD_ERROR(result);
1230 bytes += result.Unwrap();
1231
1232 std::vector<std::int64_t> featureFlags;
1233 result = DeserializeFeatureFlags(bytes, fnBufSizeLeft(), featureFlags);
1234 if (!result)
1235 return R__FORWARD_ERROR(result);
1236 bytes += result.Unwrap();
// Any non-zero flag word is reported but does not abort the deserialization
1237 for (auto f: featureFlags) {
1238 if (f)
1239 R__LOG_WARNING(NTupleLog()) << "Unsupported feature flag! " << f;
1240 }
1241
1242 std::uint32_t crc32{0};
// DeserializeUInt32 takes no size argument, so the remaining space is checked by hand before the read
1243 if (fnBufSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1244 return R__FAIL("footer too short");
1245 bytes += DeserializeUInt32(bytes, crc32);
// The footer must belong to the header that was deserialized into this builder before
1246 if (crc32 != descBuilder.GetHeaderCRC32())
1247 return R__FAIL("CRC32 mismatch between header and footer");
1248
// frameSize is filled in by each DeserializeFrameHeader call below; the lambda reads it by reference
1249 std::uint32_t frameSize;
1250 auto frame = bytes;
1251 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - frame); };
1252
// Extension headers: the frame is skipped as a whole by jumping to its end
1253 std::uint32_t nXHeaders;
1254 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nXHeaders);
1255 if (!result)
1256 return R__FORWARD_ERROR(result);
1257 if (nXHeaders > 0)
1258 R__LOG_WARNING(NTupleLog()) << "extension headers are still unsupported";
1259 bytes = frame + frameSize;
1260
// Column groups: unlike extension headers, a non-empty list is a hard error
1261 std::uint32_t nColumnGroups;
1262 frame = bytes;
1263 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nColumnGroups);
1264 if (!result)
1265 return R__FORWARD_ERROR(result);
1266 if (nColumnGroups > 0)
1267 return R__FAIL("sharded clusters are still unsupported");
1268 bytes = frame + frameSize;
1269
1270 std::uint32_t nClusterSummaries;
1271 frame = bytes;
1272 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterSummaries);
1273 if (!result)
1274 return R__FORWARD_ERROR(result);
1275 bytes += result.Unwrap();
// Cluster ids are positional: the n-th summary of the list describes cluster n
1276 for (std::uint32_t clusterId = 0; clusterId < nClusterSummaries; ++clusterId) {
1277 RClusterSummary clusterSummary;
1278 result = DeserializeClusterSummary(bytes, fnFrameSizeLeft(), clusterSummary);
1279 if (!result)
1280 return R__FORWARD_ERROR(result);
1281 bytes += result.Unwrap();
1282 if (clusterSummary.fColumnGroupID >= 0)
1283 return R__FAIL("sharded clusters are still unsupported");
// NOTE(review): the value returned by AddClusterSummary is discarded here -- confirm that this is intended
1284 descBuilder.AddClusterSummary(clusterId, clusterSummary.fFirstEntry, clusterSummary.fNEntries);
1285 }
1286 bytes = frame + frameSize;
1287
1288 std::uint32_t nClusterGroups;
1289 frame = bytes;
1290 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups);
1291 if (!result)
1292 return R__FORWARD_ERROR(result);
1293 bytes += result.Unwrap();
// Running id of the first cluster of the current group; groups get consecutive id ranges in list order
1294 std::uint64_t clusterId = 0;
1295 for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) {
1296 RClusterGroup clusterGroup;
1297 result = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup);
1298 if (!result)
1299 return R__FORWARD_ERROR(result);
1300 bytes += result.Unwrap();
1301
1303 RClusterGroupDescriptorBuilder clusterGroupBuilder;
// NOTE(review): the call chain started on the next line continues on listing lines 1305-1306, which are
// absent from this extract (presumably the page list locator and length setters -- confirm)
1304 clusterGroupBuilder.ClusterGroupId(groupId)
1307 for (std::uint64_t i = 0; i < clusterGroup.fNClusters; ++i)
1308 clusterGroupBuilder.AddCluster(clusterId + i);
1309 clusterId += clusterGroup.fNClusters;
1310 descBuilder.AddClusterGroup(std::move(clusterGroupBuilder));
1311 }
1312 bytes = frame + frameSize;
1313
// Meta-data blocks: the frame is skipped as a whole by jumping to its end
1314 std::uint32_t nMDBlocks;
1315 frame = bytes;
1316 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nMDBlocks);
1317 if (!result)
1318 return R__FORWARD_ERROR(result);
1319 if (nMDBlocks > 0)
1320 R__LOG_WARNING(NTupleLog()) << "meta-data blocks are still unsupported";
1321 bytes = frame + frameSize;
1322
1323 return RResult<void>::Success();
1324}
1325
1326
// Deserializes a page list envelope found in buffer (bufSize bytes) and commits the column ranges to `clusters`.
// The envelope holds a top-most list frame with one outer frame per cluster and, inside it, one inner frame per
// column. The number of clusters in the envelope must equal clusters.size(), otherwise an error is returned.
// For column j of cluster i, the page infos, the column offset and the compression settings are passed to
// clusters[i].CommitColumnRange().
// Returns RResult<void>::Success() at the end; any failing step returns early with a forwarded or new error.
// NOTE(review): the signature line (listing line 1327), the declaration of `result` (1333) and the declaration
// of `pageRange` (1375) are absent from this extract.
1328 const void *buffer, std::uint32_t bufSize, std::vector<RClusterDescriptorBuilder> &clusters)
1329{
1330 auto base = reinterpret_cast<const unsigned char *>(buffer);
1331 auto bytes = base;
// Number of bytes between the read cursor and the end of the buffer
1332 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
1334
1335 result = DeserializeEnvelope(bytes, fnBufSizeLeft());
1336 if (!result)
1337 return R__FORWARD_ERROR(result);
1338 bytes += result.Unwrap();
1339
// Each nesting level keeps its own frame start and size so that reads are bounded by the enclosing frame
1340 std::uint32_t topMostFrameSize;
1341 auto topMostFrame = bytes;
1342 auto fnTopMostFrameSizeLeft = [&]() { return topMostFrameSize - (bytes - topMostFrame); };
1343
1344 std::uint32_t nClusters;
1345 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), topMostFrameSize, nClusters);
1346 if (!result)
1347 return R__FORWARD_ERROR(result);
1348 bytes += result.Unwrap();
1349
// The caller provides one builder per cluster; the page list has to describe exactly that many clusters
1350 if (nClusters != clusters.size())
1351 return R__FAIL("mismatch of page list and cluster summaries");
1352
1353 for (std::uint32_t i = 0; i < nClusters; ++i) {
1354 std::uint32_t outerFrameSize;
1355 auto outerFrame = bytes;
1356 auto fnOuterFrameSizeLeft = [&]() { return outerFrameSize - (bytes - outerFrame); };
1357
1358 std::uint32_t nColumns;
1359 result = DeserializeFrameHeader(bytes, fnTopMostFrameSizeLeft(), outerFrameSize, nColumns);
1360 if (!result)
1361 return R__FORWARD_ERROR(result);
1362 bytes += result.Unwrap();
1363
// Column ids are positional: the j-th inner frame of a cluster belongs to column j
1364 for (std::uint32_t j = 0; j < nColumns; ++j) {
1365 std::uint32_t innerFrameSize;
1366 auto innerFrame = bytes;
1367 auto fnInnerFrameSizeLeft = [&]() { return innerFrameSize - (bytes - innerFrame); };
1368
1369 std::uint32_t nPages;
1370 result = DeserializeFrameHeader(bytes, fnOuterFrameSizeLeft(), innerFrameSize, nPages);
1371 if (!result)
1372 return R__FORWARD_ERROR(result);
1373 bytes += result.Unwrap();
1374
1376 pageRange.fColumnId = j;
1377 for (std::uint32_t k = 0; k < nPages; ++k) {
// Room for the 4-byte element count is checked by hand; the locator reader gets the remaining size itself
1378 if (fnInnerFrameSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1379 return R__FAIL("inner frame too short");
1380 std::uint32_t nElements;
1381 RNTupleLocator locator;
1382 bytes += DeserializeUInt32(bytes, nElements);
1383 result = DeserializeLocator(bytes, fnInnerFrameSizeLeft(), locator);
1384 if (!result)
1385 return R__FORWARD_ERROR(result);
1386 pageRange.fPageInfos.push_back({ClusterSize_t(nElements), locator});
1387 bytes += result.Unwrap();
1388 }
1389
// After the page entries: an 8-byte column offset followed by 4 bytes of compression settings
1390 if (fnInnerFrameSizeLeft() < static_cast<int>(sizeof(std::uint32_t) + sizeof(std::uint64_t)))
1391 return R__FAIL("page list frame too short");
1392 std::uint64_t columnOffset;
1393 bytes += DeserializeUInt64(bytes, columnOffset);
1394 std::uint32_t compressionSettings;
1395 bytes += DeserializeUInt32(bytes, compressionSettings);
1396
1397 clusters[i].CommitColumnRange(j, columnOffset, compressionSettings, pageRange);
// Continue at the recorded frame end, whatever number of bytes was consumed above
1398 bytes = innerFrame + innerFrameSize;
1399 }
1400
1401 bytes = outerFrame + outerFrameSize;
1402 }
1403
1404 return RResult<void>::Success();
1405}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition: RError.hxx:295
#define R__FORWARD_RESULT(res)
Short-hand to return an RResult<T> value from a subroutine to the calling stack frame.
Definition: RError.hxx:293
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition: RError.hxx:291
#define R__LOG_WARNING(...)
Definition: RLogger.hxx:363
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define ROOT_RELEASE
Definition: RVersion.h:17
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Definition: TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t nitems
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t bytes
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition: TGX11.cxx:110
@ kCollection
Definition: TStructNode.h:21
The available trivial, native content types of a column.
The serialization context is used for the piecewise serialization of a descriptor.
DescriptorId_t GetMemClusterId(DescriptorId_t physId) const
DescriptorId_t GetPhysColumnId(DescriptorId_t memId) const
DescriptorId_t GetPhysFieldId(DescriptorId_t memId) const
DescriptorId_t GetMemClusterGroupId(DescriptorId_t physId) const
DescriptorId_t GetMemColumnId(DescriptorId_t physId) const
A helper class for serializing and deserialization of the RNTuple binary format.
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val)
static std::uint32_t SerializeFeatureFlags(const std::vector< std::int64_t > &flags, void *buffer)
static std::uint32_t SerializePageListV1(void *buffer, const RNTupleDescriptor &desc, std::span< DescriptorId_t > physClusterIDs, const RContext &context)
static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer)
static RResult< std::uint32_t > DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
Writes a CRC32 checksum of the byte range given by data and length.
static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer)
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< void > DeserializePageListV1(const void *buffer, std::uint32_t bufSize, std::vector< RClusterDescriptorBuilder > &clusters)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static RResult< std::uint32_t > DeserializeEnvelope(const void *buffer, std::uint32_t bufSize)
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val)
static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer)
static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc)
static RResult< void > DeserializeFooterV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer)
static std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
static std::uint32_t SerializeEnvelopePreamble(void *buffer)
Currently all envelopes have the same version number (1).
static RResult< std::uint16_t > DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type)
static RResult< std::uint32_t > DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val)
static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val)
static RResult< std::uint32_t > DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer)
static RResult< std::uint16_t > DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer)
static std::uint32_t SerializeRecordFramePreamble(void *buffer)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static RResult< std::uint32_t > DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
static RResult< std::uint32_t > DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize, std::vector< std::int64_t > &flags)
static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer)
static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size)
static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
static RResult< void > VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer)
While we could just interpret the enums as ints, we make the translation explicit in order to avoid a...
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< void > DeserializeHeaderV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< std::uint32_t > DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & PageListLength(std::uint32_t pageListLength)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & ColumnId(DescriptorId_t columnId)
Base class for all ROOT issued exceptions.
Definition: RError.hxx:114
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Meta-data stored for every field of an ntuple.
A helper class for piece-wise construction of an RNTupleDescriptor.
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
RResult< void > AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void SetNTuple(const std::string_view name, const std::string_view description)
void AddClusterGroup(RClusterGroupDescriptorBuilder &&clusterGroup)
void AddField(const RFieldDescriptor &fieldDesc)
The on-storage meta-data of an ntuple.
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RResult<void> has no data member and no Inspect() method but instead a Success() factory method.
Definition: RError.hxx:257
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition: RError.hxx:195
RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
Definition: RNTupleUtil.cxx:24
RClusterSize ClusterSize_t
Definition: RNTupleUtil.hxx:62
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
Definition: RNTupleUtil.hxx:37
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:83
static constexpr double pi
Definition: writer.py:1
Records the partition of data into pages for a particular column in a particular cluster.
Generic information about the physical location of data.