Logo ROOT  
Reference Guide
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
17#include <ROOT/RNTupleModel.hxx>
18#include <ROOT/RNTupleUtil.hxx>
19#include <ROOT/RStringView.hxx>
20
21#include <RZip.h>
22#include <TError.h>
23
24#include <algorithm>
25#include <cstdint>
26#include <cstring>
27#include <iostream>
28#include <utility>
29
30namespace {
31
/// The machine-independent serialization of meta-data wraps the header and footer as well as sub structures in
/// frames. The frame layout is
///
/// ------------------------------------------------------------
/// | TYPE          | DESCRIPTION                              |
/// |----------------------------------------------------------|
/// | std::uint16_t | Version used to write the frame          |
/// | std::uint16_t | Minimum version for reading the frame    |
/// | std::uint32_t | Length of the frame incl. preamble       |
/// ------------------------------------------------------------
///
/// In addition, the header and footer store a 4 byte CRC32 checksum of the frame immediately after the frame.
/// The footer also repeats the frame size just before the CRC32 checksum. That means, one can read the last 8 bytes
/// to determine the footer length, and the first 8 bytes to determine the header length.
///
/// Within the frames, integers of different lengths are stored in a machine-independent representation. Strings and
/// vectors store the number of items followed by the items. Time stamps are stored in number of seconds since the
/// UNIX epoch.
50
/// Writes `val` to `buffer` as 8 bytes, least significant byte first.
/// If `buffer` is nullptr, nothing is written (size-only mode).
/// \return The serialized size in bytes, always 8.
std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
{
   if (buffer != nullptr) {
      auto bytes = reinterpret_cast<unsigned char *>(buffer);
      // Operate on the unsigned bit pattern so that every shift is well-defined for negative values
      const auto bits = static_cast<std::uint64_t>(val);
      for (unsigned int i = 0; i < 8; ++i)
         bytes[i] = static_cast<unsigned char>((bits >> (8 * i)) & 0xFF);
   }
   return 8;
}
66
67std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
68{
69 return SerializeInt64(val, buffer);
70}
71
/// Reads 8 bytes from `buffer`, least significant byte first, and stores the value in `*val`.
/// \return The number of bytes consumed, always 8.
std::uint32_t DeserializeInt64(const void *buffer, std::int64_t *val)
{
   auto bytes = reinterpret_cast<const unsigned char *>(buffer);
   // Assemble the value in an unsigned accumulator so that no shift touches the sign bit
   std::uint64_t bits = 0;
   for (unsigned int i = 0; i < 8; ++i)
      bits |= static_cast<std::uint64_t>(bytes[i]) << (8 * i);
   *val = static_cast<std::int64_t>(bits);
   return 8;
}
81
82std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t *val)
83{
84 return DeserializeInt64(buffer, reinterpret_cast<std::int64_t *>(val));
85}
86
/// Writes `val` to `buffer` as 4 bytes, least significant byte first.
/// If `buffer` is nullptr, nothing is written (size-only mode).
/// \return The serialized size in bytes, always 4.
std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
{
   if (buffer != nullptr) {
      auto bytes = reinterpret_cast<unsigned char *>(buffer);
      // Operate on the unsigned bit pattern so that every shift is well-defined for negative values
      const auto bits = static_cast<std::uint32_t>(val);
      for (unsigned int i = 0; i < 4; ++i)
         bytes[i] = static_cast<unsigned char>((bits >> (8 * i)) & 0xFF);
   }
   return 4;
}
98
99std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
100{
101 return SerializeInt32(val, buffer);
102}
103
/// Reads 4 bytes from `buffer`, least significant byte first, and stores the value in `*val`.
/// \return The number of bytes consumed, always 4.
std::uint32_t DeserializeInt32(const void *buffer, std::int32_t *val)
{
   auto bytes = reinterpret_cast<const unsigned char *>(buffer);
   // Assemble the value in an unsigned accumulator so that no shift touches the sign bit
   std::uint32_t bits = 0;
   for (unsigned int i = 0; i < 4; ++i)
      bits |= static_cast<std::uint32_t>(bytes[i]) << (8 * i);
   *val = static_cast<std::int32_t>(bits);
   return 4;
}
111
112std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t *val)
113{
114 return DeserializeInt32(buffer, reinterpret_cast<std::int32_t *>(val));
115}
116
/// Writes `val` to `buffer` as 2 bytes, least significant byte first.
/// If `buffer` is nullptr, nothing is written (size-only mode).
/// \return The serialized size in bytes, always 2.
std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
{
   if (buffer != nullptr) {
      auto bytes = reinterpret_cast<unsigned char *>(buffer);
      // Operate on the unsigned bit pattern so that the result does not depend on sign extension
      const auto bits = static_cast<std::uint16_t>(val);
      bytes[0] = static_cast<unsigned char>(bits & 0xFF);
      bytes[1] = static_cast<unsigned char>((bits >> 8) & 0xFF);
   }
   return 2;
}
126
127std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
128{
129 return SerializeInt16(val, buffer);
130}
131
/// Reads 2 bytes from `buffer`, least significant byte first, and stores the value in `*val`.
/// \return The number of bytes consumed, always 2.
std::uint32_t DeserializeInt16(const void *buffer, std::int16_t *val)
{
   auto bytes = reinterpret_cast<const unsigned char *>(buffer);
   // Assemble the 16 bit pattern unsigned first, then reinterpret it as a signed value
   const auto bits = static_cast<std::uint16_t>(bytes[0] | (static_cast<unsigned int>(bytes[1]) << 8));
   *val = static_cast<std::int16_t>(bits);
   return 2;
}
138
139std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t *val)
140{
141 return DeserializeInt16(buffer, reinterpret_cast<std::int16_t *>(val));
142}
143
144std::uint32_t SerializeClusterSize(ROOT::Experimental::ClusterSize_t val, void *buffer)
145{
146 return SerializeUInt32(val, buffer);
147}
148
149std::uint32_t DeserializeClusterSize(const void *buffer, ROOT::Experimental::ClusterSize_t *val)
150{
151 std::uint32_t size;
152 auto nbytes = DeserializeUInt32(buffer, &size);
153 *val = size;
154 return nbytes;
155}
156
157std::uint32_t SerializeString(const std::string &val, void *buffer)
158{
159 if (buffer != nullptr) {
160 auto pos = reinterpret_cast<unsigned char *>(buffer);
161 pos += SerializeUInt32(val.length(), pos);
162 memcpy(pos, val.data(), val.length());
163 }
164 return SerializeUInt32(val.length(), nullptr) + val.length();
165}
166
167std::uint32_t DeserializeString(const void *buffer, std::string *val)
168{
169 auto base = reinterpret_cast<const unsigned char *>(buffer);
170 auto bytes = base;
171 std::uint32_t length;
172 bytes += DeserializeUInt32(buffer, &length);
173 val->resize(length);
174 memcpy(&(*val)[0], bytes, length);
175 return bytes + length - base;
176}
177
178std::uint32_t SerializeLocator(const ROOT::Experimental::RClusterDescriptor::RLocator &val, void *buffer)
179{
180 // In order to keep the meta-data small, we don't wrap the locator in a frame
181 if (buffer != nullptr) {
182 auto pos = reinterpret_cast<unsigned char *>(buffer);
183 pos += SerializeInt64(val.fPosition, pos);
184 pos += SerializeUInt32(val.fBytesOnStorage, pos);
185 pos += SerializeString(val.fUrl, pos);
186 }
187 return SerializeString(val.fUrl, nullptr) + 12;
188}
189
190std::uint32_t DeserializeLocator(const void *buffer, ROOT::Experimental::RClusterDescriptor::RLocator *val)
191{
192 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
193 bytes += DeserializeInt64(bytes, &val->fPosition);
194 bytes += DeserializeUInt32(bytes, &val->fBytesOnStorage);
195 bytes += DeserializeString(bytes, &val->fUrl);
196 return SerializeString(val->fUrl, nullptr) + 12;
197}
198
199std::uint32_t SerializeFrame(std::uint16_t protocolVersionCurrent, std::uint16_t protocolVersionMin, void *buffer,
200 void **ptrSize)
201{
202 if (buffer != nullptr) {
203 auto pos = reinterpret_cast<unsigned char *>(buffer);
204 pos += SerializeUInt16(protocolVersionCurrent, pos); // The protocol version used to write the structure
205 pos += SerializeUInt16(protocolVersionMin, pos); // The minimum protocol version required to read the data
206 *ptrSize = pos;
207 pos += SerializeUInt32(0, pos); // placeholder for the size of the frame
208 }
209 return 8;
210}
211
212std::uint32_t DeserializeFrame(std::uint16_t protocolVersion, const void *buffer, std::uint32_t *size)
213{
214 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
215 std::uint16_t protocolVersionAtWrite;
216 std::uint16_t protocolVersionMinRequired;
217 bytes += DeserializeUInt16(bytes, &protocolVersionAtWrite);
218 bytes += DeserializeUInt16(bytes, &protocolVersionMinRequired);
219 R__ASSERT(protocolVersionAtWrite >= protocolVersionMinRequired);
220 R__ASSERT(protocolVersion >= protocolVersionMinRequired);
221 bytes += DeserializeUInt32(bytes, size);
222 return 8;
223}
224
225std::uint32_t SerializeVersion(const ROOT::Experimental::RNTupleVersion &val, void *buffer)
226{
227 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
228 auto pos = base;
229 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
230
231 void *ptrSize = nullptr;
232 pos += SerializeFrame(0, 0, *where, &ptrSize);
233
234 pos += SerializeUInt32(val.GetVersionUse(), *where);
235 pos += SerializeUInt32(val.GetVersionMin(), *where);
236 pos += SerializeUInt64(val.GetFlags(), *where);
237
238 auto size = pos - base;
239 SerializeUInt32(size, ptrSize);
240 return size;
241}
242
243std::uint32_t DeserializeVersion(const void *buffer, ROOT::Experimental::RNTupleVersion *version)
244{
245 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
246 std::uint32_t frameSize;
247 bytes += DeserializeFrame(0, bytes, &frameSize);
248
249 std::uint32_t versionUse;
250 std::uint32_t versionMin;
251 std::uint64_t flags;
252 bytes += DeserializeUInt32(bytes, &versionUse);
253 bytes += DeserializeUInt32(bytes, &versionMin);
254 bytes += DeserializeUInt64(bytes, &flags);
255 *version = ROOT::Experimental::RNTupleVersion(versionUse, versionMin, flags);
256
257 return frameSize;
258}
259
260std::uint32_t SerializeUuid(const ROOT::Experimental::RNTupleUuid &val, void *buffer)
261{
262 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
263 auto pos = base;
264 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
265
266 void *ptrSize = nullptr;
267 pos += SerializeFrame(0, 0, *where, &ptrSize);
268
269 pos += SerializeString(val, *where);
270
271 auto size = pos - base;
272 SerializeUInt32(size, ptrSize);
273 return size;
274}
275
276std::uint32_t DeserializeUuid(const void *buffer, ROOT::Experimental::RNTupleUuid *uuid)
277{
278 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
279 std::uint32_t frameSize;
280 bytes += DeserializeFrame(0, bytes, &frameSize);
281
282 bytes += DeserializeString(bytes, uuid);
283
284 return frameSize;
285}
286
287std::uint32_t SerializeColumnModel(const ROOT::Experimental::RColumnModel &val, void *buffer)
288{
289 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
290 auto pos = base;
291 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
292
293 void *ptrSize = nullptr;
294 pos += SerializeFrame(0, 0, *where, &ptrSize);
295
296 pos += SerializeInt32(static_cast<int>(val.GetType()), *where);
297 pos += SerializeInt32(static_cast<int>(val.GetIsSorted()), *where);
298
299 auto size = pos - base;
300 SerializeUInt32(size, ptrSize);
301 return size;
302}
303
304std::uint32_t DeserializeColumnModel(const void *buffer, ROOT::Experimental::RColumnModel *columnModel)
305{
306 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
307 std::uint32_t frameSize;
308 bytes += DeserializeFrame(0, bytes, &frameSize);
309
310 std::int32_t type;
311 std::int32_t isSorted;
312 bytes += DeserializeInt32(bytes, &type);
313 bytes += DeserializeInt32(bytes, &isSorted);
314 *columnModel = ROOT::Experimental::RColumnModel(static_cast<ROOT::Experimental::EColumnType>(type), isSorted);
315
316 return frameSize;
317}
318
319std::uint32_t SerializeTimeStamp(const std::chrono::system_clock::time_point &val, void *buffer)
320{
321 return SerializeInt64(std::chrono::system_clock::to_time_t(val), buffer);
322}
323
324std::uint32_t DeserializeTimeStamp(const void *buffer, std::chrono::system_clock::time_point *timeStamp)
325{
326 std::int64_t secSinceUnixEpoch;
327 auto size = DeserializeInt64(buffer, &secSinceUnixEpoch);
328 *timeStamp = std::chrono::system_clock::from_time_t(secSinceUnixEpoch);
329 return size;
330}
331
332std::uint32_t SerializeColumnRange(const ROOT::Experimental::RClusterDescriptor::RColumnRange &val, void *buffer)
333{
334 // To keep the cluster footers small, we don't put a frame around individual column ranges.
335 if (buffer != nullptr) {
336 auto pos = reinterpret_cast<unsigned char *>(buffer);
337 // The column id is stored in SerializeFooter() for the column range and the page range altogether
338 pos += SerializeUInt64(val.fFirstElementIndex, pos);
339 pos += SerializeClusterSize(val.fNElements, pos);
340 pos += SerializeInt64(val.fCompressionSettings, pos);
341 }
342 return 20;
343}
344
345std::uint32_t DeserializeColumnRange(const void *buffer,
347{
348 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
349 // The column id is set elsewhere (see AddClustersFromFooter())
350 bytes += DeserializeUInt64(bytes, &columnRange->fFirstElementIndex);
351 bytes += DeserializeClusterSize(bytes, &columnRange->fNElements);
352 bytes += DeserializeInt64(bytes, &columnRange->fCompressionSettings);
353 return 20;
354}
355
356std::uint32_t SerializePageInfo(const ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo &val, void *buffer)
357{
358 // To keep the cluster footers small, we don't put a frame around individual page infos.
359 if (buffer != nullptr) {
360 auto pos = reinterpret_cast<unsigned char *>(buffer);
361 // The column id is stored in SerializeFooter() for the column range and the page range altogether
362 pos += SerializeClusterSize(val.fNElements, pos);
363 pos += SerializeLocator(val.fLocator, pos);
364 }
365 return 4 + SerializeLocator(val.fLocator, nullptr);
366}
367
368std::uint32_t DeserializePageInfo(const void *buffer,
370{
371 auto base = reinterpret_cast<const unsigned char *>(buffer);
372 auto bytes = base;
373 // The column id is set elsewhere (see AddClustersFromFooter())
374 bytes += DeserializeClusterSize(bytes, &pageInfo->fNElements);
375 bytes += DeserializeLocator(bytes, &pageInfo->fLocator);
376 return bytes - base;
377}
378
379std::uint32_t SerializeCrc32(const unsigned char *data, std::uint32_t length, void *buffer)
380{
381 auto checksum = R__crc32(0, nullptr, 0);
382 if (buffer != nullptr) {
383 checksum = R__crc32(checksum, data, length);
384 SerializeUInt32(checksum, buffer);
385 }
386 return 4;
387}
388
389void VerifyCrc32(const unsigned char *data, std::uint32_t length)
390{
391 auto checksumReal = R__crc32(0, nullptr, 0);
392 checksumReal = R__crc32(checksumReal, data, length);
393 std::uint32_t checksumFound;
394 DeserializeUInt32(data + length, &checksumFound);
395 R__ASSERT(checksumFound == checksumReal);
396}
397
398std::uint32_t SerializeField(const ROOT::Experimental::RFieldDescriptor &val, void *buffer)
399{
400 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
401 auto pos = base;
402 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
403
404 void *ptrSize = nullptr;
407
408 pos += SerializeUInt64(val.GetId(), *where);
409 pos += SerializeVersion(val.GetFieldVersion(), *where);
410 pos += SerializeVersion(val.GetTypeVersion(), *where);
411 pos += SerializeString(val.GetFieldName(), *where);
412 pos += SerializeString(val.GetFieldDescription(), *where);
413 pos += SerializeString(val.GetTypeName(), *where);
414 pos += SerializeUInt64(val.GetNRepetitions(), *where);
415 pos += SerializeUInt32(static_cast<int>(val.GetStructure()), *where);
416 pos += SerializeUInt64(val.GetParentId(), *where);
417 pos += SerializeUInt32(val.GetLinkIds().size(), *where);
418 for (const auto& l : val.GetLinkIds())
419 pos += SerializeUInt64(l, *where);
420
421 auto size = pos - base;
422 SerializeUInt32(size, ptrSize);
423 return size;
424}
425
426std::uint32_t SerializeColumn(const ROOT::Experimental::RColumnDescriptor &val, void *buffer)
427{
428 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
429 auto pos = base;
430 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
431
432 void *ptrSize = nullptr;
435
436 pos += SerializeUInt64(val.GetId(), *where);
437 pos += SerializeVersion(val.GetVersion(), *where);
438 pos += SerializeColumnModel(val.GetModel(), *where);
439 pos += SerializeUInt64(val.GetFieldId(), *where);
440 pos += SerializeUInt32(val.GetIndex(), *where);
441
442 auto size = pos - base;
443 SerializeUInt32(size, ptrSize);
444 return size;
445}
446
447std::uint32_t SerializeClusterSummary(const ROOT::Experimental::RClusterDescriptor &val, void *buffer)
448{
449 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
450 auto pos = base;
451 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
452
453 void *ptrSize = nullptr;
456
457 pos += SerializeUInt64(val.GetId(), *where);
458 pos += SerializeVersion(val.GetVersion(), *where);
459 pos += SerializeUInt64(val.GetFirstEntryIndex(), *where);
460 pos += SerializeUInt64(val.GetNEntries(), *where);
461 pos += SerializeLocator(val.GetLocator(), *where);
462
463 auto size = pos - base;
464 SerializeUInt32(size, ptrSize);
465 return size;
466}
467
468} // anonymous namespace
469
470
471////////////////////////////////////////////////////////////////////////////////
472
473
475 return fFieldId == other.fFieldId &&
476 fFieldVersion == other.fFieldVersion &&
477 fTypeVersion == other.fTypeVersion &&
478 fFieldName == other.fFieldName &&
480 fTypeName == other.fTypeName &&
481 fNRepetitions == other.fNRepetitions &&
482 fStructure == other.fStructure &&
483 fParentId == other.fParentId &&
484 fLinkIds == other.fLinkIds;
485}
486
487
488////////////////////////////////////////////////////////////////////////////////
489
490
492 return fColumnId == other.fColumnId &&
493 fVersion == other.fVersion &&
494 fModel == other.fModel &&
495 fFieldId == other.fFieldId &&
496 fIndex == other.fIndex;
497}
498
499
500////////////////////////////////////////////////////////////////////////////////
501
502
504 return fClusterId == other.fClusterId &&
505 fVersion == other.fVersion &&
506 fFirstEntryIndex == other.fFirstEntryIndex &&
507 fNEntries == other.fNEntries &&
508 fLocator == other.fLocator &&
509 fColumnRanges == other.fColumnRanges &&
510 fPageRanges == other.fPageRanges;
511}
512
513
514////////////////////////////////////////////////////////////////////////////////
515
516
518 return fName == other.fName &&
519 fDescription == other.fDescription &&
520 fAuthor == other.fAuthor &&
521 fCustodian == other.fCustodian &&
522 fTimeStampData == other.fTimeStampData &&
523 fTimeStampWritten == other.fTimeStampWritten &&
524 fVersion == other.fVersion &&
525 fOwnUuid == other.fOwnUuid &&
526 fGroupUuid == other.fGroupUuid &&
527 fFieldDescriptors == other.fFieldDescriptors &&
528 fColumnDescriptors == other.fColumnDescriptors &&
529 fClusterDescriptors == other.fClusterDescriptors;
530}
531
532
534{
535 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
536 auto pos = base;
537 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
538
539 void *ptrSize = nullptr;
540 pos += SerializeFrame(
542 pos += SerializeUInt64(0, *where); // reserved; can be at some point used, e.g., for compression flags
543
544 pos += SerializeString(fName, *where);
545 pos += SerializeString(fDescription, *where);
546 pos += SerializeString(fAuthor, *where);
547 pos += SerializeString(fCustodian, *where);
548 pos += SerializeTimeStamp(fTimeStampData, *where);
549 pos += SerializeTimeStamp(fTimeStampWritten, *where);
550 pos += SerializeVersion(fVersion, *where);
551 pos += SerializeUuid(fOwnUuid, *where);
552 pos += SerializeUuid(fGroupUuid, *where);
553 pos += SerializeUInt32(fFieldDescriptors.size(), *where);
554 for (const auto& f : fFieldDescriptors) {
555 pos += SerializeField(f.second, *where);
556 }
557 pos += SerializeUInt32(fColumnDescriptors.size(), *where);
558 for (const auto& c : fColumnDescriptors) {
559 pos += SerializeColumn(c.second, *where);
560 }
561
562 std::uint32_t size = pos - base;
563 SerializeUInt32(size, ptrSize);
564 size += SerializeCrc32(base, size, *where);
565
566 return size;
567}
568
570{
571 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
572 auto pos = base;
573 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
574
575 void *ptrSize = nullptr;
576 pos += SerializeFrame(
578 pos += SerializeUInt64(0, *where); // reserved; can be at some point used, e.g., for compression flags
579
580 pos += SerializeUInt64(fClusterDescriptors.size(), *where);
581 for (const auto& cluster : fClusterDescriptors) {
582 pos += SerializeUuid(fOwnUuid, *where); // in order to verify that header and footer belong together
583 pos += SerializeClusterSummary(cluster.second, *where);
584
585 pos += SerializeUInt32(fColumnDescriptors.size(), *where);
586 for (const auto& column : fColumnDescriptors) {
587 auto columnId = column.first;
588 pos += SerializeUInt64(columnId, *where);
589
590 const auto &columnRange = cluster.second.GetColumnRange(columnId);
591 R__ASSERT(columnRange.fColumnId == columnId);
592 pos += SerializeColumnRange(columnRange, *where);
593
594 const auto &pageRange = cluster.second.GetPageRange(columnId);
595 R__ASSERT(pageRange.fColumnId == columnId);
596 auto nPages = pageRange.fPageInfos.size();
597 pos += SerializeUInt32(nPages, *where);
598 for (unsigned int i = 0; i < nPages; ++i) {
599 pos += SerializePageInfo(pageRange.fPageInfos[i], *where);
600 }
601 }
602 }
603
604 // The next 16 bytes make the ntuple's postscript
605 pos += SerializeUInt16(kFrameVersionCurrent, *where);
606 pos += SerializeUInt16(kFrameVersionMin, *where);
607 // Add the CRC32 bytes to the header and footer sizes
608 pos += SerializeUInt32(SerializeHeader(nullptr), *where);
609 std::uint32_t size = pos - base + 4;
610 pos += SerializeUInt32(size + 4, *where);
611 size += SerializeCrc32(base, size, *where);
612
613 return size;
614}
615
616
618 const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
619{
620 auto pos = reinterpret_cast<const unsigned char *>(postscript);
621 std::uint16_t dummy;
622 pos += DeserializeUInt16(pos, &dummy);
623 pos += DeserializeUInt16(pos, &dummy);
624 pos += DeserializeUInt32(pos, &szHeader);
625 pos += DeserializeUInt32(pos, &szFooter);
626}
627
628
630{
631 NTupleSize_t result = 0;
632 for (const auto &cd : fClusterDescriptors) {
633 result = std::max(result, cd.second.GetFirstEntryIndex() + cd.second.GetNEntries());
634 }
635 return result;
636}
637
639{
640 NTupleSize_t result = 0;
641 for (const auto &cd : fClusterDescriptors) {
642 auto columnRange = cd.second.GetColumnRange(columnId);
643 result = std::max(result, columnRange.fFirstElementIndex + columnRange.fNElements);
644 }
645 return result;
646}
647
650{
651 std::string leafName(fieldName);
652 auto posDot = leafName.find_last_of('.');
653 if (posDot != std::string::npos) {
654 auto parentName = leafName.substr(0, posDot);
655 leafName = leafName.substr(posDot + 1);
656 parentId = FindFieldId(parentName, parentId);
657 }
658 for (const auto &fd : fFieldDescriptors) {
659 if (fd.second.GetParentId() == parentId && fd.second.GetFieldName() == leafName)
660 return fd.second.GetId();
661 }
663}
664
665
667{
668 auto rootId = FindFieldId("", kInvalidDescriptorId);
669 return FindFieldId(fieldName, rootId);
670}
671
672
675{
676 for (const auto &cd : fColumnDescriptors) {
677 if (cd.second.GetFieldId() == fieldId && cd.second.GetIndex() == columnIndex)
678 return cd.second.GetId();
679 }
681}
682
683
686{
687 // TODO(jblomer): binary search?
688 for (const auto &cd : fClusterDescriptors) {
689 auto columnRange = cd.second.GetColumnRange(columnId);
690 if (columnRange.Contains(index))
691 return cd.second.GetId();
692 }
694}
695
696
697std::unique_ptr<ROOT::Experimental::RNTupleModel> ROOT::Experimental::RNTupleDescriptor::GenerateModel() const
698{
699 auto model = std::make_unique<RNTupleModel>();
700 auto rootId = FindFieldId("", kInvalidDescriptorId);
701 const auto &rootDesc = GetFieldDescriptor(rootId);
702 for (const auto id : rootDesc.GetLinkIds()) {
703 const auto &topDesc = GetFieldDescriptor(id);
704 auto field = Detail::RFieldBase::Create(topDesc.GetFieldName(), topDesc.GetTypeName());
705 model->AddField(std::unique_ptr<Detail::RFieldBase>(field));
706 }
707 return model;
708}
709
710
711////////////////////////////////////////////////////////////////////////////////
712
713
715{
716 RNTupleDescriptor result;
717 std::swap(result, fDescriptor);
718 return result;
719}
720
722{
723 auto pos = reinterpret_cast<unsigned char *>(headerBuffer);
724 auto base = pos;
725
726 std::uint32_t frameSize;
727 pos += DeserializeFrame(RNTupleDescriptor::kFrameVersionCurrent, base, &frameSize);
728 VerifyCrc32(base, frameSize);
729 std::uint64_t reserved;
730 pos += DeserializeUInt64(pos, &reserved);
731
732 pos += DeserializeString(pos, &fDescriptor.fName);
733 pos += DeserializeString(pos, &fDescriptor.fDescription);
734 pos += DeserializeString(pos, &fDescriptor.fAuthor);
735 pos += DeserializeString(pos, &fDescriptor.fCustodian);
736 pos += DeserializeTimeStamp(pos, &fDescriptor.fTimeStampData);
737 pos += DeserializeTimeStamp(pos, &fDescriptor.fTimeStampWritten);
738 pos += DeserializeVersion(pos, &fDescriptor.fVersion);
739 pos += DeserializeUuid(pos, &fDescriptor.fOwnUuid);
740 pos += DeserializeUuid(pos, &fDescriptor.fGroupUuid);
741
742 std::uint32_t nFields;
743 pos += DeserializeUInt32(pos, &nFields);
744 for (std::uint32_t i = 0; i < nFields; ++i) {
745 auto fieldBase = pos;
746 pos += DeserializeFrame(RFieldDescriptor::kFrameVersionCurrent, fieldBase, &frameSize);
747
749 pos += DeserializeUInt64(pos, &f.fFieldId);
750 pos += DeserializeVersion(pos, &f.fFieldVersion);
751 pos += DeserializeVersion(pos, &f.fTypeVersion);
752 pos += DeserializeString(pos, &f.fFieldName);
753 pos += DeserializeString(pos, &f.fFieldDescription);
754 pos += DeserializeString(pos, &f.fTypeName);
755 pos += DeserializeUInt64(pos, &f.fNRepetitions);
756 std::int32_t structure;
757 pos += DeserializeInt32(pos, &structure);
758 f.fStructure = static_cast<ENTupleStructure>(structure);
759 pos += DeserializeUInt64(pos, &f.fParentId);
760
761 std::uint32_t nLinks;
762 pos += DeserializeUInt32(pos, &nLinks);
763 f.fLinkIds.resize(nLinks);
764 for (std::uint32_t j = 0; j < nLinks; ++j) {
765 pos += DeserializeUInt64(pos, &f.fLinkIds[j]);
766 }
767
768 pos = fieldBase + frameSize;
769 fDescriptor.fFieldDescriptors.emplace(f.fFieldId, std::move(f));
770 }
771
772 std::uint32_t nColumns;
773 pos += DeserializeUInt32(pos, &nColumns);
774 for (std::uint32_t i = 0; i < nColumns; ++i) {
775 auto columnBase = pos;
776 pos += DeserializeFrame(RColumnDescriptor::kFrameVersionCurrent, columnBase, &frameSize);
777
779 pos += DeserializeUInt64(pos, &c.fColumnId);
780 pos += DeserializeVersion(pos, &c.fVersion);
781 pos += DeserializeColumnModel(pos, &c.fModel);
782 pos += DeserializeUInt64(pos, &c.fFieldId);
783 pos += DeserializeUInt32(pos, &c.fIndex);
784
785 pos = columnBase + frameSize;
786 fDescriptor.fColumnDescriptors.emplace(c.fColumnId, std::move(c));
787 }
788}
789
791 auto pos = reinterpret_cast<unsigned char *>(footerBuffer);
792 auto base = pos;
793
794 std::uint32_t frameSize;
795 pos += DeserializeFrame(RNTupleDescriptor::kFrameVersionCurrent, pos, &frameSize);
796 VerifyCrc32(base, frameSize);
797 std::uint64_t reserved;
798 pos += DeserializeUInt64(pos, &reserved);
799
800 std::uint64_t nClusters;
801 pos += DeserializeUInt64(pos, &nClusters);
802 for (std::uint64_t i = 0; i < nClusters; ++i) {
803 RNTupleUuid uuid;
804 pos += DeserializeUuid(pos, &uuid);
805 R__ASSERT(uuid == fDescriptor.fOwnUuid);
806 auto clusterBase = pos;
807 pos += DeserializeFrame(RClusterDescriptor::kFrameVersionCurrent, clusterBase, &frameSize);
808
809 std::uint64_t clusterId;
810 RNTupleVersion version;
811 std::uint64_t firstEntry;
812 std::uint64_t nEntries;
813 pos += DeserializeUInt64(pos, &clusterId);
814 pos += DeserializeVersion(pos, &version);
815 pos += DeserializeUInt64(pos, &firstEntry);
816 pos += DeserializeUInt64(pos, &nEntries);
817 AddCluster(clusterId, version, firstEntry, ROOT::Experimental::ClusterSize_t(nEntries));
819 pos += DeserializeLocator(pos, &locator);
820 SetClusterLocator(clusterId, locator);
821
822 pos = clusterBase + frameSize;
823
824 std::uint32_t nColumns;
825 pos += DeserializeUInt32(pos, &nColumns);
826 for (std::uint32_t j = 0; j < nColumns; ++j) {
827 uint64_t columnId;
828 pos += DeserializeUInt64(pos, &columnId);
829
831 columnRange.fColumnId = columnId;
832 pos += DeserializeColumnRange(pos, &columnRange);
833 AddClusterColumnRange(clusterId, columnRange);
834
836 pageRange.fColumnId = columnId;
837 uint32_t nPages;
838 pos += DeserializeUInt32(pos, &nPages);
839 for (unsigned int k = 0; k < nPages; ++k) {
841 pos += DeserializePageInfo(pos, &pageInfo);
842 pageRange.fPageInfos.emplace_back(pageInfo);
843 }
844 AddClusterPageRange(clusterId, std::move(pageRange));
845 }
846 }
847}
848
850 const std::string_view name, const std::string_view description, const std::string_view author,
851 const RNTupleVersion &version, const RNTupleUuid &uuid)
852{
853 fDescriptor.fName = std::string(name);
854 fDescriptor.fDescription = std::string(description);
855 fDescriptor.fAuthor = std::string(author);
856 fDescriptor.fVersion = version;
857 fDescriptor.fOwnUuid = uuid;
858 fDescriptor.fGroupUuid = uuid;
859}
860
862 DescriptorId_t fieldId, const RNTupleVersion &fieldVersion, const RNTupleVersion &typeVersion,
863 std::string_view fieldName, std::string_view typeName, std::uint64_t nRepetitions, ENTupleStructure structure)
864{
866 f.fFieldId = fieldId;
867 f.fFieldVersion = fieldVersion;
868 f.fTypeVersion = typeVersion;
869 f.fFieldName = std::string(fieldName);
870 f.fTypeName = std::string(typeName);
871 f.fNRepetitions = nRepetitions;
872 f.fStructure = structure;
873 fDescriptor.fFieldDescriptors.emplace(fieldId, std::move(f));
874}
875
877{
878 R__ASSERT(fDescriptor.fFieldDescriptors[linkId].fParentId == kInvalidDescriptorId);
879 fDescriptor.fFieldDescriptors[linkId].fParentId = fieldId;
880 fDescriptor.fFieldDescriptors[fieldId].fLinkIds.push_back(linkId);
881}
882
884 DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model,
885 std::uint32_t index)
886{
888 c.fColumnId = columnId;
889 c.fFieldId = fieldId;
890 c.fVersion = version;
891 c.fModel = model;
892 c.fIndex = index;
893 fDescriptor.fColumnDescriptors.emplace(columnId, std::move(c));
894}
895
897 DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
898{
900 c.fClusterId = clusterId;
901 c.fVersion = version;
902 c.fFirstEntryIndex = firstEntryIndex;
903 c.fNEntries = nEntries;
904 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(c));
905}
906
909{
910 fDescriptor.fClusterDescriptors[clusterId].fLocator = locator;
911}
912
914 DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
915{
916 fDescriptor.fClusterDescriptors[clusterId].fColumnRanges[columnRange.fColumnId] = columnRange;
917}
918
920 DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
921{
922 fDescriptor.fClusterDescriptors[clusterId].fPageRanges.emplace(pageRange.fColumnId, std::move(pageRange));
923}
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
static RooMathCoreReg dummy
#define R__ASSERT(e)
Definition: TError.h:96
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
static RFieldBase * Create(const std::string &fieldName, const std::string &typeName)
Factory method to resurrect a field from the stored on-disk type information.
Definition: RField.cxx:139
The available trivial, native content types of a column.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RNTupleVersion fVersion
Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum.
RLocator fLocator
For pre-fetching / caching an entire contiguous cluster.
static constexpr std::uint16_t kFrameVersionMin
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
Meta-data stored for every column of an ntuple.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnModel fModel
Contains the column type and whether it is sorted.
static constexpr std::uint16_t kFrameVersionMin
RNTupleVersion fVersion
Versions can change, e.g., when new column types are added.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Holds the static meta-data of a column in a tree.
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
RNTupleVersion fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::string fFieldDescription
Free text set by the user.
static constexpr std::uint16_t kFrameVersionMin
std::string fFieldName
The leaf name, not including parent fields.
const std::vector< DescriptorId_t > & GetLinkIds() const
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RNTupleVersion fTypeVersion
The version of the C++ type itself.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
void AddCluster(DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
void AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index)
void SetClusterLocator(DescriptorId_t clusterId, RClusterDescriptor::RLocator locator)
void AddClusterColumnRange(DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
void AddField(DescriptorId_t fieldId, const RNTupleVersion &fieldVersion, const RNTupleVersion &typeVersion, std::string_view fieldName, std::string_view typeName, std::uint64_t nRepetitions, ENTupleStructure structure)
void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author, const RNTupleVersion &version, const RNTupleUuid &uuid)
void AddClusterPageRange(DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
RNTupleUuid fGroupUuid
Column sets that are created as derived sets from existing NTuples share the same group id.
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
std::chrono::system_clock::time_point fTimeStampWritten
The time stamp of writing the data to storage, which gets updated when re-written.
std::uint32_t SerializeHeader(void *buffer) const
We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuples witho...
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::uint32_t SerializeFooter(void *buffer) const
Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr.
std::string fAuthor
The origin of the data.
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
static constexpr std::uint16_t kFrameVersionMin
RNTupleVersion fVersion
The version evolves with the ntuple summary meta-data.
bool operator==(const RNTupleDescriptor &other) const
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
std::string fCustodian
The party currently responsible for storing the data.
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t columnId) const
static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
Given kNBytesPostscript bytes, extract the header and footer lengths in bytes.
std::string fDescription
Free text from the user.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RNTupleUuid fOwnUuid
Every NTuple gets a unique identifier.
std::chrono::system_clock::time_point fTimeStampData
The time stamp of the ntuple data (immutable)
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
For forward and backward compatibility, attach version information to the constituents of the file f...
std::uint32_t GetVersionUse() const
NTupleFlags_t GetFlags() const
std::uint32_t GetVersionMin() const
struct void * fTypeName
Definition: cppyy.h:9
basic_string_view< char > string_view
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:42
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
Definition: RNTupleUtil.hxx:32
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:78
std::string RNTupleUuid
Every NTuple is identified by a UUID. TODO(jblomer): should this be a TUUID?
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:79
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
NTupleSize_t fFirstElementIndex
A 64bit element index.
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
Generic information about the physical location of data.
We do not need to store the element size / uncompressed page size because we know to which column the...
RLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Definition: RNTupleUtil.hxx:45
auto * l
Definition: textangle.C:4