Logo ROOT   master
Reference Guide
RNTupleDescriptor.cxx
Go to the documentation of this file.
1 /// \file RNTupleDescriptor.cxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2018-10-04
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
17 #include <ROOT/RNTupleModel.hxx>
18 #include <ROOT/RNTupleUtil.hxx>
19 #include <ROOT/RStringView.hxx>
20 
21 #include <RZip.h>
22 #include <TError.h>
23 
24 #include <algorithm>
25 #include <cstdint>
26 #include <cstring>
27 #include <iostream>
28 #include <utility>
29 
30 namespace {
31 
32 /// The machine-independent serialization of meta-data wraps the header and footer as well as sub structures in
33 /// frames. The frame layout is
34 ///
35 /// -----------------------------------------------------------
36 /// | TYPE | DESCRIPTION |
37 /// |----------------------------------------------------------
38 /// | std::uint16_t | Version used to write the frame |
39 /// | std::uint16_t | Minimum version for reading the frame |
40 /// | std::uint32_t | Length of the frame incl. preamble |
41 /// -----------------------------------------------------------
42 ///
43 /// In addition, the header and footer store a 4 byte CRC32 checksum of the frame immediately after the frame.
44 /// The footer also repeats the frame size just before the CRC32 checksum. That means, one can read the last 8 bytes
45 /// to determine the footer length, and the first 8 bytes to determine the header length.
46 ///
47 /// Within the frames, integers of different lengths are stored in a machine-independent representation. Strings and
48 /// vectors store the number of items followed by the items. Time stamps are stored in number of seconds since the
49 /// UNIX epoch.
50 
/// Stores val in buffer as 8 bytes, least-significant byte first. A null buffer means "size query only":
/// nothing is written. Returns the serialized size in bytes, which is always 8.
std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
{
   constexpr std::uint32_t kNBytes = 8;
   if (buffer == nullptr)
      return kNBytes;

   auto out = reinterpret_cast<unsigned char *>(buffer);
   // Work on the unsigned bit pattern so that every byte can be extracted with a plain shift
   const auto bits = static_cast<std::uint64_t>(val);
   for (std::uint32_t i = 0; i < kNBytes; ++i)
      out[i] = static_cast<unsigned char>((bits >> (8 * i)) & 0xFF);
   return kNBytes;
}
66 
67 std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
68 {
69  return SerializeInt64(val, buffer);
70 }
71 
/// Reads 8 bytes from buffer, least-significant byte first, and stores the resulting signed value in *val.
/// Returns the number of bytes consumed, which is always 8.
std::uint32_t DeserializeInt64(const void *buffer, std::int64_t *val)
{
   auto in = reinterpret_cast<const unsigned char *>(buffer);
   std::uint64_t bits = 0;
   // Start with the most significant byte (stored last) and shift the previous bytes up
   for (int i = 7; i >= 0; --i)
      bits = (bits << 8) | in[i];
   *val = static_cast<std::int64_t>(bits);
   return 8;
}
81 
82 std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t *val)
83 {
84  return DeserializeInt64(buffer, reinterpret_cast<std::int64_t *>(val));
85 }
86 
/// Stores val in buffer as 4 bytes, least-significant byte first. A null buffer means "size query only":
/// nothing is written. Returns the serialized size in bytes, which is always 4.
std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
{
   if (buffer == nullptr)
      return 4;

   auto out = reinterpret_cast<unsigned char *>(buffer);
   const auto bits = static_cast<std::uint32_t>(val);
   out[0] = static_cast<unsigned char>(bits & 0xFF);
   out[1] = static_cast<unsigned char>((bits >> 8) & 0xFF);
   out[2] = static_cast<unsigned char>((bits >> 16) & 0xFF);
   out[3] = static_cast<unsigned char>((bits >> 24) & 0xFF);
   return 4;
}
98 
99 std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
100 {
101  return SerializeInt32(val, buffer);
102 }
103 
/// Reads 4 bytes from buffer, least-significant byte first, and stores the resulting signed value in *val.
/// Returns the number of bytes consumed, which is always 4.
std::uint32_t DeserializeInt32(const void *buffer, std::int32_t *val)
{
   auto in = reinterpret_cast<const unsigned char *>(buffer);
   const std::uint32_t bits = std::uint32_t(in[0]) | (std::uint32_t(in[1]) << 8) |
                              (std::uint32_t(in[2]) << 16) | (std::uint32_t(in[3]) << 24);
   *val = static_cast<std::int32_t>(bits);
   return 4;
}
111 
112 std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t *val)
113 {
114  return DeserializeInt32(buffer, reinterpret_cast<std::int32_t *>(val));
115 }
116 
/// Stores val in buffer as 2 bytes, least-significant byte first. A null buffer means "size query only":
/// nothing is written. Returns the serialized size in bytes, which is always 2.
std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
{
   if (buffer == nullptr)
      return 2;

   auto out = reinterpret_cast<unsigned char *>(buffer);
   const auto bits = static_cast<std::uint16_t>(val);
   out[0] = static_cast<unsigned char>(bits & 0xFF);
   out[1] = static_cast<unsigned char>(bits >> 8);
   return 2;
}
126 
127 std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
128 {
129  return SerializeInt16(val, buffer);
130 }
131 
/// Reads 2 bytes from buffer, least-significant byte first, and stores the resulting signed value in *val.
/// Returns the number of bytes consumed, which is always 2.
std::uint32_t DeserializeInt16(const void *buffer, std::int16_t *val)
{
   auto in = reinterpret_cast<const unsigned char *>(buffer);
   const unsigned int lowByte = in[0];
   const unsigned int highByte = in[1];
   *val = static_cast<std::int16_t>(lowByte | (highByte << 8));
   return 2;
}
138 
139 std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t *val)
140 {
141  return DeserializeInt16(buffer, reinterpret_cast<std::int16_t *>(val));
142 }
143 
144 std::uint32_t SerializeClusterSize(ROOT::Experimental::ClusterSize_t val, void *buffer)
145 {
146  return SerializeUInt32(val, buffer);
147 }
148 
149 std::uint32_t DeserializeClusterSize(const void *buffer, ROOT::Experimental::ClusterSize_t *val)
150 {
151  std::uint32_t size;
152  auto nbytes = DeserializeUInt32(buffer, &size);
153  *val = size;
154  return nbytes;
155 }
156 
157 std::uint32_t SerializeString(const std::string &val, void *buffer)
158 {
159  if (buffer != nullptr) {
160  auto pos = reinterpret_cast<unsigned char *>(buffer);
161  pos += SerializeUInt32(val.length(), pos);
162  memcpy(pos, val.data(), val.length());
163  }
164  return SerializeUInt32(val.length(), nullptr) + val.length();
165 }
166 
167 std::uint32_t DeserializeString(const void *buffer, std::string *val)
168 {
169  auto base = reinterpret_cast<const unsigned char *>(buffer);
170  auto bytes = base;
171  std::uint32_t length;
172  bytes += DeserializeUInt32(buffer, &length);
173  val->resize(length);
174  memcpy(&(*val)[0], bytes, length);
175  return bytes + length - base;
176 }
177 
178 std::uint32_t SerializeLocator(const ROOT::Experimental::RClusterDescriptor::RLocator &val, void *buffer)
179 {
180  // In order to keep the meta-data small, we don't wrap the locator in a frame
181  if (buffer != nullptr) {
182  auto pos = reinterpret_cast<unsigned char *>(buffer);
183  pos += SerializeInt64(val.fPosition, pos);
184  pos += SerializeUInt32(val.fBytesOnStorage, pos);
185  pos += SerializeString(val.fUrl, pos);
186  }
187  return SerializeString(val.fUrl, nullptr) + 12;
188 }
189 
190 std::uint32_t DeserializeLocator(const void *buffer, ROOT::Experimental::RClusterDescriptor::RLocator *val)
191 {
192  auto bytes = reinterpret_cast<const unsigned char *>(buffer);
193  bytes += DeserializeInt64(bytes, &val->fPosition);
194  bytes += DeserializeUInt32(bytes, &val->fBytesOnStorage);
195  bytes += DeserializeString(bytes, &val->fUrl);
196  return SerializeString(val->fUrl, nullptr) + 12;
197 }
198 
199 std::uint32_t SerializeFrame(std::uint16_t protocolVersionCurrent, std::uint16_t protocolVersionMin, void *buffer,
200  void **ptrSize)
201 {
202  if (buffer != nullptr) {
203  auto pos = reinterpret_cast<unsigned char *>(buffer);
204  pos += SerializeUInt16(protocolVersionCurrent, pos); // The protocol version used to write the structure
205  pos += SerializeUInt16(protocolVersionMin, pos); // The minimum protocol version required to read the data
206  *ptrSize = pos;
207  pos += SerializeUInt32(0, pos); // placeholder for the size of the frame
208  }
209  return 8;
210 }
211 
212 std::uint32_t DeserializeFrame(std::uint16_t protocolVersion, const void *buffer, std::uint32_t *size)
213 {
214  auto bytes = reinterpret_cast<const unsigned char *>(buffer);
215  std::uint16_t protocolVersionAtWrite;
216  std::uint16_t protocolVersionMinRequired;
217  bytes += DeserializeUInt16(bytes, &protocolVersionAtWrite);
218  bytes += DeserializeUInt16(bytes, &protocolVersionMinRequired);
219  R__ASSERT(protocolVersionAtWrite >= protocolVersionMinRequired);
220  R__ASSERT(protocolVersion >= protocolVersionMinRequired);
221  bytes += DeserializeUInt32(bytes, size);
222  return 8;
223 }
224 
225 std::uint32_t SerializeVersion(const ROOT::Experimental::RNTupleVersion &val, void *buffer)
226 {
227  auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
228  auto pos = base;
229  void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
230 
231  void *ptrSize = nullptr;
232  pos += SerializeFrame(0, 0, *where, &ptrSize);
233 
234  pos += SerializeUInt32(val.GetVersionUse(), *where);
235  pos += SerializeUInt32(val.GetVersionMin(), *where);
236  pos += SerializeUInt64(val.GetFlags(), *where);
237 
238  auto size = pos - base;
239  SerializeUInt32(size, ptrSize);
240  return size;
241 }
242 
243 std::uint32_t DeserializeVersion(const void *buffer, ROOT::Experimental::RNTupleVersion *version)
244 {
245  auto bytes = reinterpret_cast<const unsigned char *>(buffer);
246  std::uint32_t frameSize;
247  bytes += DeserializeFrame(0, bytes, &frameSize);
248 
249  std::uint32_t versionUse;
250  std::uint32_t versionMin;
251  std::uint64_t flags;
252  bytes += DeserializeUInt32(bytes, &versionUse);
253  bytes += DeserializeUInt32(bytes, &versionMin);
254  bytes += DeserializeUInt64(bytes, &flags);
255  *version = ROOT::Experimental::RNTupleVersion(versionUse, versionMin, flags);
256 
257  return frameSize;
258 }
259 
260 std::uint32_t SerializeUuid(const ROOT::Experimental::RNTupleUuid &val, void *buffer)
261 {
262  auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
263  auto pos = base;
264  void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
265 
266  void *ptrSize = nullptr;
267  pos += SerializeFrame(0, 0, *where, &ptrSize);
268 
269  pos += SerializeString(val, *where);
270 
271  auto size = pos - base;
272  SerializeUInt32(size, ptrSize);
273  return size;
274 }
275 
276 std::uint32_t DeserializeUuid(const void *buffer, ROOT::Experimental::RNTupleUuid *uuid)
277 {
278  auto bytes = reinterpret_cast<const unsigned char *>(buffer);
279  std::uint32_t frameSize;
280  bytes += DeserializeFrame(0, bytes, &frameSize);
281 
282  bytes += DeserializeString(bytes, uuid);
283 
284  return frameSize;
285 }
286 
287 std::uint32_t SerializeColumnModel(const ROOT::Experimental::RColumnModel &val, void *buffer)
288 {
289  auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
290  auto pos = base;
291  void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
292 
293  void *ptrSize = nullptr;
294  pos += SerializeFrame(0, 0, *where, &ptrSize);
295 
296  pos += SerializeInt32(static_cast<int>(val.GetType()), *where);
297  pos += SerializeInt32(static_cast<int>(val.GetIsSorted()), *where);
298 
299  auto size = pos - base;
300  SerializeUInt32(size, ptrSize);
301  return size;
302 }
303 
304 std::uint32_t DeserializeColumnModel(const void *buffer, ROOT::Experimental::RColumnModel *columnModel)
305 {
306  auto bytes = reinterpret_cast<const unsigned char *>(buffer);
307  std::uint32_t frameSize;
308  bytes += DeserializeFrame(0, bytes, &frameSize);
309 
310  std::int32_t type;
311  std::int32_t isSorted;
312  bytes += DeserializeInt32(bytes, &type);
313  bytes += DeserializeInt32(bytes, &isSorted);
314  *columnModel = ROOT::Experimental::RColumnModel(static_cast<ROOT::Experimental::EColumnType>(type), isSorted);
315 
316  return frameSize;
317 }
318 
319 std::uint32_t SerializeTimeStamp(const std::chrono::system_clock::time_point &val, void *buffer)
320 {
321  return SerializeInt64(std::chrono::system_clock::to_time_t(val), buffer);
322 }
323 
324 std::uint32_t DeserializeTimeStamp(const void *buffer, std::chrono::system_clock::time_point *timeStamp)
325 {
326  std::int64_t secSinceUnixEpoch;
327  auto size = DeserializeInt64(buffer, &secSinceUnixEpoch);
328  *timeStamp = std::chrono::system_clock::from_time_t(secSinceUnixEpoch);
329  return size;
330 }
331 
332 std::uint32_t SerializeColumnRange(const ROOT::Experimental::RClusterDescriptor::RColumnRange &val, void *buffer)
333 {
334  // To keep the cluster footers small, we don't put a frame around individual column ranges.
335  if (buffer != nullptr) {
336  auto pos = reinterpret_cast<unsigned char *>(buffer);
337  // The column id is stored in SerializeFooter() for the column range and the page range altogether
338  pos += SerializeUInt64(val.fFirstElementIndex, pos);
339  pos += SerializeClusterSize(val.fNElements, pos);
340  pos += SerializeInt64(val.fCompressionSettings, pos);
341  }
342  return 20;
343 }
344 
/// Reads a column range (first element index, number of elements, compression settings) from buffer into
/// *columnRange. No frame is read. Returns the number of bytes consumed, which is always 20.
/// NOTE(review): the line declaring the columnRange parameter (listing line 346) is absent from this
/// listing; restore it from the upstream source.
345 std::uint32_t DeserializeColumnRange(const void *buffer,
347 {
348  auto bytes = reinterpret_cast<const unsigned char *>(buffer);
349  // The column id is set elsewhere (see AddClustersFromFooter())
350  bytes += DeserializeUInt64(bytes, &columnRange->fFirstElementIndex);
351  bytes += DeserializeClusterSize(bytes, &columnRange->fNElements);
352  bytes += DeserializeInt64(bytes, &columnRange->fCompressionSettings);
353  return 20;
354 }
355 
356 std::uint32_t SerializePageInfo(const ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo &val, void *buffer)
357 {
358  // To keep the cluster footers small, we don't put a frame around individual page infos.
359  if (buffer != nullptr) {
360  auto pos = reinterpret_cast<unsigned char *>(buffer);
361  // The column id is stored in SerializeFooter() for the column range and the page range altogether
362  pos += SerializeClusterSize(val.fNElements, pos);
363  pos += SerializeLocator(val.fLocator, pos);
364  }
365  return 4 + SerializeLocator(val.fLocator, nullptr);
366 }
367 
/// Reads a page info (number of elements followed by the page locator) from buffer into *pageInfo.
/// No frame is read. Returns the number of bytes consumed.
/// NOTE(review): the line declaring the pageInfo parameter (listing line 369) is absent from this listing;
/// restore it from the upstream source.
368 std::uint32_t DeserializePageInfo(const void *buffer,
370 {
371  auto base = reinterpret_cast<const unsigned char *>(buffer);
372  auto bytes = base;
373  // The column id is set elsewhere (see AddClustersFromFooter())
374  bytes += DeserializeClusterSize(bytes, &pageInfo->fNElements);
375  bytes += DeserializeLocator(bytes, &pageInfo->fLocator);
376  return bytes - base;
377 }
378 
379 std::uint32_t SerializeCrc32(const unsigned char *data, std::uint32_t length, void *buffer)
380 {
381  auto checksum = R__crc32(0, nullptr, 0);
382  if (buffer != nullptr) {
383  checksum = R__crc32(checksum, data, length);
384  SerializeUInt32(checksum, buffer);
385  }
386  return 4;
387 }
388 
389 void VerifyCrc32(const unsigned char *data, std::uint32_t length)
390 {
391  auto checksumReal = R__crc32(0, nullptr, 0);
392  checksumReal = R__crc32(checksumReal, data, length);
393  std::uint32_t checksumFound;
394  DeserializeUInt32(data + length, &checksumFound);
395  R__ASSERT(checksumFound == checksumReal);
396 }
397 
/// Serializes a field descriptor: id, field version, type version, field name, field description, type name,
/// number of repetitions, structure, parent id, and the link ids (a 32bit count followed by the ids).
/// With a null buffer, *where is always nullptr, so the callees write nothing and only the size is computed.
/// Returns the number of bytes between the start of the buffer and the final write position.
/// NOTE(review): this listing jumps from line 404 to line 407, so the statement(s) following the ptrSize
/// declaration are not visible. As shown, ptrSize would still be nullptr at the final SerializeUInt32()
/// call, making it a no-op; presumably the omitted lines call SerializeFrame() like the sibling
/// serializers do. Confirm against the upstream source.
398 std::uint32_t SerializeField(const ROOT::Experimental::RFieldDescriptor &val, void *buffer)
399 {
400  auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
401  auto pos = base;
402  void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
403 
404  void *ptrSize = nullptr;
407 
408  pos += SerializeUInt64(val.GetId(), *where);
409  pos += SerializeVersion(val.GetFieldVersion(), *where);
410  pos += SerializeVersion(val.GetTypeVersion(), *where);
411  pos += SerializeString(val.GetFieldName(), *where);
412  pos += SerializeString(val.GetFieldDescription(), *where);
413  pos += SerializeString(val.GetTypeName(), *where);
414  pos += SerializeUInt64(val.GetNRepetitions(), *where);
415  pos += SerializeUInt32(static_cast<int>(val.GetStructure()), *where);
416  pos += SerializeUInt64(val.GetParentId(), *where);
417  pos += SerializeUInt32(val.GetLinkIds().size(), *where);
418  for (const auto& l : val.GetLinkIds())
419  pos += SerializeUInt64(l, *where);
420 
421  auto size = pos - base;
422  SerializeUInt32(size, ptrSize);
423  return size;
424 }
425 
/// Serializes a column descriptor: column id, version, column model, id of the owning field, and the
/// column index. With a null buffer, *where is always nullptr, so the callees write nothing and only the
/// size is computed. Returns the number of bytes between the start of the buffer and the final write position.
/// NOTE(review): this listing jumps from line 432 to line 435, so the statement(s) following the ptrSize
/// declaration are not visible. As shown, ptrSize would still be nullptr at the final SerializeUInt32()
/// call, making it a no-op; presumably the omitted lines call SerializeFrame() like the sibling
/// serializers do. Confirm against the upstream source.
426 std::uint32_t SerializeColumn(const ROOT::Experimental::RColumnDescriptor &val, void *buffer)
427 {
428  auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
429  auto pos = base;
430  void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
431 
432  void *ptrSize = nullptr;
435 
436  pos += SerializeUInt64(val.GetId(), *where);
437  pos += SerializeVersion(val.GetVersion(), *where);
438  pos += SerializeColumnModel(val.GetModel(), *where);
439  pos += SerializeUInt64(val.GetFieldId(), *where);
440  pos += SerializeUInt32(val.GetIndex(), *where);
441 
442  auto size = pos - base;
443  SerializeUInt32(size, ptrSize);
444  return size;
445 }
446 
/// Serializes the summary of a cluster descriptor: cluster id, version, first entry index, number of
/// entries, and the cluster locator. Column ranges and page ranges are not written here. With a null
/// buffer, *where is always nullptr, so the callees write nothing and only the size is computed.
/// Returns the number of bytes between the start of the buffer and the final write position.
/// NOTE(review): this listing jumps from line 453 to line 456, so the statement(s) following the ptrSize
/// declaration are not visible. As shown, ptrSize would still be nullptr at the final SerializeUInt32()
/// call, making it a no-op; presumably the omitted lines call SerializeFrame() like the sibling
/// serializers do. Confirm against the upstream source.
447 std::uint32_t SerializeClusterSummary(const ROOT::Experimental::RClusterDescriptor &val, void *buffer)
448 {
449  auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
450  auto pos = base;
451  void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
452 
453  void *ptrSize = nullptr;
456 
457  pos += SerializeUInt64(val.GetId(), *where);
458  pos += SerializeVersion(val.GetVersion(), *where);
459  pos += SerializeUInt64(val.GetFirstEntryIndex(), *where);
460  pos += SerializeUInt64(val.GetNEntries(), *where);
461  pos += SerializeLocator(val.GetLocator(), *where);
462 
463  auto size = pos - base;
464  SerializeUInt32(size, ptrSize);
465  return size;
466 }
467 
468 } // anonymous namespace
469 
470 
471 ////////////////////////////////////////////////////////////////////////////////
472 
473 
475  return fFieldId == other.fFieldId &&
476  fFieldVersion == other.fFieldVersion &&
477  fTypeVersion == other.fTypeVersion &&
478  fFieldName == other.fFieldName &&
480  fTypeName == other.fTypeName &&
481  fNRepetitions == other.fNRepetitions &&
482  fStructure == other.fStructure &&
483  fParentId == other.fParentId &&
484  fLinkIds == other.fLinkIds;
485 }
486 
487 
488 ////////////////////////////////////////////////////////////////////////////////
489 
490 
492  return fColumnId == other.fColumnId &&
493  fVersion == other.fVersion &&
494  fModel == other.fModel &&
495  fFieldId == other.fFieldId &&
496  fIndex == other.fIndex;
497 }
498 
499 
500 ////////////////////////////////////////////////////////////////////////////////
501 
502 
504  return fClusterId == other.fClusterId &&
505  fVersion == other.fVersion &&
506  fFirstEntryIndex == other.fFirstEntryIndex &&
507  fNEntries == other.fNEntries &&
508  fLocator == other.fLocator &&
509  fColumnRanges == other.fColumnRanges &&
510  fPageRanges == other.fPageRanges;
511 }
512 
513 
514 ////////////////////////////////////////////////////////////////////////////////
515 
516 
518  return fName == other.fName &&
519  fDescription == other.fDescription &&
520  fAuthor == other.fAuthor &&
521  fCustodian == other.fCustodian &&
522  fTimeStampData == other.fTimeStampData &&
523  fTimeStampWritten == other.fTimeStampWritten &&
524  fVersion == other.fVersion &&
525  fOwnUuid == other.fOwnUuid &&
526  fGroupUuid == other.fGroupUuid &&
527  fFieldDescriptors == other.fFieldDescriptors &&
528  fColumnDescriptors == other.fColumnDescriptors &&
529  fClusterDescriptors == other.fClusterDescriptors;
530 }
531 
532 
534 {
535  auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
536  auto pos = base;
537  void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
538 
539  void *ptrSize = nullptr;
540  pos += SerializeFrame(
542  pos += SerializeUInt64(0, *where); // reserved; can be at some point used, e.g., for compression flags
543 
544  pos += SerializeString(fName, *where);
545  pos += SerializeString(fDescription, *where);
546  pos += SerializeString(fAuthor, *where);
547  pos += SerializeString(fCustodian, *where);
548  pos += SerializeTimeStamp(fTimeStampData, *where);
549  pos += SerializeTimeStamp(fTimeStampWritten, *where);
550  pos += SerializeVersion(fVersion, *where);
551  pos += SerializeUuid(fOwnUuid, *where);
552  pos += SerializeUuid(fGroupUuid, *where);
553  pos += SerializeUInt32(fFieldDescriptors.size(), *where);
554  for (const auto& f : fFieldDescriptors) {
555  pos += SerializeField(f.second, *where);
556  }
557  pos += SerializeUInt32(fColumnDescriptors.size(), *where);
558  for (const auto& c : fColumnDescriptors) {
559  pos += SerializeColumn(c.second, *where);
560  }
561 
562  std::uint32_t size = pos - base;
563  SerializeUInt32(size, ptrSize);
564  size += SerializeCrc32(base, size, *where);
565 
566  return size;
567 }
568 
570 {
571  auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
572  auto pos = base;
573  void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
574 
575  void *ptrSize = nullptr;
576  pos += SerializeFrame(
578  pos += SerializeUInt64(0, *where); // reserved; can be at some point used, e.g., for compression flags
579 
580  pos += SerializeUInt64(fClusterDescriptors.size(), *where);
581  for (const auto& cluster : fClusterDescriptors) {
582  pos += SerializeUuid(fOwnUuid, *where); // in order to verify that header and footer belong together
583  pos += SerializeClusterSummary(cluster.second, *where);
584 
585  pos += SerializeUInt32(fColumnDescriptors.size(), *where);
586  for (const auto& column : fColumnDescriptors) {
587  auto columnId = column.first;
588  pos += SerializeUInt64(columnId, *where);
589 
590  const auto &columnRange = cluster.second.GetColumnRange(columnId);
591  R__ASSERT(columnRange.fColumnId == columnId);
592  pos += SerializeColumnRange(columnRange, *where);
593 
594  const auto &pageRange = cluster.second.GetPageRange(columnId);
595  R__ASSERT(pageRange.fColumnId == columnId);
596  auto nPages = pageRange.fPageInfos.size();
597  pos += SerializeUInt32(nPages, *where);
598  for (unsigned int i = 0; i < nPages; ++i) {
599  pos += SerializePageInfo(pageRange.fPageInfos[i], *where);
600  }
601  }
602  }
603 
604  // The next 16 bytes make the ntuple's postscript
605  pos += SerializeUInt16(kFrameVersionCurrent, *where);
606  pos += SerializeUInt16(kFrameVersionMin, *where);
607  // Add the CRC32 bytes to the header and footer sizes
608  pos += SerializeUInt32(SerializeHeader(nullptr), *where);
609  std::uint32_t size = pos - base + 4;
610  pos += SerializeUInt32(size + 4, *where);
611  size += SerializeCrc32(base, size, *where);
612 
613  return size;
614 }
615 
616 
618  const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
619 {
620  auto pos = reinterpret_cast<const unsigned char *>(postscript);
621  std::uint16_t dummy;
622  pos += DeserializeUInt16(pos, &dummy);
623  pos += DeserializeUInt16(pos, &dummy);
624  pos += DeserializeUInt32(pos, &szHeader);
625  pos += DeserializeUInt32(pos, &szFooter);
626 }
627 
628 
630 {
631  NTupleSize_t result = 0;
632  for (const auto &cd : fClusterDescriptors) {
633  result = std::max(result, cd.second.GetFirstEntryIndex() + cd.second.GetNEntries());
634  }
635  return result;
636 }
637 
639 {
640  NTupleSize_t result = 0;
641  for (const auto &cd : fClusterDescriptors) {
642  auto columnRange = cd.second.GetColumnRange(columnId);
643  result = std::max(result, columnRange.fFirstElementIndex + columnRange.fNElements);
644  }
645  return result;
646 }
647 
/// Resolves a possibly dot-separated field name relative to the field with id parentId. Everything before
/// the last '.' is resolved recursively to obtain the parent id; the remaining leaf name is then matched by
/// a linear search over all field descriptors that have this parent id. Returns the id of the first match
/// or kInvalidDescriptorId if there is none.
/// NOTE(review): the return-type line preceding this declarator (listing line 648) is absent from this
/// listing; restore it from the upstream source.
649 ROOT::Experimental::RNTupleDescriptor::FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
650 {
651  std::string leafName(fieldName);
652  auto posDot = leafName.find_last_of('.');
653  if (posDot != std::string::npos) {
654  auto parentName = leafName.substr(0, posDot);
655  leafName = leafName.substr(posDot + 1);
656  parentId = FindFieldId(parentName, parentId);
657  }
658  for (const auto &fd : fFieldDescriptors) {
659  if (fd.second.GetParentId() == parentId && fd.second.GetFieldName() == leafName)
660  return fd.second.GetId();
661  }
662  return kInvalidDescriptorId;
663 }
664 
665 
667 {
668  auto rootId = FindFieldId("", kInvalidDescriptorId);
669  return FindFieldId(fieldName, rootId);
670 }
671 
672 
675 {
676  for (const auto &cd : fColumnDescriptors) {
677  if (cd.second.GetFieldId() == fieldId && cd.second.GetIndex() == columnIndex)
678  return cd.second.GetId();
679  }
680  return kInvalidDescriptorId;
681 }
682 
683 
686 {
687  // TODO(jblomer): binary search?
688  for (const auto &cd : fClusterDescriptors) {
689  auto columnRange = cd.second.GetColumnRange(columnId);
690  if (columnRange.Contains(index))
691  return cd.second.GetId();
692  }
693  return kInvalidDescriptorId;
694 }
695 
696 
697 std::unique_ptr<ROOT::Experimental::RNTupleModel> ROOT::Experimental::RNTupleDescriptor::GenerateModel() const
698 {
699  auto model = std::make_unique<RNTupleModel>();
700  auto rootId = FindFieldId("", kInvalidDescriptorId);
701  const auto &rootDesc = GetFieldDescriptor(rootId);
702  for (const auto id : rootDesc.GetLinkIds()) {
703  const auto &topDesc = GetFieldDescriptor(id);
704  auto field = Detail::RFieldBase::Create(topDesc.GetFieldName(), topDesc.GetTypeName());
705  model->AddField(std::unique_ptr<Detail::RFieldBase>(field));
706  }
707  return model;
708 }
709 
710 
711 ////////////////////////////////////////////////////////////////////////////////
712 
713 
715 {
716  RNTupleDescriptor result;
717  std::swap(result, fDescriptor);
718  return result;
719 }
720 
722 {
723  auto pos = reinterpret_cast<unsigned char *>(headerBuffer);
724  auto base = pos;
725 
726  std::uint32_t frameSize;
727  pos += DeserializeFrame(RNTupleDescriptor::kFrameVersionCurrent, base, &frameSize);
728  VerifyCrc32(base, frameSize);
729  std::uint64_t reserved;
730  pos += DeserializeUInt64(pos, &reserved);
731 
732  pos += DeserializeString(pos, &fDescriptor.fName);
733  pos += DeserializeString(pos, &fDescriptor.fDescription);
734  pos += DeserializeString(pos, &fDescriptor.fAuthor);
735  pos += DeserializeString(pos, &fDescriptor.fCustodian);
736  pos += DeserializeTimeStamp(pos, &fDescriptor.fTimeStampData);
737  pos += DeserializeTimeStamp(pos, &fDescriptor.fTimeStampWritten);
738  pos += DeserializeVersion(pos, &fDescriptor.fVersion);
739  pos += DeserializeUuid(pos, &fDescriptor.fOwnUuid);
740  pos += DeserializeUuid(pos, &fDescriptor.fGroupUuid);
741 
742  std::uint32_t nFields;
743  pos += DeserializeUInt32(pos, &nFields);
744  for (std::uint32_t i = 0; i < nFields; ++i) {
745  auto fieldBase = pos;
746  pos += DeserializeFrame(RFieldDescriptor::kFrameVersionCurrent, fieldBase, &frameSize);
747 
749  pos += DeserializeUInt64(pos, &f.fFieldId);
750  pos += DeserializeVersion(pos, &f.fFieldVersion);
751  pos += DeserializeVersion(pos, &f.fTypeVersion);
752  pos += DeserializeString(pos, &f.fFieldName);
753  pos += DeserializeString(pos, &f.fFieldDescription);
754  pos += DeserializeString(pos, &f.fTypeName);
755  pos += DeserializeUInt64(pos, &f.fNRepetitions);
756  std::int32_t structure;
757  pos += DeserializeInt32(pos, &structure);
758  f.fStructure = static_cast<ENTupleStructure>(structure);
759  pos += DeserializeUInt64(pos, &f.fParentId);
760 
761  std::uint32_t nLinks;
762  pos += DeserializeUInt32(pos, &nLinks);
763  f.fLinkIds.resize(nLinks);
764  for (std::uint32_t j = 0; j < nLinks; ++j) {
765  pos += DeserializeUInt64(pos, &f.fLinkIds[j]);
766  }
767 
768  pos = fieldBase + frameSize;
769  fDescriptor.fFieldDescriptors.emplace(f.fFieldId, std::move(f));
770  }
771 
772  std::uint32_t nColumns;
773  pos += DeserializeUInt32(pos, &nColumns);
774  for (std::uint32_t i = 0; i < nColumns; ++i) {
775  auto columnBase = pos;
776  pos += DeserializeFrame(RColumnDescriptor::kFrameVersionCurrent, columnBase, &frameSize);
777 
779  pos += DeserializeUInt64(pos, &c.fColumnId);
780  pos += DeserializeVersion(pos, &c.fVersion);
781  pos += DeserializeColumnModel(pos, &c.fModel);
782  pos += DeserializeUInt64(pos, &c.fFieldId);
783  pos += DeserializeUInt32(pos, &c.fIndex);
784 
785  pos = columnBase + frameSize;
786  fDescriptor.fColumnDescriptors.emplace(c.fColumnId, std::move(c));
787  }
788 }
789 
791  auto pos = reinterpret_cast<unsigned char *>(footerBuffer);
792  auto base = pos;
793 
794  std::uint32_t frameSize;
795  pos += DeserializeFrame(RNTupleDescriptor::kFrameVersionCurrent, pos, &frameSize);
796  VerifyCrc32(base, frameSize);
797  std::uint64_t reserved;
798  pos += DeserializeUInt64(pos, &reserved);
799 
800  std::uint64_t nClusters;
801  pos += DeserializeUInt64(pos, &nClusters);
802  for (std::uint64_t i = 0; i < nClusters; ++i) {
803  RNTupleUuid uuid;
804  pos += DeserializeUuid(pos, &uuid);
805  R__ASSERT(uuid == fDescriptor.fOwnUuid);
806  auto clusterBase = pos;
807  pos += DeserializeFrame(RClusterDescriptor::kFrameVersionCurrent, clusterBase, &frameSize);
808 
809  std::uint64_t clusterId;
810  RNTupleVersion version;
811  std::uint64_t firstEntry;
812  std::uint64_t nEntries;
813  pos += DeserializeUInt64(pos, &clusterId);
814  pos += DeserializeVersion(pos, &version);
815  pos += DeserializeUInt64(pos, &firstEntry);
816  pos += DeserializeUInt64(pos, &nEntries);
817  AddCluster(clusterId, version, firstEntry, ROOT::Experimental::ClusterSize_t(nEntries));
819  pos += DeserializeLocator(pos, &locator);
820  SetClusterLocator(clusterId, locator);
821 
822  pos = clusterBase + frameSize;
823 
824  std::uint32_t nColumns;
825  pos += DeserializeUInt32(pos, &nColumns);
826  for (std::uint32_t j = 0; j < nColumns; ++j) {
827  uint64_t columnId;
828  pos += DeserializeUInt64(pos, &columnId);
829 
831  columnRange.fColumnId = columnId;
832  pos += DeserializeColumnRange(pos, &columnRange);
833  AddClusterColumnRange(clusterId, columnRange);
834 
836  pageRange.fColumnId = columnId;
837  uint32_t nPages;
838  pos += DeserializeUInt32(pos, &nPages);
839  for (unsigned int k = 0; k < nPages; ++k) {
841  pos += DeserializePageInfo(pos, &pageInfo);
842  pageRange.fPageInfos.emplace_back(pageInfo);
843  }
844  AddClusterPageRange(clusterId, std::move(pageRange));
845  }
846  }
847 }
848 
850  const std::string_view name, const std::string_view description, const std::string_view author,
851  const RNTupleVersion &version, const RNTupleUuid &uuid)
852 {
853  fDescriptor.fName = std::string(name);
854  fDescriptor.fDescription = std::string(description);
855  fDescriptor.fAuthor = std::string(author);
856  fDescriptor.fVersion = version;
857  fDescriptor.fOwnUuid = uuid;
858  fDescriptor.fGroupUuid = uuid;
859 }
860 
862  DescriptorId_t fieldId, const RNTupleVersion &fieldVersion, const RNTupleVersion &typeVersion,
863  std::string_view fieldName, std::string_view typeName, std::uint64_t nRepetitions, ENTupleStructure structure)
864 {
866  f.fFieldId = fieldId;
867  f.fFieldVersion = fieldVersion;
868  f.fTypeVersion = typeVersion;
869  f.fFieldName = std::string(fieldName);
870  f.fTypeName = std::string(typeName);
871  f.fNRepetitions = nRepetitions;
872  f.fStructure = structure;
873  fDescriptor.fFieldDescriptors.emplace(fieldId, std::move(f));
874 }
875 
877 {
878  R__ASSERT(fDescriptor.fFieldDescriptors[linkId].fParentId == kInvalidDescriptorId);
879  fDescriptor.fFieldDescriptors[linkId].fParentId = fieldId;
880  fDescriptor.fFieldDescriptors[fieldId].fLinkIds.push_back(linkId);
881 }
882 
884  DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model,
885  std::uint32_t index)
886 {
888  c.fColumnId = columnId;
889  c.fFieldId = fieldId;
890  c.fVersion = version;
891  c.fModel = model;
892  c.fIndex = index;
893  fDescriptor.fColumnDescriptors.emplace(columnId, std::move(c));
894 }
895 
897  DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
898 {
900  c.fClusterId = clusterId;
901  c.fVersion = version;
902  c.fFirstEntryIndex = firstEntryIndex;
903  c.fNEntries = nEntries;
904  fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(c));
905 }
906 
909 {
910  fDescriptor.fClusterDescriptors[clusterId].fLocator = locator;
911 }
912 
914  DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
915 {
916  fDescriptor.fClusterDescriptors[clusterId].fColumnRanges[columnRange.fColumnId] = columnRange;
917 }
918 
920  DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
921 {
922  fDescriptor.fClusterDescriptors[clusterId].fPageRanges.emplace(pageRange.fColumnId, std::move(pageRange));
923 }
void SetClusterLocator(DescriptorId_t clusterId, RClusterDescriptor::RLocator locator)
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
void AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
bool operator==(const RColumnDescriptor &other) const
Holds the static meta-data of a column in a tree
RLocator fLocator
The meaning of fLocator depends on the storage backend.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RNTupleVersion fTypeVersion
The version of the C++ type itself.
static constexpr std::uint16_t kFrameVersionMin
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system...
Definition: RNTupleUtil.hxx:32
void AddCluster(DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
#define R__ASSERT(e)
Definition: TError.h:96
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:78
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:42
RColumnModel fModel
Contains the column type and whether it is sorted.
std::string fTypeName
The C++ type that was used when writing the field.
#define f(i)
Definition: RSha256.hxx:104
RNTupleVersion fVersion
Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum...
bool operator==(const RNTupleDescriptor &other) const
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
The window of element indexes of a particular column in a particular cluster.
RNTupleUuid fOwnUuid
Every NTuple gets a unique identifier.
std::string fCustodian
The party currently responsible for storing the data.
RLocator fLocator
For pre-fetching / caching an entire contiguous cluster.
void AddClusterColumnRange(DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Definition: RNTupleUtil.hxx:45
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
Given kNBytesPostscript bytes, extract the header and footer lengths in bytes.
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index)
std::string fDescription
Free text from the user.
void AddClusterPageRange(DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
static RFieldBase * Create(const std::string &fieldName, const std::string &typeName)
Factory method to resurrect a field from the stored on-disk type information.
Definition: RField.cxx:100
RNTupleVersion fFieldVersion
The version of the C++-type-to-column translation mechanics.
NTupleSize_t fFirstElementIndex
A 64bit element index.
RNTupleVersion fVersion
Versions can change, e.g., when new column types are added.
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
Generic information about the physical location of data.
RNTupleVersion fVersion
The version evolves with the ntuple summary meta-data.
Meta-data stored for every field of an ntuple
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::uint32_t GetVersionUse() const
std::chrono::system_clock::time_point fTimeStampData
The time stamp of the ntuple data (immutable)
std::string RNTupleUuid
Every NTuple is identified by a UUID. TODO(jblomer): should this be a TUUID?
const std::vector< DescriptorId_t > & GetLinkIds() const
static constexpr std::uint16_t kFrameVersionMin
Meta-data for a set of ntuple clusters
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
void AddField(DescriptorId_t fieldId, const RNTupleVersion &fieldVersion, const RNTupleVersion &typeVersion, std::string_view fieldName, std::string_view typeName, std::uint64_t nRepetitions, ENTupleStructure structure)
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
static constexpr std::uint16_t kFrameVersionMin
std::uint32_t SerializeFooter(void *buffer) const
Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr...
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author, const RNTupleVersion &version, const RNTupleUuid &uuid)
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::string fFieldName
The leaf name, not including parent fields.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:79
NTupleFlags_t GetFlags() const
int type
Definition: TGX11.cxx:120
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
static RooMathCoreReg dummy
std::chrono::system_clock::time_point fTimeStampWritten
The time stamp of writing the data to storage, which gets updated when re-written.
std::uint32_t SerializeHeader(void *buffer) const
We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuple's witho...
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RFieldDescriptor &other) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
static constexpr std::uint16_t kFrameVersionMin
RNTupleUuid fGroupUuid
Column sets that are created as derived sets from existing NTuples share the same group id...
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint32_t GetVersionMin() const
auto * l
Definition: textangle.C:4
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
#define c(i)
Definition: RSha256.hxx:101
The on-storage meta-data of an ntuple
We do not need to store the element size / uncompressed page size because we know to which column the...
For forward and backward compatibility, attach version information to the constituents of the file f...
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
bool operator==(const RClusterDescriptor &other) const
std::string fAuthor
The origin of the data.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Meta-data stored for every column of an ntuple
char name[80]
Definition: TGX11.cxx:109
Records the partition of data into pages for a particular column in a particular cluster.
std::string fFieldDescription
Free text set by the user.
NTupleSize_t GetNElements(DescriptorId_t columnId) const