Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageDaos.cxx
Go to the documentation of this file.
1/// \file RPageStorageDaos.cxx
2/// \ingroup NTuple ROOT7
3/// \author Javier Lopez-Gomez <j.lopez@cern.ch>
4/// \date 2020-11-03
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RCluster.hxx>
17#include <ROOT/RClusterPool.hxx>
18#include <ROOT/RLogger.hxx>
20#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RNTupleZip.hxx>
25#include <ROOT/RPage.hxx>
27#include <ROOT/RPagePool.hxx>
28#include <ROOT/RDaos.hxx>
30
31#include <RVersion.h>
32#include <TError.h>
33
34#include <algorithm>
35#include <cstdio>
36#include <cstdlib>
37#include <cstring>
38#include <limits>
39#include <utility>
40#include <regex>
41#include <cassert>
42
43namespace {
50
/// \brief Strategies for mapping RNTuple pages onto DAOS objects.
/// The enumerators are used as the non-type template argument that selects the key layout of a page.
enum EDaosMapping {
   /// The low 64 bits of the object ID are taken from the cluster ID.
   kOidPerCluster,
   /// The low 64 bits of the object ID are taken from the page counter.
   kOidPerPage
};
53
54struct RDaosKey {
55 daos_obj_id_t fOid;
56 DistributionKey_t fDkey;
57 AttributeKey_t fAkey;
58};
59
60/// \brief Pre-defined keys for object store. `kDistributionKeyDefault` is the distribution key for metadata and
61/// pagelist values; optionally it can be used for ntuple pages (if under the `kOidPerPage` mapping strategy).
62/// `kAttributeKeyDefault` is the attribute key for ntuple pages under `kOidPerPage`.
63/// `kAttributeKey{Anchor,Header,Footer}` are the respective attribute keys for anchor/header/footer metadata elements.
64static constexpr DistributionKey_t kDistributionKeyDefault = 0x5a3c69f0cafe4a11;
65static constexpr AttributeKey_t kAttributeKeyDefault = 0x4243544b53444229;
66static constexpr AttributeKey_t kAttributeKeyAnchor = 0x4243544b5344422a;
67static constexpr AttributeKey_t kAttributeKeyHeader = 0x4243544b5344422b;
68static constexpr AttributeKey_t kAttributeKeyFooter = 0x4243544b5344422c;
69
70/// \brief Pre-defined 64 LSb of the OIDs for ntuple metadata (holds anchor/header/footer) and clusters' pagelists.
71static constexpr decltype(daos_obj_id_t::lo) kOidLowMetadata = -1;
72static constexpr decltype(daos_obj_id_t::lo) kOidLowPageList = -2;
73
74static constexpr daos_oclass_id_t kCidMetadata = OC_SX;
75
76static constexpr EDaosMapping kDefaultDaosMapping = kOidPerCluster;
77
78template <EDaosMapping mapping>
80 long unsigned columnId, long unsigned pageCount)
81{
82 if constexpr (mapping == kOidPerCluster) {
83 return RDaosKey{daos_obj_id_t{static_cast<decltype(daos_obj_id_t::lo)>(clusterId),
84 static_cast<decltype(daos_obj_id_t::hi)>(ntplId)},
85 static_cast<DistributionKey_t>(columnId), static_cast<AttributeKey_t>(pageCount)};
86 } else if constexpr (mapping == kOidPerPage) {
87 return RDaosKey{daos_obj_id_t{static_cast<decltype(daos_obj_id_t::lo)>(pageCount),
88 static_cast<decltype(daos_obj_id_t::hi)>(ntplId)},
90 }
91}
92
/// \brief The two components extracted from a DAOS RNTuple URI.
/// Kept as a plain aggregate so that it can be brace-initialized as `{poolLabel, containerLabel}`.
struct RDaosURI {
   std::string fPoolLabel;      ///< Label of the DAOS pool
   std::string fContainerLabel; ///< Label of the container for this RNTuple
};
99
100/**
101 \brief Parse a DAOS RNTuple URI of the form 'daos://pool_id/container_id'.
102*/
103RDaosURI ParseDaosURI(std::string_view uri)
104{
105 std::regex re("daos://([^/]+)/(.+)");
106 std::cmatch m;
107 if (!std::regex_match(uri.data(), m, re))
108 throw ROOT::RException(R__FAIL("Invalid DAOS pool URI."));
109 return {m[1], m[2]};
110}
111
112/// \brief Unpacks a 64-bit RNTuple page locator address for object stores into a pair of 32-bit values:
113/// the attribute key under which the cage is stored and the offset within that cage to access the page.
114std::pair<uint32_t, uint32_t> DecodeDaosPagePosition(const ROOT::RNTupleLocatorObject64 &address)
115{
116 auto position = static_cast<uint32_t>(address.GetLocation() & 0xFFFFFFFF);
117 auto offset = static_cast<uint32_t>(address.GetLocation() >> 32);
118 return {position, offset};
119}
120
121/// \brief Packs an attribute key together with an offset within its contents into a single 64-bit address.
122/// The offset is kept in the MSb half and defaults to zero, which is the case when caging is disabled.
123ROOT::RNTupleLocatorObject64 EncodeDaosPagePosition(uint64_t position, uint64_t offset = 0)
124{
125 uint64_t address = (position & 0xFFFFFFFF) | (offset << 32);
126 return ROOT::RNTupleLocatorObject64{address};
127}
128
129/// \brief Helper structure concentrating the functionality required to locate an ntuple within a DAOS container.
130/// It includes a hashing function that converts the RNTuple's name into a 32-bit identifier; this value is used to
131/// index the subspace for the ntuple among all objects in the container. A zero-value hash value is reserved for
132/// storing any future metadata related to container-wide management; a zero-index ntuple is thus disallowed and
133/// remapped to "1". Once the index is computed, `InitNTupleDescriptorBuilder()` can be called to return a
134/// partially-filled builder with the ntuple's anchor, header and footer, lacking only pagelists. Upon that call,
135/// a copy of the anchor is stored in `fAnchor`.
136struct RDaosContainerNTupleLocator {
137 std::string fName{};
138 ntuple_index_t fIndex{};
139 std::optional<ROOT::Experimental::Internal::RDaosNTupleAnchor> fAnchor;
140 static const ntuple_index_t kReservedIndex = 0;
141
142 RDaosContainerNTupleLocator() = default;
143 explicit RDaosContainerNTupleLocator(const std::string &ntupleName) : fName(ntupleName), fIndex(Hash(ntupleName)){};
144
145 bool IsValid() { return fAnchor.has_value() && fAnchor->fNBytesHeader; }
146 [[nodiscard]] ntuple_index_t GetIndex() const { return fIndex; };
147 static ntuple_index_t Hash(const std::string &ntupleName)
148 {
149 // Convert string to numeric representation via `std::hash`.
150 uint64_t h = std::hash<std::string>{}(ntupleName);
151 // Fold the hash into 32-bit using `boost::hash_combine()` algorithm and magic number.
152 auto seed = static_cast<uint32_t>(h >> 32);
153 seed ^= static_cast<uint32_t>(h & 0xffffffff) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
154 auto hash = static_cast<ntuple_index_t>(seed);
155 return (hash == kReservedIndex) ? kReservedIndex + 1 : hash;
156 }
157
160 {
161 std::unique_ptr<unsigned char[]> buffer, zipBuffer;
162 auto &anchor = fAnchor.emplace();
163 int err;
164
166 daos_obj_id_t oidMetadata{kOidLowMetadata, static_cast<decltype(daos_obj_id_t::hi)>(this->GetIndex())};
167
169 if ((err = cont.ReadSingleAkey(buffer.get(), anchorSize, oidMetadata, kDistributionKeyDefault,
171 return err;
172 }
173
174 anchor.Deserialize(buffer.get(), anchorSize).Unwrap();
175 if (anchor.fVersionEpoch != ROOT::RNTuple::kVersionEpoch) {
176 throw ROOT::RException(R__FAIL("unsupported RNTuple epoch version: " + std::to_string(anchor.fVersionEpoch)));
177 }
178
179 builder.SetOnDiskHeaderSize(anchor.fNBytesHeader);
180 buffer = MakeUninitArray<unsigned char>(anchor.fLenHeader);
182 if ((err = cont.ReadSingleAkey(zipBuffer.get(), anchor.fNBytesHeader, oidMetadata, kDistributionKeyDefault,
184 return err;
185 RNTupleDecompressor::Unzip(zipBuffer.get(), anchor.fNBytesHeader, anchor.fLenHeader, buffer.get());
187
188 builder.AddToOnDiskFooterSize(anchor.fNBytesFooter);
189 buffer = MakeUninitArray<unsigned char>(anchor.fLenFooter);
191 if ((err = cont.ReadSingleAkey(zipBuffer.get(), anchor.fNBytesFooter, oidMetadata, kDistributionKeyDefault,
193 return err;
194 RNTupleDecompressor::Unzip(zipBuffer.get(), anchor.fNBytesFooter, anchor.fLenFooter, buffer.get());
196
197 return 0;
198 }
199
200 static std::pair<RDaosContainerNTupleLocator, ROOT::Internal::RNTupleDescriptorBuilder>
202 {
203 auto result = std::make_pair(RDaosContainerNTupleLocator(ntupleName), ROOT::Internal::RNTupleDescriptorBuilder());
204
205 auto &loc = result.first;
206 auto &builder = result.second;
207
208 if (int err = loc.InitNTupleDescriptorBuilder(cont, builder); !err) {
209 if (ntupleName.empty() || ntupleName != builder.GetDescriptor().GetName()) {
210 // Hash already taken by a differently-named ntuple.
211 throw ROOT::RException(
212 R__FAIL("LocateNTuple: ntuple name '" + ntupleName + "' unavailable in this container."));
213 }
214 }
215 return result;
216 }
217};
218
219} // anonymous namespace
220
221////////////////////////////////////////////////////////////////////////////////
222
241
244{
245 if (bufSize < 32)
246 return R__FAIL("DAOS anchor too short");
247
249 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
251 if (fVersionAnchor != RDaosNTupleAnchor().fVersionAnchor) {
252 return R__FAIL("unsupported DAOS anchor version: " + std::to_string(fVersionAnchor));
253 }
254
264 if (!result)
265 return R__FORWARD_ERROR(result);
266 return result.Unwrap() + 32;
267}
268
273
274////////////////////////////////////////////////////////////////////////////////
275
277 const ROOT::RNTupleWriteOptions &options)
278 : RPagePersistentSink(ntupleName, options), fURI(uri)
279{
280 static std::once_flag once;
281 std::call_once(once, []() {
282 R__LOG_WARNING(ROOT::Internal::NTupleLog()) << "The DAOS backend is experimental and still under development. "
283 << "Do not store real data with this version of RNTuple!";
284 });
285 EnableDefaultMetrics("RPageSinkDaos");
286}
287
289
291{
292 auto opts = dynamic_cast<RNTupleWriteOptionsDaos *>(fOptions.get());
293 fNTupleAnchor.fObjClass = opts ? opts->GetObjectClass() : RNTupleWriteOptionsDaos().GetObjectClass();
294 auto oclass = RDaosObject::ObjClassId(fNTupleAnchor.fObjClass);
295 if (oclass.IsUnknown())
296 throw ROOT::RException(R__FAIL("Unknown object class " + fNTupleAnchor.fObjClass));
297
298 size_t cageSz = opts ? opts->GetMaxCageSize() : RNTupleWriteOptionsDaos().GetMaxCageSize();
299 size_t pageSz = opts ? opts->GetMaxUnzippedPageSize() : RNTupleWriteOptionsDaos().GetMaxUnzippedPageSize();
300 fCageSizeLimit = std::max(cageSz, pageSz);
301
302 auto args = ParseDaosURI(fURI);
303 auto pool = std::make_shared<RDaosPool>(args.fPoolLabel);
304
305 fDaosContainer = std::make_unique<RDaosContainer>(pool, args.fContainerLabel, /*create =*/true);
306 fDaosContainer->SetDefaultObjectClass(oclass);
307
308 auto [locator, _] = RDaosContainerNTupleLocator::LocateNTuple(*fDaosContainer, fNTupleName);
309 fNTupleIndex = locator.GetIndex();
310
312 auto szZipHeader =
313 RNTupleCompressor::Zip(serializedHeader, length, GetWriteOptions().GetCompression(), zipBuffer.get());
314 WriteNTupleHeader(zipBuffer.get(), szZipHeader, length);
315}
316
319{
320 auto element = columnHandle.fColumn->GetElement();
322 {
323 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallZip, fCounters->fTimeCpuZip);
324 sealedPage = SealPage(page, *element);
325 }
326
327 fCounters->fSzZip.Add(page.GetNBytes());
328 return CommitSealedPageImpl(columnHandle.fPhysicalId, sealedPage);
329}
330
334{
335 auto offsetData = fPageId.fetch_add(1);
336 ROOT::DescriptorId_t clusterId = fDescriptorBuilder.GetDescriptor().GetNActiveClusters();
337
338 {
339 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallWrite, fCounters->fTimeCpuWrite);
341 fDaosContainer->WriteSingleAkey(sealedPage.GetBuffer(), sealedPage.GetBufferSize(), daosKey.fOid, daosKey.fDkey,
342 daosKey.fAkey);
343 }
344
347 result.SetNBytesOnStorage(sealedPage.GetDataSize());
349 fCounters->fNPageCommitted.Inc();
350 fCounters->fSzWritePayload.Add(sealedPage.GetBufferSize());
351 fNBytesCurrentCluster += sealedPage.GetBufferSize();
352 return result;
353}
354
355std::vector<ROOT::RNTupleLocator>
356ROOT::Experimental::Internal::RPageSinkDaos::CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges,
357 const std::vector<bool> &mask)
358{
360 std::vector<RNTupleLocator> locators;
361 auto nPages = mask.size();
362 locators.reserve(nPages);
363
364 const uint32_t maxCageSz = fCageSizeLimit;
365 const bool useCaging = fCageSizeLimit > 0;
366 const std::uint8_t locatorFlags = useCaging ? EDaosLocatorFlags::kCagedPage : 0;
367
368 ROOT::DescriptorId_t clusterId = fDescriptorBuilder.GetDescriptor().GetNActiveClusters();
369 int64_t payloadSz = 0;
370 std::size_t positionOffset;
371 uint32_t positionIndex;
372
373 /// Aggregate batch of requests by object ID and distribution key, determined by the ntuple-DAOS mapping
374 for (auto &range : ranges) {
375 positionOffset = 0;
376 /// Under caging, the atomic page counter is fetch-incremented for every column range to get the position of its
377 /// first cage and indicate the next one, also ensuring subsequent pages of different columns do not end up caged
378 /// together. This increment is not necessary in the absence of caging, as each page is trivially caged.
379 positionIndex = useCaging ? fPageId.fetch_add(1) : fPageId.load();
380
381 for (auto sealedPageIt = range.fFirst; sealedPageIt != range.fLast; ++sealedPageIt) {
383
385 positionOffset = 0;
386 positionIndex = fPageId.fetch_add(1);
387 }
388
390 d_iov_set(&pageIov, const_cast<void *>(s.GetBuffer()), s.GetBufferSize());
391
392 RDaosKey daosKey =
393 GetPageDaosKey<kDefaultDaosMapping>(fNTupleIndex, clusterId, range.fPhysicalColumnId, positionIndex);
396 it->second.Insert(daosKey.fAkey, pageIov);
397
400 locator.SetNBytesOnStorage(s.GetDataSize());
402 locator.SetReserved(locatorFlags);
403 locators.push_back(locator);
404
407 }
408 }
409 fNBytesCurrentCluster += payloadSz;
410
411 {
412 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallWrite, fCounters->fTimeCpuWrite);
413 if (int err = fDaosContainer->WriteV(writeRequests))
414 throw ROOT::RException(R__FAIL("WriteV: error" + std::string(d_errstr(err))));
415 }
416
417 fCounters->fNPageCommitted.Add(nPages);
418 fCounters->fSzWritePayload.Add(payloadSz);
419
420 return locators;
421}
422
424{
425 return std::exchange(fNBytesCurrentCluster, 0);
426}
427
430 std::uint32_t length)
431{
433 auto szPageListZip =
434 RNTupleCompressor::Zip(serializedPageList, length, GetWriteOptions().GetCompression(), bufPageListZip.get());
435
436 auto offsetData = fClusterGroupId.fetch_add(1);
437 fDaosContainer->WriteSingleAkey(
439 daos_obj_id_t{kOidLowPageList, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)}, kDistributionKeyDefault,
443 result.SetNBytesOnStorage(szPageListZip);
445 fCounters->fSzWritePayload.Add(static_cast<int64_t>(szPageListZip));
446 return result;
447}
448
450 std::uint32_t length)
451{
453 auto szFooterZip =
454 RNTupleCompressor::Zip(serializedFooter, length, GetWriteOptions().GetCompression(), bufFooterZip.get());
455 WriteNTupleFooter(bufFooterZip.get(), szFooterZip, length);
456 WriteNTupleAnchor();
457}
458
460{
461 fDaosContainer->WriteSingleAkey(
462 data, nbytes, daos_obj_id_t{kOidLowMetadata, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)},
464 fNTupleAnchor.fLenHeader = lenHeader;
465 fNTupleAnchor.fNBytesHeader = nbytes;
466}
467
469{
470 fDaosContainer->WriteSingleAkey(
471 data, nbytes, daos_obj_id_t{kOidLowMetadata, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)},
473 fNTupleAnchor.fLenFooter = lenFooter;
474 fNTupleAnchor.fNBytesFooter = nbytes;
475}
476
478{
481 fNTupleAnchor.Serialize(buffer.get());
482 fDaosContainer->WriteSingleAkey(
483 buffer.get(), ntplSize, daos_obj_id_t{kOidLowMetadata, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)},
485}
486
487////////////////////////////////////////////////////////////////////////////////
488
490 const ROOT::RNTupleReadOptions &options)
491 : RPageSource(ntupleName, options),
492 fURI(uri),
493 fClusterPool(
494 std::make_unique<RClusterPool>(*this, ROOT::Internal::RNTupleReadOptionsManip::GetClusterBunchSize(options)))
495{
496 EnableDefaultMetrics("RPageSourceDaos");
497
498 auto args = ParseDaosURI(uri);
499 auto pool = std::make_shared<RDaosPool>(args.fPoolLabel);
500 fDaosContainer = std::make_unique<RDaosContainer>(pool, args.fContainerLabel);
501}
502
504
507{
509 std::unique_ptr<unsigned char[]> buffer, zipBuffer;
510
511 auto [locator, descBuilder] = RDaosContainerNTupleLocator::LocateNTuple(*fDaosContainer, fNTupleName);
512 if (!locator.IsValid())
513 throw ROOT::RException(
514 R__FAIL("Attach: requested ntuple '" + fNTupleName + "' is not present in DAOS container."));
515
516 auto oclass = RDaosObject::ObjClassId(locator.fAnchor->fObjClass);
517 if (oclass.IsUnknown())
518 throw ROOT::RException(R__FAIL("Attach: unknown object class " + locator.fAnchor->fObjClass));
519
520 fDaosContainer->SetDefaultObjectClass(oclass);
521 fNTupleIndex = locator.GetIndex();
523
524 auto desc = descBuilder.MoveDescriptor();
525
526 for (const auto &cgDesc : desc.GetClusterGroupIterable()) {
527 buffer = MakeUninitArray<unsigned char>(cgDesc.GetPageListLength());
528 zipBuffer = MakeUninitArray<unsigned char>(cgDesc.GetPageListLocator().GetNBytesOnStorage());
529 fDaosContainer->ReadSingleAkey(
530 zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(), oidPageList, kDistributionKeyDefault,
531 cgDesc.GetPageListLocator().GetPosition<RNTupleLocatorObject64>().GetLocation(), kCidMetadata);
532 RNTupleDecompressor::Unzip(zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(),
533 cgDesc.GetPageListLength(), buffer.get());
534
535 RNTupleSerializer::DeserializePageList(buffer.get(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode);
536 }
537
538 return desc;
539}
540
542{
543 return fDaosContainer->GetDefaultObjectClass().ToString();
544}
545
549{
550 const auto clusterId = localIndex.GetClusterId();
551
553 {
554 auto descriptorGuard = GetSharedDescriptorGuard();
555 const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
556 pageInfo = clusterDescriptor.GetPageRange(physicalColumnId).Find(localIndex.GetIndexInCluster());
557 }
558
559 sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum);
560 sealedPage.SetNElements(pageInfo.GetNElements());
561 sealedPage.SetHasChecksum(pageInfo.HasChecksum());
562 if (!sealedPage.GetBuffer())
563 return;
564
565 if (pageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
566 assert(!pageInfo.HasChecksum());
567 memcpy(const_cast<void *>(sealedPage.GetBuffer()), ROOT::Internal::RPage::GetPageZeroBuffer(),
568 sealedPage.GetBufferSize());
569 return;
570 }
571
572 if (pageInfo.GetLocator().GetReserved() & EDaosLocatorFlags::kCagedPage) {
573 // Suboptimal but hard to do differently: we load the full cage up to and including the requested page.
574 // In practice, individual LoadSealedPage calls are rare and usually full clusters are buffered.
575 // The support for extracting individual pages from a cage makes testing easier, however.
576 const auto [position, offset] =
577 DecodeDaosPagePosition(pageInfo.GetLocator().GetPosition<RNTupleLocatorObject64>());
578 RDaosKey daosKey = GetPageDaosKey<kDefaultDaosMapping>(fNTupleIndex, clusterId, physicalColumnId, position);
579 const auto bufSize = offset + sealedPage.GetBufferSize();
581 fDaosContainer->ReadSingleAkey(cageHeadBuffer.get(), bufSize, daosKey.fOid, daosKey.fDkey, daosKey.fAkey);
582 memcpy(const_cast<void *>(sealedPage.GetBuffer()), cageHeadBuffer.get() + offset, sealedPage.GetBufferSize());
583 } else {
584 RDaosKey daosKey =
586 pageInfo.GetLocator().GetPosition<RNTupleLocatorObject64>().GetLocation());
587 fDaosContainer->ReadSingleAkey(const_cast<void *>(sealedPage.GetBuffer()), sealedPage.GetBufferSize(),
588 daosKey.fOid, daosKey.fDkey, daosKey.fAkey);
589 }
590
591 sealedPage.VerifyChecksumIfEnabled().ThrowOnError();
592}
593
597{
598 const auto columnId = columnHandle.fPhysicalId;
599 const auto clusterId = clusterInfo.fClusterId;
600 const auto &pageInfo = clusterInfo.fPageInfo;
601
602 const auto element = columnHandle.fColumn->GetElement();
603 const auto elementSize = element->GetSize();
604 const auto elementInMemoryType = element->GetIdentifier().fInMemoryType;
605
606 if (pageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
607 auto pageZero = fPageAllocator->NewPage(elementSize, pageInfo.GetNElements());
608 pageZero.GrowUnchecked(pageInfo.GetNElements());
609 memset(pageZero.GetBuffer(), 0, pageZero.GetNBytes());
610 pageZero.SetWindow(clusterInfo.fColumnOffset + pageInfo.GetFirstElementIndex(),
612 return fPagePool.RegisterPage(std::move(pageZero),
614 }
615
617 sealedPage.SetNElements(pageInfo.GetNElements());
618 sealedPage.SetHasChecksum(pageInfo.HasChecksum());
619 sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum);
620 std::unique_ptr<unsigned char[]> directReadBuffer; // only used if cluster pool is turned off
621
622 if (fOptions.GetClusterCache() == ROOT::RNTupleReadOptions::EClusterCache::kOff) {
623 if (pageInfo.GetLocator().GetReserved() & EDaosLocatorFlags::kCagedPage) {
624 throw ROOT::RException(R__FAIL("accessing caged pages is only supported in conjunction with cluster cache"));
625 }
626
629 fNTupleIndex, clusterId, columnId, pageInfo.GetLocator().GetPosition<RNTupleLocatorObject64>().GetLocation());
630 fDaosContainer->ReadSingleAkey(directReadBuffer.get(), sealedPage.GetBufferSize(), daosKey.fOid, daosKey.fDkey,
631 daosKey.fAkey);
632 fCounters->fNPageRead.Inc();
633 fCounters->fNRead.Inc();
634 fCounters->fSzReadPayload.Add(sealedPage.GetBufferSize());
635 sealedPage.SetBuffer(directReadBuffer.get());
636 } else {
637 if (!fCurrentCluster || (fCurrentCluster->GetId() != clusterId) || !fCurrentCluster->ContainsColumn(columnId))
638 fCurrentCluster = fClusterPool->GetCluster(clusterId, fActivePhysicalColumns.ToColumnSet());
639 R__ASSERT(fCurrentCluster->ContainsColumn(columnId));
640
643 if (!cachedPageRef.Get().IsNull())
644 return cachedPageRef;
645
646 ROnDiskPage::Key key(columnId, pageInfo.GetPageNumber());
647 auto onDiskPage = fCurrentCluster->GetOnDiskPage(key);
648 R__ASSERT(onDiskPage && (sealedPage.GetBufferSize() == onDiskPage->GetSize()));
649 sealedPage.SetBuffer(onDiskPage->GetAddress());
650 }
651
653 {
654 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallUnzip, fCounters->fTimeCpuUnzip);
655 newPage = UnsealPage(sealedPage, *element).Unwrap();
656 fCounters->fSzUnzip.Add(elementSize * pageInfo.GetNElements());
657 }
658
659 newPage.SetWindow(clusterInfo.fColumnOffset + pageInfo.GetFirstElementIndex(),
661 fCounters->fNPageUnsealed.Inc();
662 return fPagePool.RegisterPage(std::move(newPage), ROOT::Internal::RPagePool::RKey{columnId, elementInMemoryType});
663}
664
665std::unique_ptr<ROOT::Experimental::Internal::RPageSource>
667{
668 auto clone = new RPageSourceDaos(fNTupleName, fURI, fOptions);
669 return std::unique_ptr<RPageSourceDaos>(clone);
670}
671
672std::vector<std::unique_ptr<ROOT::Experimental::Internal::RCluster>>
674{
676 ROOT::DescriptorId_t fClusterId = 0;
677 ROOT::DescriptorId_t fColumnId = 0;
678 ROOT::NTupleSize_t fPageNo = 0;
679 std::uint64_t fPosition = 0;
680 std::uint64_t fCageOffset = 0;
681 std::uint64_t fDataSize = 0; // page payload
682 std::uint64_t fBufferSize = 0; // page payload + checksum (if available)
683 };
684
685 // Prepares read requests for a single cluster; `readRequests` is modified by this function. Requests are coalesced
686 // by OID and distribution key.
687 // TODO(jalopezg): this may be a private member function; that, however, requires additional changes given that
688 // `RDaosContainer::MultiObjectRWOperation_t` cannot be forward-declared
691 auto clusterId = clusterKey.fClusterId;
692 // Group page locators by their position in the object store; with caging enabled, this facilitates the
693 // processing of cages' requests together into a single IOV to be loaded.
694 std::unordered_map<std::uint32_t, std::vector<RDaosSealedPageLocator>> onDiskPages;
695
696 unsigned clusterBufSz = 0, nPages = 0;
697 auto pageZeroMap = std::make_unique<ROnDiskPageMap>();
698 PrepareLoadCluster(
702 const auto &pageLocator = pageInfo.GetLocator();
703 uint32_t position, offset;
704 std::tie(position, offset) = DecodeDaosPagePosition(pageLocator.GetPosition<RNTupleLocatorObject64>());
705 auto [itLoc, _] = onDiskPages.emplace(position, std::vector<RDaosSealedPageLocator>());
706 auto pageBufferSize = pageLocator.GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum;
707
708 itLoc->second.push_back({clusterId, physicalColumnId, pageNo, position, offset,
709 pageLocator.GetNBytesOnStorage(), pageBufferSize});
710 ++nPages;
712 });
713
714 auto clusterBuffer = new unsigned char[clusterBufSz];
715 auto pageMap = std::make_unique<ROnDiskPageMapHeap>(std::unique_ptr<unsigned char[]>(clusterBuffer));
716
718 // Fill the cluster page map and the read requests for the RDaosContainer::ReadV() call
719 for (auto &[cageIndex, pageVec] : onDiskPages) {
720 auto columnId = pageVec[0].fColumnId; // All pages in a cage belong to the same column
721 std::size_t cageSz = 0;
722
723 for (auto &s : pageVec) {
724 assert(columnId == s.fColumnId);
725 assert(cageIndex == s.fPosition);
726 // Register the on disk pages in a page map
727 ROnDiskPage::Key key(s.fColumnId, s.fPageNo);
728 pageMap->Register(key, ROnDiskPage(cageBuffer + s.fCageOffset, s.fBufferSize));
729 cageSz += s.fBufferSize;
730 }
731
732 // Prepare new read request batched up by object ID and distribution key
733 d_iov_t iov;
735
739 itReq->second.Insert(daosKey.fAkey, iov);
740
742 }
743 fCounters->fNPageRead.Add(nPages);
744 fCounters->fSzReadPayload.Add(clusterBufSz);
745
746 auto cluster = std::make_unique<RCluster>(clusterId);
747 cluster->Adopt(std::move(pageMap));
748 cluster->Adopt(std::move(pageZeroMap));
749 for (auto colId : clusterKey.fPhysicalColumnSet)
750 cluster->SetColumnAvailable(colId);
751 return cluster;
752 };
753
754 fCounters->fNClusterLoaded.Add(clusterKeys.size());
755
756 std::vector<std::unique_ptr<ROOT::Experimental::Internal::RCluster>> clusters;
758 for (auto key : clusterKeys) {
759 clusters.emplace_back(fnPrepareSingleCluster(key, readRequests));
760 }
761
762 {
763 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
764 if (int err = fDaosContainer->ReadV(readRequests))
765 throw ROOT::RException(R__FAIL("ReadV: error" + std::string(d_errstr(err))));
766 }
767 fCounters->fNReadV.Inc();
768 fCounters->fNRead.Add(readRequests.size());
769
770 return clusters;
771}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:303
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define R__LOG_WARNING(...)
Definition RLogger.hxx:358
#define h(i)
Definition RSha256.hxx:106
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char mode
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t bytes
UInt_t Hash(const TString &s)
Definition TString.h:494
#define _(A, B)
Definition cfortran.h:108
Manages a set of clusters containing compressed and packed pages.
A RDaosContainer provides read/write access to objects in a given container.
Definition RDaos.hxx:157
RDaosObject::DistributionKey_t DistributionKey_t
Definition RDaos.hxx:160
std::unordered_map< ROidDkeyPair, RWOperation, ROidDkeyPair::Hash > MultiObjectRWOperation_t
Definition RDaos.hxx:231
RDaosObject::AttributeKey_t AttributeKey_t
Definition RDaos.hxx:161
A helper class for serialization and deserialization of the RNTuple binary format.
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint64_t bufSize, std::string &val)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static RResult< void > DeserializeFooter(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< void > DeserializePageList(const void *buffer, std::uint64_t bufSize, ROOT::DescriptorId_t clusterGroupId, RNTupleDescriptor &desc, EDescriptorDeserializeMode mode)
static RResult< void > DeserializeHeader(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder)
A page as being stored on disk, that is packed and compressed.
Definition RCluster.hxx:42
Base class for a sink with a physical storage backend.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const ROOT::Internal::RPage &page) final
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask) final
Vector commit of preprocessed pages.
void WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter)
std::uint64_t StageClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
void WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader)
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RPageSinkDaos(std::string_view ntupleName, std::string_view uri, const ROOT::RNTupleWriteOptions &options)
RNTupleLocator CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
Storage provider that reads ntuple pages from a DAOS container.
std::string GetObjectClass() const
Return the object class used for user data OIDs in this ntuple.
ROOT::Internal::RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final
std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys) final
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
void LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final
LoadStructureImpl() has been called before AttachImpl() is called
std::unique_ptr< RDaosContainer > fDaosContainer
A container that stores object data (header/footer, pages, etc.)
std::unique_ptr< RPageSource > CloneImpl() const final
The cloned page source creates a new connection to the pool/container.
RPageSourceDaos(std::string_view ntupleName, std::string_view uri, const ROOT::RNTupleReadOptions &options)
Abstract interface to read data from an ntuple.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
DAOS-specific user-tunable settings for storing ntuples.
Helper class to compress data blocks in the ROOT compression frame format.
static std::size_t Zip(const void *from, std::size_t nbytes, int compression, void *to)
Returns the size of the compressed data, written into the provided output buffer.
Helper class to uncompress data blocks in the ROOT compression frame format.
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
A helper class for piece-wise construction of an RNTupleDescriptor.
const RNTupleDescriptor & GetDescriptor() const
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
Reference to a page stored in the page pool.
Stores information about the cluster in which this page resides.
Definition RPage.hxx:55
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:46
static const void * GetPageZeroBuffer()
Return a pointer to the page zero buffer used if there is no on-disk data for a particular deferred c...
Definition RPage.cxx:25
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
The on-storage meta-data of an ntuple.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
RNTupleLocator payload that is common for object stores using 64bit location information.
std::uint64_t GetLocation() const
Generic information about the physical location of data.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
std::size_t GetMaxUnzippedPageSize() const
static constexpr std::uint16_t kVersionEpoch
Definition RNTuple.hxx:79
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
const char * d_errstr(int rc)
static void d_iov_set(d_iov_t *iov, void *buf, size_t size)
Definition daos.h:50
uint16_t daos_oclass_id_t
Definition daos.h:135
@ OC_SX
Definition daos.h:129
ROOT::RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
std::unique_ptr< T[]> MakeUninitArray(std::size_t size)
Make an array of default-initialized elements.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:156
A pair of <object ID, distribution key> that can be used to issue a fetch/update request for multiple...
Definition RDaos.hxx:166
Describes a read/write operation on multiple attribute keys under the same object ID and distribution...
Definition RDaos.hxx:190
Entry point for an RNTuple in a DAOS container.
std::uint32_t fNBytesFooter
The size of the compressed ntuple footer.
std::uint64_t fVersionAnchor
Allows for evolving the struct in future versions.
std::string fObjClass
The object class for user data OIDs, e.g. SX
std::uint16_t fVersionEpoch
Version of the binary format supported by the writer.
RResult< std::uint32_t > Deserialize(const void *buffer, std::uint32_t bufSize)
std::uint32_t fLenHeader
The size of the uncompressed ntuple header.
std::uint32_t fLenFooter
The size of the uncompressed ntuple footer.
std::uint32_t fNBytesHeader
The size of the compressed ntuple header.
static constexpr std::size_t kOCNameMaxLength
This limit is currently not defined in any header and any call to daos_oclass_id2name() within DAOS u...
Definition RDaos.hxx:108
On-disk pages within a page source are identified by the column and page number.
Definition RCluster.hxx:52
Summarizes cluster-level information that is necessary to load a certain page.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Information about a single page in the context of a cluster's page range.
iovec for memory buffer
Definition daos.h:37
uint64_t hi
Definition daos.h:147
uint64_t lo
Definition daos.h:146
TMarker m
Definition textangle.C:8