92 RNTupleAtomicTimer
timer(fCounters->fTimeWallWrite, fCounters->fTimeCpuWrite);
99 fCounters->fNPageCommitted.Inc();
100 fCounters->fSzWritePayload.Add(
sealedPage.GetBufferSize());
101 fNBytesCurrentCluster +=
sealedPage.GetBufferSize();
111 RNTupleAtomicTimer
timer(fCounters->fTimeWallZip, fCounters->fTimeCpuZip);
115 fCounters->fSzZip.Add(
page.GetNBytes());
122 const auto nBits = fDescriptorBuilder.GetDescriptor().GetColumnDescriptor(
physicalColumnId).GetBitsOnStorage();
129 RNTupleAtomicTimer
timer(fCounters->fTimeWallWrite, fCounters->fTimeCpuWrite);
131 std::uint64_t
offset = fWriter->ReserveBlob(
batch.fSize,
batch.fBytesPacked);
144 fCounters->fNPageCommitted.Add(
batch.fSealedPages.size());
145 fCounters->fSzWritePayload.Add(
batch.fSize);
146 fNBytesCurrentCluster +=
batch.fSize;
149 batch.fBytesPacked = 0;
150 batch.fSealedPages.clear();
153std::vector<ROOT::RNTupleLocator>
155 const std::vector<bool> &
mask)
157 const std::uint64_t
maxKeySize = fOptions->GetMaxKeySize();
160 std::vector<RNTupleLocator>
locators;
162 std::size_t
iPage = 0;
171 fDescriptorBuilder.GetDescriptor().GetColumnDescriptor(
range.fPhysicalColumnId).GetBitsOnStorage();
205 fCounters->fNPageCommitted.Inc();
206 fCounters->fSzWritePayload.Add(
sealedPageIt->GetBufferSize());
217 if (
batch.fSize > 0) {
226 auto result = fNBytesCurrentCluster;
227 fNBytesCurrentCluster = 0;
246 fWriter->UpdateStreamerInfos(fDescriptorBuilder.BuildStreamerInfos());
251 fWriter->Commit(GetWriteOptions().GetCompression());
265 std::unique_ptr<ROOT::Internal::RRawFile> file,
269 fFile = std::move(file);
280std::unique_ptr<ROOT::Internal::RPageSourceFile>
284 throw RException(
R__FAIL(
"This RNTuple object was not streamed from a ROOT file (TFile or descendant)"));
286 std::unique_ptr<ROOT::Internal::RRawFile>
rawFile;
290 std::string className =
anchor.fFile->IsA()->GetName();
291 auto url =
anchor.fFile->GetEndpointUrl();
292 auto protocol = std::string(
url->GetProtocol());
293 if (className ==
"TFile") {
295 }
else if (className ==
"TDavixFile" || className ==
"TNetXNGFile") {
301 auto pageSource = std::make_unique<RPageSourceFile>(
"", std::move(
rawFile), options);
314 fAnchor = fReader.GetNTuple(fNTupleName).Unwrap();
316 fReader.SetMaxKeySize(fAnchor->GetMaxKeySize());
320 throw RException(
R__FAIL(
"unsupported RNTuple epoch version: " + std::to_string(fAnchor->GetVersionEpoch())));
323 fDescriptorBuilder.SetOnDiskHeaderSize(fAnchor->GetNBytesHeader());
324 fDescriptorBuilder.AddToOnDiskFooterSize(fAnchor->GetNBytesFooter());
327 const auto bufSize = fAnchor->GetNBytesHeader() + fAnchor->GetNBytesFooter() +
328 std::max(fAnchor->GetLenHeader(), fAnchor->GetLenFooter());
330 fStructureBuffer.fPtrHeader = fStructureBuffer.fBuffer.get();
331 fStructureBuffer.fPtrFooter = fStructureBuffer.fBuffer.get() + fAnchor->GetNBytesHeader();
338 (std::max(fAnchor->GetNBytesHeader(), fAnchor->GetNBytesFooter()) >
readvLimits.fMaxSingleSize) ||
339 (fAnchor->GetNBytesHeader() + fAnchor->GetNBytesFooter() >
readvLimits.fMaxTotalSize)) {
340 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
341 fReader.ReadBuffer(fStructureBuffer.fPtrHeader, fAnchor->GetNBytesHeader(), fAnchor->GetSeekHeader());
342 fReader.ReadBuffer(fStructureBuffer.fPtrFooter, fAnchor->GetNBytesFooter(), fAnchor->GetSeekFooter());
343 fCounters->fNRead.Add(2);
345 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
346 R__ASSERT(fAnchor->GetNBytesHeader() < std::numeric_limits<std::size_t>::max());
347 R__ASSERT(fAnchor->GetNBytesFooter() < std::numeric_limits<std::size_t>::max());
349 static_cast<std::size_t
>(fAnchor->GetNBytesHeader()), 0},
350 {fStructureBuffer.fPtrFooter, fAnchor->GetSeekFooter(),
351 static_cast<std::size_t
>(fAnchor->GetNBytesFooter()), 0}};
353 fCounters->fNReadV.Inc();
359 auto unzipBuf =
reinterpret_cast<unsigned char *
>(fStructureBuffer.fPtrFooter) + fAnchor->GetNBytesFooter();
369 auto desc = fDescriptorBuilder.MoveDescriptor();
371 std::vector<unsigned char> buffer;
372 for (
const auto &
cgDesc : desc.GetClusterGroupIterable()) {
373 buffer.resize(std::max<size_t>(buffer.size(),
374 cgDesc.GetPageListLength() +
cgDesc.GetPageListLocator().GetNBytesOnStorage()));
376 fReader.ReadBuffer(
zipBuffer,
cgDesc.GetPageListLocator().GetNBytesOnStorage(),
377 cgDesc.GetPageListLocator().GetPosition<std::uint64_t>());
379 cgDesc.GetPageListLength(), buffer.data());
385 fFile->SetBuffering(
false);
409 pageInfo.GetLocator().GetPosition<std::uint64_t>());
416 sealedPage.VerifyChecksumIfEnabled().ThrowOnError();
449 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
451 pageInfo.GetLocator().GetPosition<std::uint64_t>());
453 fCounters->fNPageRead.Inc();
454 fCounters->fNRead.Inc();
455 fCounters->fSzReadPayload.Add(
sealedPage.GetBufferSize());
458 if (!fCurrentCluster || (fCurrentCluster->GetId() !=
clusterId) || !fCurrentCluster->ContainsColumn(
columnId))
459 fCurrentCluster = fClusterPool->GetCluster(
clusterId, fActivePhysicalColumns.ToColumnSet());
468 auto onDiskPage = fCurrentCluster->GetOnDiskPage(key);
475 RNTupleAtomicTimer
timer(fCounters->fTimeWallUnzip, fCounters->fTimeCpuUnzip);
482 fCounters->fNPageUnsealed.Inc();
489 clone->fFile = fFile->Clone();
491 return std::unique_ptr<RPageSourceFile>(clone);
494std::unique_ptr<ROOT::Internal::RCluster>
496 std::vector<ROOT::Internal::RRawFile::RIOVec> &
readRequests)
501 std::uint64_t fOffset = 0;
502 std::uint64_t
fSize = 0;
508 auto pageZeroMap = std::make_unique<ROnDiskPageMap>();
534 std::vector<std::size_t>
gaps;
537 for (
unsigned i = 1; i <
onDiskPages.size(); ++i) {
540 gaps.emplace_back(std::max(gap, std::int64_t(0)));
548 for (
auto g :
gaps) {
570 const std::uint64_t
maxKeySize = fReader.GetMaxKeySize();
575 const std::uint64_t
overhead = std::max(
static_cast<std::int64_t
>(s.fOffset) -
readUpTo, std::int64_t(0));
576 const std::uint64_t
extent = std::max(
static_cast<std::int64_t
>(s.fOffset + s.fSize) -
readUpTo, std::int64_t(0));
580 s.fBufPos =
reinterpret_cast<intptr_t
>(
req.fBuffer) + s.fOffset -
req.fOffset;
589 req.fBuffer =
reinterpret_cast<unsigned char *
>(
req.fBuffer) +
req.fSize;
590 s.fBufPos =
reinterpret_cast<intptr_t
>(
req.fBuffer);
593 req.fOffset = s.fOffset;
597 fCounters->fSzReadPayload.Add(
szPayload);
601 auto buffer =
new unsigned char[
reinterpret_cast<intptr_t
>(
req.fBuffer) +
req.fSize];
602 auto pageMap = std::make_unique<ROOT::Internal::ROnDiskPageMapHeap>(std::unique_ptr<
unsigned char[]>(buffer));
620std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
623 fCounters->fNClusterLoaded.Add(
clusterKeys.size());
625 std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
clusters;
626 std::vector<ROOT::Internal::RRawFile::RIOVec>
readRequests;
644 for (std::size_t i = 0; i <
nBatch; ++i) {
660 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
663 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
666 fCounters->fNReadV.Inc();
667 fCounters->fNRead.Add(
nBatch);
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char mode
Manages a set of clusters containing compressed and packed pages.
An in-memory subset of the packed and compressed pages of a cluster.
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Helper class to compress data blocks in the ROOT compression frame format.
static std::size_t Zip(const void *from, std::size_t nbytes, int compression, void *to)
Returns the size of the compressed data, written into the provided output buffer.
Helper class to uncompress data blocks in the ROOT compression frame format.
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
Write RNTuple data blocks in a TFile or a bare file container.
static std::unique_ptr< RNTupleFileWriter > Recreate(std::string_view ntupleName, std::string_view path, EContainerFormat containerFormat, const ROOT::RNTupleWriteOptions &options)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
static std::unique_ptr< RNTupleFileWriter > Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize)
The directory parameter can also be a TFile object (TFile inherits from TDirectory).
A helper class for serializing and deserialization of the RNTuple binary format.
static RResult< void > DeserializePageList(const void *buffer, std::uint64_t bufSize, ROOT::DescriptorId_t clusterGroupId, RNTupleDescriptor &desc, EDescriptorDeserializeMode mode)
static RResult< void > DeserializeFooter(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder)
EDescriptorDeserializeMode
static RResult< void > DeserializeHeader(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder)
A memory region that contains packed and compressed pages.
A page as being stored on disk, that is packed and compressed.
Base class for a sink with a physical storage backend.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
A thread-safe cache of pages loaded from the page source.
Storage provider that writes ntuple pages into a file.
void CommitBatchOfPages(CommitBatch &batch, std::vector< RNTupleLocator > &locators)
Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages con...
RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
std::uint64_t StageClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
void CommitDatasetImpl() final
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the c...
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
RNTupleLocator CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
std::unique_ptr< ROOT::Internal::RNTupleFileWriter > fWriter
~RPageSinkFile() override
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask) final
Vector commit of preprocessed pages.
Storage provider that reads ntuple pages from a file.
std::unique_ptr< ROOT::Internal::RCluster > PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector< RRawFile::RIOVec > &readRequests)
Helper function for LoadClusters: it prepares the memory buffer (page map) and the read requests for ...
~RPageSourceFile() override
RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final
LoadStructureImpl() has been called before AttachImpl() is called
std::vector< std::unique_ptr< ROOT::Internal::RCluster > > LoadClusters(std::span< ROOT::Internal::RCluster::RKey > clusterKeys) final
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options)
std::unique_ptr< RPageSource > CloneImpl() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
void LoadStructureImpl() final
std::unique_ptr< RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
ROOT::Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
void LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
Abstract interface to read data from an ntuple.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
Stores information about the cluster in which this page resides.
A page is a slice of a column that is mapped into memory.
static const void * GetPageZeroBuffer()
Return a pointer to the page zero buffer used if there is no on-disk data for a particular deferred c...
The RRawFileTFile wraps an open TFile, but does not take ownership.
The RRawFile provides read-only access to local and remote files.
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Base class for all ROOT issued exceptions.
The on-storage metadata of an RNTuple.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Generic information about the physical location of data.
Common user-tunable settings for reading RNTuples.
Common user-tunable settings for storing RNTuples.
std::uint64_t GetMaxKeySize() const
Representation of an RNTuple data set in a ROOT file.
static constexpr std::uint16_t kVersionEpoch
const_iterator begin() const
const_iterator end() const
Describe directory structure in memory.
std::unique_ptr< T[]> MakeUninitArray(std::size_t size)
Make an array of default-initialized elements.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
The identifiers that specifies the content of a (partial) cluster.
On-disk pages within a page source are identified by the column and page number.
Summarizes cluster-level information that is necessary to load a certain page.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Used for vector reads from multiple offsets into multiple buffers.
Information about a single page in the context of a cluster's page range.