Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2019-11-21
4
5/*************************************************************************
6 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
13#ifndef ROOT_RPageStorageFile
14#define ROOT_RPageStorageFile
15
16#include <ROOT/RMiniFile.hxx>
17#include <ROOT/RNTuple.hxx>
19#include <ROOT/RNTupleZip.hxx>
20#include <ROOT/RPageStorage.hxx>
21#include <ROOT/RRawFile.hxx>
22#include <string_view>
23
24#include <array>
25#include <cstdio>
26#include <memory>
27#include <optional>
28#include <string>
29#include <utility>
30
31class TDirectory;
32
33namespace ROOT {
34class RNTuple; // for making RPageSourceFile a friend of RNTuple
35class RNTupleLocator;
36
37namespace Experimental {
38class RFile;
39}
40
41namespace Internal {
42class RRawFile;
43class RPageAllocatorHeap;
44
45// clang-format off
46/**
47\class ROOT::Internal::RPageSinkFile
48\ingroup NTuple
49\brief Storage provider that writes ntuple pages into a file
50
51The written file can be either in ROOT format or in RNTuple bare format.
52*/
53// clang-format on
55private:
56 // A set of pages to be committed together in a vector write.
57 // Currently we assume they're all sequential (although they may span multiple ranges).
58 struct CommitBatch {
59 /// The sealed pages to commit, held as non-owning pointers (the batch does not own the pages)
60 std::vector<const RSealedPage *> fSealedPages;
61 /// Total size in bytes of the batch
62 size_t fSize;
63 /// Total uncompressed size of the elements in the page batch
/// NOTE(review): the declaration documented by the comment above is missing from this listing
/// (listing line 64 was dropped); the member index further down names it `size_t fBytesPacked` -- confirm
/// against the original header.
65 };
66
67 std::unique_ptr<ROOT::Internal::RNTupleFileWriter> fWriter;
68 /// Number of bytes committed to storage in the current cluster
69 std::uint64_t fNBytesCurrentCluster = 0;
70 /// On UpdateSchema(), the new class fields register the corresponding streamer info here so that the
71 /// streamer info records in the file can be properly updated on dataset commit
73
74 RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options);
75 RPageSinkFile(std::unique_ptr<ROOT::Internal::RNTupleFileWriter> writer, const ROOT::RNTupleWriteOptions &options);
76
77 /// We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the corresponding
78 /// key. It is not strictly necessary to write and read the sealed page.
80
81 /// Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages
82 /// contained in `batch`. The locators for the written pages are appended to `locators`.
83 /// This procedure also updates some internal metrics of the page sink, hence it's not const.
84 /// `batch` gets reset to size 0 after the writing is done (but its begin and end are not updated).
85 void CommitBatchOfPages(CommitBatch &batch, std::vector<RNTupleLocator> &locators);
86
87protected:
89 void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
93 std::vector<RNTupleLocator>
94 CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges, const std::vector<bool> &mask) final;
95 std::uint64_t StageClusterImpl() final;
98 RNTupleLink CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
99
100public:
101 RPageSinkFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleWriteOptions &options);
103 RPageSinkFile(std::string_view ntupleName, ROOT::Experimental::RFile &file, std::string_view ntupleDir,
104 const ROOT::RNTupleWriteOptions &options);
110
112
113 std::unique_ptr<RPageSink>
115}; // class RPageSinkFile
116
117// clang-format off
118/**
119\class ROOT::Internal::RPageSourceFile
120\ingroup NTuple
121\brief Storage provider that reads ntuple pages from a file
122*/
123// clang-format on
125 friend class ROOT::RNTuple;
126
127private:
128 /// Holds the uncompressed header and footer
130 std::unique_ptr<unsigned char[]> fBuffer; ///< single buffer for both header and footer
131 void *fPtrHeader = nullptr; ///< either nullptr or points into fBuffer
132 void *fPtrFooter = nullptr; ///< either nullptr or points into fBuffer
133
134 /// Called at the end of Attach(), i.e. when the header and footer are processed
135 void Reset()
136 {
// A default-constructed instance holds no buffer and, per the in-class initializers shown above,
// has null header and footer pointers.
137 RStructureBuffer empty;
// After the swap, *this is in that empty state. The previous contents now live in `empty` and are
// released when it goes out of scope at the end of this function.
138 std::swap(empty, *this);
139 }
140 };
141
142 /// Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name
143 std::optional<RNTuple> fAnchor;
144 /// The last cluster from which a page got loaded. Points into fClusterPool->fPool
145 ROOT::Internal::RCluster *fCurrentCluster = nullptr;
146 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
147 std::unique_ptr<RRawFile> fFile;
148 /// Takes the fFile to read ntuple blobs from it
150 /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
152 /// Populated by LoadStructureImpl(), reset at the end of Attach()
154 /// Tracks the last read offset for seek distance calculation
155 std::uint64_t fLastOffset = 0;
156
157 /// File-specific I/O performance counters
164 std::unique_ptr<RFileCounters> fFileCounters;
165 /// Total file size, set once in AttachImpl()
166 std::int64_t fFileSize = 0;
167
168 RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options);
169
170 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
171 /// read requests for a given cluster and columns. The read requests are appended to
172 /// the provided vector. This way, requests can be collected for multiple clusters before
173 /// sending them to RRawFile::ReadV().
174 std::unique_ptr<ROOT::Internal::RCluster>
175 PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector<RRawFile::RIOVec> &readRequests);
176
177protected:
178 void LoadStructureImpl() final;
179 ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
180 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
181 std::unique_ptr<RPageSource> CloneImpl() const final;
182
183 void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final;
184
185public:
186 RPageSourceFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleReadOptions &options);
187 RPageSourceFile(std::string_view ntupleName, std::unique_ptr<RRawFile> file,
188 const ROOT::RNTupleReadOptions &options);
189 /// Used from the RNTuple class to build a datasource if the anchor is already available.
190 /// Requires the RNTuple object to be streamed from a file.
191 static std::unique_ptr<RPageSourceFile>
192 CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
193
199
200 std::unique_ptr<RPageSource> OpenWithDifferentAnchor(const ROOT::Internal::RNTupleLink &anchorLink,
201 const ROOT::RNTupleReadOptions &options = {}) final;
202
203 std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
204 LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;
205
206 void LoadStreamerInfo() final;
207}; // class RPageSourceFile
208
209} // namespace Internal
210} // namespace ROOT
211
212#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char mode
char name[80]
Definition TGX11.cxx:148
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:147
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition RMiniFile.hxx:60
A helper class for piece-wise construction of an RNTupleDescriptor.
A helper class for serializing and deserialization of the RNTuple binary format.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
Base class for a sink with a physical storage backend.
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
Storage provider that writes ntuple pages into a file.
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
void CommitBatchOfPages(CommitBatch &batch, std::vector< RNTupleLocator > &locators)
Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages con...
RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
std::unique_ptr< RPageSink > CloneAsHidden(std::string_view name, const ROOT::RNTupleWriteOptions &opts) const override
Creates a new sink with the same underlying storage as this but writing to a different RNTuple named ...
std::uint64_t StageClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the c...
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
RNTupleLocator CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
RNTupleLink CommitDatasetImpl() final
std::unique_ptr< ROOT::Internal::RNTupleFileWriter > fWriter
void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask) final
Vector commit of preprocessed pages.
ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fInfosOfClassFields
On UpdateSchema(), the new class fields register the corresponding streamer info here so that the str...
Abstract interface to write data into an ntuple.
Storage provider that reads ntuple pages from a file.
RNTupleDescriptorBuilder fDescriptorBuilder
The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor.
std::unique_ptr< RFileCounters > fFileCounters
RStructureBuffer fStructureBuffer
Populated by LoadStructureImpl(), reset at the end of Attach()
std::unique_ptr< RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
std::optional< RNTuple > fAnchor
Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name.
ROOT::Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:43
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
The on-storage metadata of an RNTuple.
Generic information about the physical location of data.
Common user-tunable settings for reading RNTuples.
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
Describe directory structure in memory.
Definition TDirectory.h:45
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:151
The incremental changes to an RNTupleModel.
size_t fSize
Total size in bytes of the batch.
std::vector< const RSealedPage * > fSealedPages
The list of pages to commit.
size_t fBytesPacked
Total uncompressed size of the elements in the page batch.
File-specific I/O performance counters.
ROOT::Experimental::Detail::RNTupleCalcPerf & fSparseness
ROOT::Experimental::Detail::RNTupleCalcPerf & fSzFile
ROOT::Experimental::Detail::RNTupleAtomicCounter & fSzSkip
ROOT::Experimental::Detail::RNTupleCalcPerf & fRandomness
Holds the uncompressed header and footer.
void Reset()
Called at the end of Attach(), i.e. when the header and footer are processed.
std::unique_ptr< unsigned char[]> fBuffer
single buffer for both header and footer
A sealed page contains the bytes of a page as written to storage (packed & compressed).