Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5
6/*************************************************************************
7 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef ROOT_RPageStorageFile
15#define ROOT_RPageStorageFile
16
17#include <ROOT/RMiniFile.hxx>
18#include <ROOT/RNTuple.hxx>
20#include <ROOT/RNTupleZip.hxx>
21#include <ROOT/RPageStorage.hxx>
22#include <ROOT/RRawFile.hxx>
23#include <string_view>
24
25#include <array>
26#include <cstdio>
27#include <memory>
28#include <optional>
29#include <string>
30#include <utility>
31
32class TDirectory;
33
34namespace ROOT {
35class RNTuple; // for making RPageSourceFile a friend of RNTuple
36class RNTupleLocator;
37
38namespace Experimental {
39class RFile;
40}
41
42namespace Internal {
43class RRawFile;
45
46// clang-format off
47/**
48\class ROOT::Internal::RPageSinkFile
49\ingroup NTuple
50\brief Storage provider that writes ntuple pages into a file
51
52The written file can be either in ROOT format or in RNTuple bare format.
53*/
54// clang-format on
56private:
57 // A set of pages to be committed together in a vector write.
58 // Currently we assume they're all sequential (although they may span multiple ranges).
59 struct CommitBatch {
60 /// The list of pages to commit
61 std::vector<const RSealedPage *> fSealedPages;
62 /// Total size in bytes of the batch
63 size_t fSize;
64 /// Total uncompressed size of the elements in the page batch
66 };
67
68 std::unique_ptr<ROOT::Internal::RNTupleFileWriter> fWriter;
69 /// Number of bytes committed to storage in the current cluster
70 std::uint64_t fNBytesCurrentCluster = 0;
71 /// On UpdateSchema(), the new class fields register the corresponding streamer info here so that the
72 /// streamer info records in the file can be properly updated on dataset commit
74
75 RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options);
76 RPageSinkFile(std::unique_ptr<ROOT::Internal::RNTupleFileWriter> writer, const ROOT::RNTupleWriteOptions &options);
77
78 /// We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the corresponding
79 /// key. It is not strictly necessary to write and read the sealed page.
80 RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked);
81
82 /// Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages
83 /// contained in `batch`. The locators for the written pages are appended to `locators`.
84 /// This procedure also updates some internal metrics of the page sink, hence it's not const.
85 /// `batch` gets reset to size 0 after the writing is done (but its begin and end are not updated).
86 void CommitBatchOfPages(CommitBatch &batch, std::vector<RNTupleLocator> &locators);
87
88protected:
90 void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
91 RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override;
93 CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
94 std::vector<RNTupleLocator>
95 CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges, const std::vector<bool> &mask) final;
96 std::uint64_t StageClusterImpl() final;
97 RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
99 RNTupleLink CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
100
101public:
102 RPageSinkFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleWriteOptions &options);
103 RPageSinkFile(std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options);
104 RPageSinkFile(std::string_view ntupleName, ROOT::Experimental::RFile &file, std::string_view ntupleDir,
105 const ROOT::RNTupleWriteOptions &options);
106 RPageSinkFile(const RPageSinkFile &) = delete;
107 RPageSinkFile &operator=(const RPageSinkFile &) = delete;
109 RPageSinkFile &operator=(RPageSinkFile &&) = default;
110 ~RPageSinkFile() override;
111
112 void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) final;
113
115 CloneAsHidden(std::string_view name, const ROOT::RNTupleWriteOptions &opts) const override;
116}; // class RPageSinkFile
117
118// clang-format off
119/**
120\class ROOT::Internal::RPageSourceFile
121\ingroup NTuple
122\brief Storage provider that reads ntuple pages from a file
123*/
124// clang-format on
126 friend class ROOT::RNTuple;
127
128private:
129 /// Holds the uncompressed header and footer
131 std::unique_ptr<unsigned char[]> fBuffer; ///< single buffer for both header and footer
132 void *fPtrHeader = nullptr; ///< either nullptr or points into fBuffer
133 void *fPtrFooter = nullptr; ///< either nullptr or points into fBuffer
134
135 /// Called at the end of Attach(), i.e. when the header and footer are processed
136 void Reset()
137 {
138 RStructureBuffer empty;
139 std::swap(empty, *this);
140 }
141 };
142
143 /// Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name
144 std::optional<RNTuple> fAnchor;
145 /// The last cluster from which a page got loaded. Points into fClusterPool->fPool
147 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
148 std::unique_ptr<RRawFile> fFile;
149 /// Takes the fFile to read ntuple blobs from it
151 /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
153 /// Populated by LoadStructureImpl(), reset at the end of Attach()
155 /// Tracks the last read offset for seek distance calculation
156 std::uint64_t fLastOffset = 0;
157
158 /// File-specific I/O performance counters
165 std::unique_ptr<RFileCounters> fFileCounters;
166 /// Total file size, set once in AttachImpl()
167 std::int64_t fFileSize = 0;
168
169 RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options);
170
171 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
172 /// read requests for a given cluster and columns. The read requests are appended to
173 /// the provided vector. This way, requests can be collected for multiple clusters before
174 /// sending them to RRawFile::ReadV().
175 std::unique_ptr<ROOT::Internal::RCluster>
176 PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector<RRawFile::RIOVec> &readRequests);
177
178protected:
179 void LoadStructureImpl() final;
180 ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
181 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
182 std::unique_ptr<RPageSource> CloneImpl() const final;
183
185 LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final;
186
187public:
188 RPageSourceFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleReadOptions &options);
189 RPageSourceFile(std::string_view ntupleName, std::unique_ptr<RRawFile> file,
190 const ROOT::RNTupleReadOptions &options);
191 /// Used from the RNTuple class to build a datasource if the anchor is already available.
192 /// Requires the RNTuple object to be streamed from a file.
193 static std::unique_ptr<RPageSourceFile>
194 CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
195
197 RPageSourceFile &operator=(const RPageSourceFile &) = delete;
199 RPageSourceFile &operator=(RPageSourceFile &&) = delete;
200 ~RPageSourceFile() override;
201
203 const ROOT::RNTupleReadOptions &options = {}) final;
204
205 void
206 LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;
207
208 std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
209 LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;
210
211 void LoadStreamerInfo() final;
212}; // class RPageSourceFile
213
214} // namespace Internal
215} // namespace ROOT
216
217#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char mode
char name[80]
Definition TGX11.cxx:148
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
An interface to read from, or write to, a ROOT file, as well as performing other common operations.
Definition RFile.hxx:252
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:148
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition RMiniFile.hxx:61
A helper class for piece-wise construction of an RNTupleDescriptor.
A helper class for serializing and deserialization of the RNTuple binary format.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
Uses standard C++ memory allocation for the column data pages.
RPagePersistentSink(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
Reference to a page stored in the page pool.
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
void CommitBatchOfPages(CommitBatch &batch, std::vector< RNTupleLocator > &locators)
Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages con...
RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
std::unique_ptr< RPageSink > CloneAsHidden(std::string_view name, const ROOT::RNTupleWriteOptions &opts) const override
Creates a new sink with the same underlying storage as this but writing to a different RNTuple named ...
std::uint64_t StageClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
RNTupleLink CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the c...
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
RNTupleLocator CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
std::unique_ptr< ROOT::Internal::RNTupleFileWriter > fWriter
void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask) final
Vector commit of preprocessed pages.
ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fInfosOfClassFields
On UpdateSchema(), the new class fields register the corresponding streamer info here so that the str...
Abstract interface to write data into an ntuple.
std::int64_t fFileSize
Total file size, set once in AttachImpl()
RNTupleDescriptorBuilder fDescriptorBuilder
The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor.
std::unique_ptr< ROOT::Internal::RCluster > PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector< RRawFile::RIOVec > &readRequests)
Helper function for LoadClusters: it prepares the memory buffer (page map) and the read requests for ...
std::uint64_t fLastOffset
Tracks the last read offset for seek distance calculation.
ROOT::Internal::RCluster * fCurrentCluster
The last cluster from which a page got loaded. Points into fClusterPool->fPool.
std::unique_ptr< RPageSource > OpenWithDifferentAnchor(const ROOT::Internal::RNTupleLink &anchorLink, const ROOT::RNTupleReadOptions &options={}) final
Creates a new PageSource using the same underlying file as this but referring to a different RNTuple,...
RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final
LoadStructureImpl() has been called before AttachImpl() is called
std::unique_ptr< RFileCounters > fFileCounters
RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options)
std::unique_ptr< RPageSource > CloneImpl() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
RStructureBuffer fStructureBuffer
Populated by LoadStructureImpl(), reset at the end of Attach()
std::unique_ptr< RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
std::optional< RNTuple > fAnchor
Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name.
ROOT::Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
RPageSource(std::string_view ntupleName, const ROOT::RNTupleReadOptions &fOptions)
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:44
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
The on-storage metadata of an RNTuple.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Generic information about the physical location of data.
Common user-tunable settings for reading RNTuples.
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:68
Describe directory structure in memory.
Definition TDirectory.h:45
STL class.
STL class.
Namespace for ROOT features in testing.
Definition TROOT.h:100
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:152
The incremental changes to an RNTupleModel
size_t fSize
Total size in bytes of the batch.
std::vector< const RSealedPage * > fSealedPages
The list of pages to commit.
size_t fBytesPacked
Total uncompressed size of the elements in the page batch.
File-specific I/O performance counters.
ROOT::Experimental::Detail::RNTupleCalcPerf & fSparseness
ROOT::Experimental::Detail::RNTupleCalcPerf & fSzFile
ROOT::Experimental::Detail::RNTupleAtomicCounter & fSzSkip
ROOT::Experimental::Detail::RNTupleCalcPerf & fRandomness
Holds the uncompressed header and footer.
void * fPtrHeader
either nullptr or points into fBuffer
void Reset()
Called at the end of Attach(), i.e. when the header and footer are processed.
void * fPtrFooter
either nullptr or points into fBuffer
std::unique_ptr< unsigned char[]> fBuffer
single buffer for both header and footer
Summarizes cluster-level information that is necessary to load a certain page. Used by LoadPageImpl(...
A sealed page contains the bytes of a page as written to storage (packed & compressed).