Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorageFile
17#define ROOT7_RPageStorageFile
18
19#include <ROOT/RMiniFile.hxx>
21#include <ROOT/RNTupleZip.hxx>
22#include <ROOT/RPageStorage.hxx>
23#include <ROOT/RRawFile.hxx>
24#include <string_view>
25
26#include <array>
27#include <cstdio>
28#include <memory>
29#include <string>
30#include <utility>
31
32class TFile;
33
34namespace ROOT {
35
36namespace Internal {
37class RRawFile;
38}
39
40namespace Experimental {
41class RNTuple; // for making RPageSourceFile a friend of RNTuple
42
43namespace Internal {
44class RClusterPool;
46class RPagePool;
47
48// clang-format off
49/**
50\class ROOT::Experimental::Internal::RPageSinkFile
51\ingroup NTuple
52\brief Storage provider that writes ntuple pages into a file
53
54The written file can be either in ROOT format or in RNTuple bare format.
55*/
56// clang-format on
58private:
59 std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
60
61 std::unique_ptr<RNTupleFileWriter> fWriter;
62 /// Number of bytes committed to storage in the current cluster
63 std::uint64_t fNBytesCurrentCluster = 0;
64 RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options);
65
67 std::size_t bytesPacked);
68
69protected:
71 void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
72 RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
74 CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
75 std::vector<RNTupleLocator> CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges) final;
76 std::uint64_t CommitClusterImpl() final;
77 RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
78 void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
79
80public:
81 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
82 RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
83 RPageSinkFile(const RPageSinkFile&) = delete;
84 RPageSinkFile& operator=(const RPageSinkFile&) = delete;
86 RPageSinkFile& operator=(RPageSinkFile&&) = default;
87 ~RPageSinkFile() override;
88
89 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
90 void ReleasePage(RPage &page) final;
91}; // class RPageSinkFile
92
93// clang-format off
94/**
95\class ROOT::Experimental::Internal::RPageSourceFile
96\ingroup NTuple
97\brief Storage provider that reads ntuple pages from a file
98*/
99// clang-format on
102
103private:
104 /// Summarizes cluster-level information that is necessary to populate a certain page.
105 /// Used by PopulatePageFromCluster().
108 /// Location of the page on disk
110 /// The first element number of the page's column in the given cluster
111 std::uint64_t fColumnOffset = 0;
112 };
113
114 /// Populated pages might be shared; the page pool might, at some point, be used by multiple page sources
115 std::shared_ptr<RPagePool> fPagePool;
116 /// The last cluster from which a page got populated. Points into fClusterPool->fPool
118 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
119 std::unique_ptr<ROOT::Internal::RRawFile> fFile;
120 /// Takes the fFile to read ntuple blobs from it
122 /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
124 /// The cluster pool asynchronously preloads the next few clusters
125 std::unique_ptr<RClusterPool> fClusterPool;
126
127 /// Deserializes the header and footer into a minimal descriptor held by fDescriptorBuilder
128 void InitDescriptor(const RNTuple &anchor);
129
130 RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
131
132 RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo,
133 ClusterSize_t::ValueType idxInCluster);
134
135 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
136 /// read requests for a given cluster and columns. The read requests are appended to
137 /// the provided vector. This way, requests can be collected for multiple clusters before
138 /// sending them to RRawFile::ReadV().
139 std::unique_ptr<RCluster> PrepareSingleCluster(
140 const RCluster::RKey &clusterKey,
141 std::vector<ROOT::Internal::RRawFile::RIOVec> &readRequests);
142
143protected:
145 void UnzipClusterImpl(RCluster *cluster) final;
146
147public:
148 RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
149 RPageSourceFile(std::string_view ntupleName, std::unique_ptr<ROOT::Internal::RRawFile> file,
150 const RNTupleReadOptions &options);
151 /// Used from the RNTuple class to build a datasource if the anchor is already available.
152 /// Requires the RNTuple object to be streamed from a file.
153 static std::unique_ptr<RPageSourceFile>
154 CreateFromAnchor(const RNTuple &anchor, const RNTupleReadOptions &options = RNTupleReadOptions());
155 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
156 /// The meta-data (header and footer) is reread and parsed by the clone.
157 std::unique_ptr<RPageSource> Clone() const final;
158
160 RPageSourceFile& operator=(const RPageSourceFile&) = delete;
162 RPageSourceFile &operator=(RPageSourceFile &&) = delete;
163 ~RPageSourceFile() override;
164
165 RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
166 RPage PopulatePage(ColumnHandle_t columnHandle, RClusterIndex clusterIndex) final;
167 void ReleasePage(RPage &page) final;
168
169 void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage) final;
170
171 std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
172}; // class RPageSourceFile
173
174} // namespace Internal
175
176} // namespace Experimental
177} // namespace ROOT
178
179#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Manages a set of clusters containing compressed and packed pages.
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:152
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition RMiniFile.hxx:54
A helper class for piece-wise construction of an RNTupleDescriptor.
Uses standard C++ memory allocation for the column data pages.
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
RPagePersistentSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
A thread-safe cache of column pages.
Definition RPagePool.hxx:47
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges) final
Vector commit of preprocessed pages.
std::uint64_t CommitClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements.
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options)
std::unique_ptr< RNTupleFileWriter > fWriter
void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const RNTupleReadOptions &options=RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
std::shared_ptr< RPagePool > fPagePool
Populated pages might be shared; the page pool might, at some point, be used by multiple page sources...
RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options)
RNTupleDescriptorBuilder fDescriptorBuilder
The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys) final
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
std::unique_ptr< RCluster > PrepareSingleCluster(const RCluster::RKey &clusterKey, std::vector< ROOT::Internal::RRawFile::RIOVec > &readRequests)
Helper function for LoadClusters: it prepares the memory buffer (page map) and the read requests for ...
RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ClusterSize_t::ValueType idxInCluster)
void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage) final
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
void InitDescriptor(const RNTuple &anchor)
Deserializes the header and footer into a minimal descriptor held by fDescriptorBuilder.
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
RCluster * fCurrentCluster
The last cluster from which a page got populated. Points into fClusterPool->fPool.
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
RColumnHandle ColumnHandle_t
The column handle identifies a column with the currently open page storage.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
The on-storage meta-data of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:61
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:53
STL class.
STL class.
STL class.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:156
Summarizes cluster-level information that is necessary to populate a certain page.
RClusterDescriptor::RPageRange::RPageInfoExtended fPageInfo
Location of the page on disk.
std::uint64_t fColumnOffset
The first element number of the page's column in the given cluster.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Generic information about the physical location of data.