Logo ROOT  
Reference Guide
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorageFile
17#define ROOT7_RPageStorageFile
18
19#include <ROOT/RMiniFile.hxx>
21#include <ROOT/RNTupleZip.hxx>
22#include <ROOT/RPageStorage.hxx>
23#include <ROOT/RRawFile.hxx>
24#include <ROOT/RStringView.hxx>
25
26#include <array>
27#include <cstdio>
28#include <memory>
29#include <string>
30#include <utility>
31
32class TFile;
33
34namespace ROOT {
35
36namespace Internal {
37class RRawFile;
38}
39
40namespace Experimental {
41class RNTuple; // for making RPageSourceFile a friend of RNTuple
42
43namespace Detail {
44
45class RClusterPool;
46class RPageAllocatorHeap;
47class RPagePool;
48
49
50// clang-format off
51/**
52\class ROOT::Experimental::Detail::RPageSinkFile
53\ingroup NTuple
54\brief Storage provider that writes ntuple pages into a file
55
56The written file can be either in ROOT format or in RNTuple bare format.
57*/
58// clang-format on
59class RPageSinkFile : public RPageSink {
60private:
61 std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
62
63 std::unique_ptr<Internal::RNTupleFileWriter> fWriter;
64 /// Number of bytes committed to storage in the current cluster
65 std::uint64_t fNBytesCurrentCluster = 0;
66 RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options);
67
69 std::size_t bytesPacked);
70
71protected:
72 void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length) final;
73 RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
75 std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) final;
76 RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
77 void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
78
79public:
82 std::unique_ptr<TFile> &file);
83 RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
84 RPageSinkFile(const RPageSinkFile&) = delete;
88 virtual ~RPageSinkFile();
89
90 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
91 void ReleasePage(RPage &page) final;
92};
93
94
95// clang-format off
96/**
97\class ROOT::Experimental::Detail::RPageAllocatorFile
98\ingroup NTuple
99\brief Manages pages read from a file
100*/
101// clang-format on
103public:
104 static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements);
105 static void DeletePage(const RPage& page);
106};
107
108
109// clang-format off
110/**
111\class ROOT::Experimental::Detail::RPageSourceFile
112\ingroup NTuple
113\brief Storage provider that reads ntuple pages from a file
114*/
115// clang-format on
118
119private:
120 /// Summarizes cluster-level information that is necessary to populate a certain page.
121 /// Used by PopulatePageFromCluster().
124 /// Location of the page on disk
126 /// The first element number of the page's column in the given cluster
127 std::uint64_t fColumnOffset = 0;
128 };
129
130 /// Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile
131 std::unique_ptr<RPageAllocatorFile> fPageAllocator;
132 /// The page pool might, at some point, be used by multiple page sources
133 std::shared_ptr<RPagePool> fPagePool;
134 /// The last cluster from which a page got populated. Points into fClusterPool->fPool
136 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
137 std::unique_ptr<ROOT::Internal::RRawFile> fFile;
138 /// Takes the fFile to read ntuple blobs from it
140 /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
142 /// The cluster pool asynchronously preloads the next few clusters
143 std::unique_ptr<RClusterPool> fClusterPool;
144
145 /// Deserializes the header and footer into a minimal descriptor held by fDescriptorBuilder
147
148 RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
149 /// Used from the RNTuple class to build a datasource if the anchor is already available
150 static std::unique_ptr<RPageSourceFile> CreateFromAnchor(const Internal::RFileNTupleAnchor &anchor,
151 std::string_view path, const RNTupleReadOptions &options);
152 RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo,
153 ClusterSize_t::ValueType idxInCluster);
154
155 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
156 /// read requests for a given cluster and columns. The read requests are appended to
157 /// the provided vector. This way, requests can be collected for multiple clusters before
158 /// sending them to RRawFile::ReadV().
159 std::unique_ptr<RCluster> PrepareSingleCluster(
160 const RCluster::RKey &clusterKey,
161 std::vector<ROOT::Internal::RRawFile::RIOVec> &readRequests);
162
163protected:
165 void UnzipClusterImpl(RCluster *cluster) final;
166
167public:
168 RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
169 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
170 /// The meta-data (header and footer) is reread and parsed by the clone.
171 std::unique_ptr<RPageSource> Clone() const final;
172
174 RPageSourceFile& operator=(const RPageSourceFile&) = delete;
176 RPageSourceFile &operator=(RPageSourceFile &&) = delete;
177 virtual ~RPageSourceFile();
178
179 RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
180 RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) final;
181 void ReleasePage(RPage &page) final;
182
183 void LoadSealedPage(DescriptorId_t columnId, const RClusterIndex &clusterIndex,
184 RSealedPage &sealedPage) final;
185
186 std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
187};
188
189
190} // namespace Detail
191
192} // namespace Experimental
193} // namespace ROOT
194
195#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
An in-memory subset of the packed and compressed pages of a cluster.
Definition: RCluster.hxx:154
Manages pages read from a file.
static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
Storage provider that writes ntuple pages into a file.
RNTupleLocator CommitSealedPageImpl(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage) final
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements.
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RPageSinkFile & operator=(const RPageSinkFile &)=delete
std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) final
Returns the number of bytes written to storage (excluding metadata)
void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length) final
RPageSinkFile(const RPageSinkFile &)=delete
RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options)
RPageSinkFile & operator=(RPageSinkFile &&)=default
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
RPageSinkFile(RPageSinkFile &&)=default
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
Abstract interface to write data into an ntuple.
Storage provider that reads ntuple pages from a file.
void LoadSealedPage(DescriptorId_t columnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage) final
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys) final
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
RCluster * fCurrentCluster
The last cluster from which a page got populated. Points into fClusterPool->fPool.
void InitDescriptor(const Internal::RFileNTupleAnchor &anchor)
Deserializes the header and footer into a minimal descriptor held by fDescriptorBuilder.
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const Internal::RFileNTupleAnchor &anchor, std::string_view path, const RNTupleReadOptions &options)
Used from the RNTuple class to build a datasource if the anchor is already available.
std::unique_ptr< RCluster > PrepareSingleCluster(const RCluster::RKey &clusterKey, std::vector< ROOT::Internal::RRawFile::RIOVec > &readRequests)
Helper function for LoadClusters: it prepares the memory buffer (page map) and the read requests for ...
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
void UnzipClusterImpl(RCluster *cluster) final
std::shared_ptr< RPagePool > fPagePool
The page pool might, at some point, be used by multiple page sources.
RNTupleDescriptorBuilder fDescriptorBuilder
The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor.
RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options)
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
std::unique_ptr< RPageAllocatorFile > fPageAllocator
Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile.
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ClusterSize_t::ValueType idxInCluster)
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition: RMiniFile.hxx:102
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:87
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
Representation of an RNTuple data set in a ROOT file.
Definition: RNTuple.hxx:491
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:47
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:83
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:79
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Definition: file.py:1
The identifiers that specify the content of a (partial) cluster.
Definition: RCluster.hxx:158
Summarizes cluster-level information that is necessary to populate a certain page.
RClusterDescriptor::RPageRange::RPageInfoExtended fPageInfo
Location of the page on disk.
std::uint64_t fColumnOffset
The first element number of the page's column in the given cluster.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Entry point for an RNTuple in a ROOT file.
Definition: RMiniFile.hxx:65
Generic information about the physical location of data.