Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorageFile
17#define ROOT7_RPageStorageFile
18
19#include <ROOT/RMiniFile.hxx>
21#include <ROOT/RNTupleZip.hxx>
22#include <ROOT/RPageStorage.hxx>
23#include <ROOT/RRawFile.hxx>
24#include <string_view>
25
26#include <array>
27#include <cstdio>
28#include <memory>
29#include <string>
30#include <utility>
31
32class TFile;
33
34namespace ROOT {
35
36namespace Internal {
37class RRawFile;
38}
39
40namespace Experimental {
41class RNTuple; // for making RPageSourceFile a friend of RNTuple
42
43namespace Internal {
44class RClusterPool;
45class RPageAllocatorHeap;
46
47// clang-format off
48/**
49\class ROOT::Experimental::Internal::RPageSinkFile
50\ingroup NTuple
51\brief Storage provider that writes ntuple pages into a file
52
53The written file can be either in ROOT format or in RNTuple bare format.
54*/
55// clang-format on
57private:
58 std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
59
60 std::unique_ptr<RNTupleFileWriter> fWriter;
61 /// Number of bytes committed to storage in the current cluster
62 std::uint64_t fNBytesCurrentCluster = 0;
63 RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options);
64
66 std::size_t bytesPacked);
67
68protected:
70 void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
71 RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
73 CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
74 std::vector<RNTupleLocator> CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges) final;
75 std::uint64_t CommitClusterImpl() final;
76 RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
78 void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
79
80public:
81 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
82 RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
83 RPageSinkFile(const RPageSinkFile&) = delete;
84 RPageSinkFile& operator=(const RPageSinkFile&) = delete;
86 RPageSinkFile& operator=(RPageSinkFile&&) = default;
87 ~RPageSinkFile() override;
88
89 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
90 void ReleasePage(RPage &page) final;
91}; // class RPageSinkFile
92
93// clang-format off
94/**
95\class ROOT::Experimental::Internal::RPageSourceFile
96\ingroup NTuple
97\brief Storage provider that reads ntuple pages from a file
98*/
99// clang-format on
102
103private:
104 /// Summarizes cluster-level information that is necessary to populate a certain page.
105 /// Used by PopulatePageFromCluster().
107 DescriptorId_t fClusterId = 0;
108 /// Location of the page on disk
110 /// The first element number of the page's column in the given cluster
111 std::uint64_t fColumnOffset = 0;
112 };
113
114 /// The last cluster from which a page got populated. Points into fClusterPool->fPool
115 RCluster *fCurrentCluster = nullptr;
116 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
117 std::unique_ptr<ROOT::Internal::RRawFile> fFile;
118 /// Takes the fFile to read ntuple blobs from it
120 /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
122 /// The cluster pool asynchronously preloads the next few clusters
123 std::unique_ptr<RClusterPool> fClusterPool;
124
125 /// Deserializes the header and footer into a minimal descriptor held by fDescriptorBuilder
126 void InitDescriptor(const RNTuple &anchor);
127
128 RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
129
130 RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo,
131 ClusterSize_t::ValueType idxInCluster);
132
133 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
134 /// read requests for a given cluster and columns. The read requests are appended to
135 /// the provided vector. This way, requests can be collected for multiple clusters before
136 /// sending them to RRawFile::ReadV().
137 std::unique_ptr<RCluster> PrepareSingleCluster(
138 const RCluster::RKey &clusterKey,
139 std::vector<ROOT::Internal::RRawFile::RIOVec> &readRequests);
140
141protected:
142 RNTupleDescriptor AttachImpl() final;
143
144public:
145 RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
146 RPageSourceFile(std::string_view ntupleName, std::unique_ptr<ROOT::Internal::RRawFile> file,
147 const RNTupleReadOptions &options);
148 /// Used from the RNTuple class to build a datasource if the anchor is already available.
149 /// Requires the RNTuple object to be streamed from a file.
150 static std::unique_ptr<RPageSourceFile>
151 CreateFromAnchor(const RNTuple &anchor, const RNTupleReadOptions &options = RNTupleReadOptions());
152 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
153 /// The meta-data (header and footer) is reread and parsed by the clone.
154 std::unique_ptr<RPageSource> Clone() const final;
155
157 RPageSourceFile& operator=(const RPageSourceFile&) = delete;
159 RPageSourceFile &operator=(RPageSourceFile &&) = delete;
160 ~RPageSourceFile() override;
161
162 RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
163 RPage PopulatePage(ColumnHandle_t columnHandle, RClusterIndex clusterIndex) final;
164 void ReleasePage(RPage &page) final;
165
166 void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage) final;
167
168 std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
169}; // class RPageSourceFile
170
171} // namespace Internal
172
173} // namespace Experimental
174} // namespace ROOT
175
176#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:152
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition RMiniFile.hxx:54
A helper class for piece-wise construction of an RNTupleDescriptor.
Base class for a sink with a physical storage backend.
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
Storage provider that writes ntuple pages into a file.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges) final
Vector commit of preprocessed pages.
std::uint64_t CommitClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements.
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
std::unique_ptr< RNTupleFileWriter > fWriter
Storage provider that reads ntuple pages from a file.
RNTupleDescriptorBuilder fDescriptorBuilder
The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor.
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
The on-storage meta-data of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:61
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:156
Summarizes cluster-level information that is necessary to populate a certain page.
RClusterDescriptor::RPageRange::RPageInfoExtended fPageInfo
Location of the page on disk.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Generic information about the physical location of data.