Logo ROOT  
Reference Guide
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorageFile
17#define ROOT7_RPageStorageFile
18
19#include <ROOT/RMiniFile.hxx>
20#include <ROOT/RNTupleZip.hxx>
21#include <ROOT/RPageStorage.hxx>
22#include <ROOT/RRawFile.hxx>
23#include <ROOT/RStringView.hxx>
24
25#include <array>
26#include <cstdio>
27#include <memory>
28#include <string>
29#include <utility>
30
31class TFile;
32
33namespace ROOT {
34
35namespace Internal {
36class RRawFile;
37}
38
39namespace Experimental {
40namespace Detail {
41
42class RClusterPool;
43class RPageAllocatorHeap;
44class RPagePool;
45
46
47// clang-format off
48/**
49\class ROOT::Experimental::Detail::RPageSinkFile
50\ingroup NTuple
51\brief Storage provider that writes ntuple pages into a file
52
53The written file can be either in ROOT format or in RNTuple bare format.
54*/
55// clang-format on
56class RPageSinkFile : public RPageSink {
57private:
58 std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
59
60 std::unique_ptr<Internal::RNTupleFileWriter> fWriter;
61 /// Byte offset of the first page of the current cluster
62 std::uint64_t fClusterMinOffset = std::uint64_t(-1);
63 /// Byte offset of the end of the last page of the current cluster
64 std::uint64_t fClusterMaxOffset = 0;
65 /// Number of bytes committed to storage in the current cluster
66 std::uint64_t fNBytesCurrentCluster = 0;
67 RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options);
68
70 std::size_t bytesPacked);
71
72protected:
73 void CreateImpl(const RNTupleModel &model) final;
74 RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
76 const RPageStorage::RSealedPage &sealedPage) final;
77 std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) final;
78 void CommitDatasetImpl() final;
79
80public:
81 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
82 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options,
83 std::unique_ptr<TFile> &file);
84 RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
85 RPageSinkFile(const RPageSinkFile&) = delete;
86 RPageSinkFile& operator=(const RPageSinkFile&) = delete;
88 RPageSinkFile& operator=(RPageSinkFile&&) = default;
89 virtual ~RPageSinkFile();
90
91 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
92 void ReleasePage(RPage &page) final;
93};
94
95
96// clang-format off
97/**
98\class ROOT::Experimental::Detail::RPageAllocatorFile
99\ingroup NTuple
100\brief Manages pages read from a file
101*/
102// clang-format on
104public:
105 static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements);
106 static void DeletePage(const RPage& page);
107};
108
109
110// clang-format off
111/**
112\class ROOT::Experimental::Detail::RPageSourceFile
113\ingroup NTuple
114\brief Storage provider that reads ntuple pages from a file
115*/
116// clang-format on
118public:
119 /// Cannot process pages larger than 1MB
120 static constexpr std::size_t kMaxPageSize = 1024 * 1024;
121
122private:
123 /// Populated pages might be shared; there memory buffer is managed by the RPageAllocatorFile
124 std::unique_ptr<RPageAllocatorFile> fPageAllocator;
125 /// The page pool might, at some point, be used by multiple page sources
126 std::shared_ptr<RPagePool> fPagePool;
127 /// The last cluster from which a page got populated. Points into fClusterPool->fPool
128 RCluster *fCurrentCluster = nullptr;
129 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
130 std::unique_ptr<ROOT::Internal::RRawFile> fFile;
131 /// Takes the fFile to read ntuple blobs from it
133 /// The cluster pool asynchronously preloads the next few clusters
134 std::unique_ptr<RClusterPool> fClusterPool;
135
136 RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
137 RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor,
138 ClusterSize_t::ValueType idxInCluster);
139
140 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
141 /// read requests for a given cluster and columns. The reead requests are appended to
142 /// the provided vector. This way, requests can be collected for multiple clusters before
143 /// sending them to RRawFile::ReadV().
144 std::unique_ptr<RCluster> PrepareSingleCluster(
145 const RCluster::RKey &clusterKey,
146 std::vector<ROOT::Internal::RRawFile::RIOVec> &readRequests);
147
148protected:
149 RNTupleDescriptor AttachImpl() final;
150 void UnzipClusterImpl(RCluster *cluster) final;
151
152public:
153 RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
154 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
155 /// The meta-data (header and footer) is reread and parsed by the clone.
156 std::unique_ptr<RPageSource> Clone() const final;
157
159 RPageSourceFile& operator=(const RPageSourceFile&) = delete;
161 RPageSourceFile& operator=(RPageSourceFile&&) = default;
162 virtual ~RPageSourceFile();
163
164 RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
165 RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) final;
166 void ReleasePage(RPage &page) final;
167
168 void LoadSealedPage(DescriptorId_t columnId, const RClusterIndex &clusterIndex,
169 RSealedPage &sealedPage) final;
170
171 std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
172};
173
174
175} // namespace Detail
176
177} // namespace Experimental
178} // namespace ROOT
179
180#endif
An in-memory subset of the packed and compressed pages of a cluster.
Definition: RCluster.hxx:154
Manages pages read from a file.
Storage provider that writes ntuple pages into a file.
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements.
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
void CreateImpl(const RNTupleModel &model) final
std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) final
Returns the number of bytes written to storage (excluding metadata)
RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options)
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
std::uint64_t fClusterMinOffset
Byte offset of the first page of the current cluster.
RClusterDescriptor::RLocator CommitSealedPageImpl(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage) final
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
std::uint64_t fClusterMaxOffset
Byte offset of the end of the last page of the current cluster.
RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RClusterDescriptor::RLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
Abstract interface to write data into an ntuple.
Storage provider that reads ntuple pages from a file.
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
std::shared_ptr< RPagePool > fPagePool
The page pool might, at some point, be used by multiple page sources.
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
std::unique_ptr< RPageAllocatorFile > fPageAllocator
Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition: RMiniFile.hxx:108
Meta-data for a set of ntuple clusters.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:77
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: file.py:1
The identifiers that specifies the content of a (partial) cluster.
Definition: RCluster.hxx:158
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Generic information about the physical location of data.