Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorageFile
17#define ROOT7_RPageStorageFile
18
19#include <ROOT/RMiniFile.hxx>
20#include <ROOT/RNTuple.hxx>
22#include <ROOT/RNTupleZip.hxx>
23#include <ROOT/RPageStorage.hxx>
24#include <ROOT/RRawFile.hxx>
25#include <string_view>
26
27#include <array>
28#include <cstdio>
29#include <memory>
30#include <optional>
31#include <string>
32#include <utility>
33
34class TDirectory;
35
36namespace ROOT {
37class RNTuple; // for making RPageSourceFile a friend of RNTuple
38class RNTupleLocator;
39
40namespace Internal {
41class RRawFile;
42}
43
44namespace Experimental {
45
46namespace Internal {
47class RClusterPool;
48class RPageAllocatorHeap;
49
50// clang-format off
51/**
52\class ROOT::Experimental::Internal::RPageSinkFile
53\ingroup NTuple
54\brief Storage provider that writes ntuple pages into a file
55
56The written file can be either in ROOT format or in RNTuple bare format.
57*/
58// clang-format on
60private:
61 // A set of pages to be committed together in a vector write.
62 // Currently we assume they're all sequential (although they may span multiple ranges).
63 struct CommitBatch {
64 /// The list of pages to commit
65 std::vector<const RSealedPage *> fSealedPages;
66 /// Total size in bytes of the batch
67 size_t fSize;
68 /// Total uncompressed size of the elements in the page batch
70 };
71
72 std::unique_ptr<RNTupleFileWriter> fWriter;
73 /// Number of bytes committed to storage in the current cluster
74 std::uint64_t fNBytesCurrentCluster = 0;
75 RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options);
76
77 /// We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the corresponding
78 /// key. It is not strictly necessary to write and read the sealed page.
80
81 /// Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages
82 /// contained in `batch`. The locators for the written pages are appended to `locators`.
83 /// This procedure also updates some internal metrics of the page sink, hence it's not const.
84 /// `batch` gets reset to size 0 after the writing is done (but its begin and end are not updated).
85 void CommitBatchOfPages(CommitBatch &batch, std::vector<RNTupleLocator> &locators);
86
87protected:
89 void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
93 std::vector<RNTupleLocator>
94 CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges, const std::vector<bool> &mask) final;
95 std::uint64_t StageClusterImpl() final;
98 void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
99
100public:
101 RPageSinkFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleWriteOptions &options);
108}; // class RPageSinkFile
109
110// clang-format off
111/**
112\class ROOT::Experimental::Internal::RPageSourceFile
113\ingroup NTuple
114\brief Storage provider that reads ntuple pages from a file
115*/
116// clang-format on
118 friend class ROOT::RNTuple;
119
120private:
121 /// Holds the uncompressed header and footer
123 std::unique_ptr<unsigned char[]> fBuffer; ///< single buffer for both header and footer
124 void *fPtrHeader = nullptr; ///< either nullptr or points into fBuffer
125 void *fPtrFooter = nullptr; ///< either nullptr or points into fBuffer
126
127 /// Called at the end of Attach(), i.e. when the header and footer are processed. Swaps *this with a default-constructed RStructureBuffer; the previous contents are destroyed together with the local when it goes out of scope.
128 void Reset()
129 {
130 RStructureBuffer empty;
131 std::swap(empty, *this);
132 }
133 };
134
135 /// Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name
136 std::optional<RNTuple> fAnchor;
137 /// The last cluster from which a page got loaded. Points into fClusterPool->fPool
138 RCluster *fCurrentCluster = nullptr;
139 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
140 std::unique_ptr<ROOT::Internal::RRawFile> fFile;
141 /// Takes the fFile to read ntuple blobs from it
143 /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
145 /// The cluster pool asynchronously preloads the next few clusters
146 std::unique_ptr<RClusterPool> fClusterPool;
147 /// Populated by LoadStructureImpl(), reset at the end of Attach()
149
150 RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options);
151
152 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
153 /// read requests for a given cluster and columns. The read requests are appended to
154 /// the provided vector. This way, requests can be collected for multiple clusters before
155 /// sending them to RRawFile::ReadV().
156 std::unique_ptr<RCluster>
157 PrepareSingleCluster(const RCluster::RKey &clusterKey, std::vector<ROOT::Internal::RRawFile::RIOVec> &readRequests);
158
159protected:
160 void LoadStructureImpl() final;
161 RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
162 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
163 std::unique_ptr<RPageSource> CloneImpl() const final;
164
167
168public:
169 RPageSourceFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleReadOptions &options);
170 RPageSourceFile(std::string_view ntupleName, std::unique_ptr<ROOT::Internal::RRawFile> file,
171 const ROOT::RNTupleReadOptions &options);
172 /// Used from the RNTuple class to build a datasource if the anchor is already available.
173 /// Requires the RNTuple object to be streamed from a file.
174 static std::unique_ptr<RPageSourceFile>
175 CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
176
182
183 void
185
186 std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
187}; // class RPageSourceFile
188
189} // namespace Internal
190
191} // namespace Experimental
192} // namespace ROOT
193
194#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char mode
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:152
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition RMiniFile.hxx:59
A helper class for piece-wise construction of an RNTupleDescriptor.
A helper class for serialization and deserialization of the RNTuple binary format.
Base class for a sink with a physical storage backend.
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
Reference to a page stored in the page pool.
Storage provider that writes ntuple pages into a file.
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask) final
Vector commit of preprocessed pages.
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the c...
RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
std::uint64_t StageClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
void CommitBatchOfPages(CommitBatch &batch, std::vector< RNTupleLocator > &locators)
Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages con...
std::unique_ptr< RNTupleFileWriter > fWriter
RNTupleLocator CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override
Storage provider that reads ntuple pages from a file.
RNTupleDescriptorBuilder fDescriptorBuilder
The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor.
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
RStructureBuffer fStructureBuffer
Populated by LoadStructureImpl(), reset at the end of Attach()
std::optional< RNTuple > fAnchor
Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name.
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:47
The on-storage meta-data of an ntuple.
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Generic information about the physical location of data.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69
Describe directory structure in memory.
Definition TDirectory.h:45
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:156
size_t fBytesPacked
Total uncompressed size of the elements in the page batch.
std::vector< const RSealedPage * > fSealedPages
The list of pages to commit.
std::unique_ptr< unsigned char[]> fBuffer
single buffer for both header and footer
void Reset()
Called at the end of Attach(), i.e. when the header and footer are processed.
Summarizes cluster-level information that is necessary to load a certain page.
A sealed page contains the bytes of a page as written to storage (packed & compressed).