Logo ROOT  
Reference Guide
RPageStorageFile.hxx
Go to the documentation of this file.
1 /// \file ROOT/RPageStorageFile.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2019-11-21
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RPageStorageFile
17 #define ROOT7_RPageStorageFile
18 
19 #include <ROOT/RMiniFile.hxx>
20 #include <ROOT/RNTupleMetrics.hxx>
21 #include <ROOT/RNTupleZip.hxx>
22 #include <ROOT/RPageStorage.hxx>
23 #include <ROOT/RStringView.hxx>
24 
25 #include <array>
26 #include <cstdio>
27 #include <memory>
28 #include <string>
29 #include <utility>
30 
31 class TFile;
32 
33 namespace ROOT {
34 
35 namespace Internal {
36 class RRawFile;
37 }
38 
39 namespace Experimental {
40 namespace Detail {
41 
42 class RClusterPool;
43 class RPageAllocatorHeap;
44 class RPagePool;
45 
46 
47 // clang-format off
48 /**
49 \class ROOT::Experimental::Detail::RPageSinkFile
50 \ingroup NTuple
51 \brief Storage provider that writes ntuple pages into a file
52 
53 The written file can be either in ROOT format or in RNTuple bare format.
54 */
55 // clang-format on
56 class RPageSinkFile : public RPageSink {
 // File-backed RPageSink: declares the hooks and bookkeeping needed to write ntuple pages into a file.
 // NOTE(review): this listing skips original lines 61, 70, 75 and 85. According to the member index of
 // this page, line 61 declares `RNTupleMetrics fMetrics` and line 70 declares `RNTupleCompressor fCompressor`;
 // `CommitClusterImpl(NTupleSize_t nEntries)` is also a member. Line 85 is presumably the move
 // constructor -- confirm against the real header.
57 public:
 /// NOTE(review): presumably the page capacity used when ReservePage() is called with nElements == 0 -- confirm
 /// in RPageStorageFile.cxx.
58  static constexpr std::size_t kDefaultElementsPerPage = 10000;
59 
60 private:
 /// Heap-based page allocator owned by this sink (presumably backs the pages handed out by ReservePage() -- confirm)
62  std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
63 
 /// Owned file writer (presumably performs the actual writes to the output file -- confirm)
64  std::unique_ptr<Internal::RNTupleFileWriter> fWriter;
65  /// Byte offset of the first page of the current cluster.
 /// Starts at the largest representable std::uint64_t value (conversion of -1).
66  std::uint64_t fClusterMinOffset = std::uint64_t(-1);
67  /// Byte offset of the end of the last page of the current cluster
68  std::uint64_t fClusterMaxOffset = 0;
69  /// Helper for zipping keys and header / footer; comprises a 16MB zip buffer
 // (The comment above belongs to `fCompressor`, whose declaration on line 70 is missing from this listing.)
71 
72 protected:
 // `final` overrides of virtual hooks inherited from the base class.
 /// Receives the model of the ntuple that is about to be written
73  void CreateImpl(const RNTupleModel &model) final;
 /// Receives a page of the given column; returns a locator (information about the physical location of data)
74  RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
76  void CommitDatasetImpl() final;
77 
78 public:
 /// Construct from an ntuple name, a file path and write options
79  RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
 /// Same as above with an additional reference to a caller-side std::unique_ptr<TFile>
 /// (NOTE(review): presumably an out-parameter that receives the created TFile -- confirm in the .cxx)
80  RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options,
81  std::unique_ptr<TFile> &file);
 /// Construct on top of a TFile that is passed in by reference
82  RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
 // Copying is disabled; move assignment is defaulted.
83  RPageSinkFile(const RPageSinkFile&) = delete;
84  RPageSinkFile& operator=(const RPageSinkFile&) = delete;
86  RPageSinkFile& operator=(RPageSinkFile&&) = default;
87  virtual ~RPageSinkFile();
88 
 /// Get a new, empty page for the given column that can be filled with up to nElements
89  RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements = 0) final;
 /// Every page store needs to be able to free pages it handed out
90  void ReleasePage(RPage &page) final;
91 
 /// Returns this sink's own metrics object, fMetrics
92  RNTupleMetrics &GetMetrics() final { return fMetrics; }
93 };
94 
95 
96 // clang-format off
97 /**
98 \class ROOT::Experimental::Detail::RPageAllocatorFile
99 \ingroup NTuple
100 \brief Manages pages read from a file
101 */
102 // clang-format on
104 public:
 // NOTE(review): the class head (`RPageAllocatorFile`, defined at original line 103 per the member index
 // of this page) is missing from this listing.
 /// Returns an RPage for column `columnId` given a caller-provided buffer `mem`, the size of one element and
 /// the number of elements. NOTE(review): presumably the page wraps `mem` rather than copying it -- confirm in
 /// RPageStorageFile.cxx.
105  static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements);
 /// Counterpart of NewPage(). NOTE(review): presumably releases the buffer attached to `page` -- confirm in
 /// RPageStorageFile.cxx.
106  static void DeletePage(const RPage& page);
107 };
108 
109 
110 // clang-format off
111 /**
112 \class ROOT::Experimental::Detail::RPageSourceFile
113 \ingroup NTuple
114 \brief Storage provider that reads ntuple pages from a file
115 */
116 // clang-format on
117 class RPageSourceFile : public RPageSource {
 // File-backed RPageSource: declares the state and hooks needed to read ntuple pages from a file.
 // NOTE(review): this listing skips original lines 125-141, 145, 152, 154, 158, 167, 176 and 178. According
 // to the member index of this page, these include the RCounters members (lines 125-141),
 // `RNTupleMetrics fMetrics` (145), `RCluster *fCurrentCluster` (152), `RNTupleDecompressor fDecompressor` (154)
 // and `Internal::RMiniFileReader fReader` (158); `RNTupleDescriptor AttachImpl()` is also a member
 // (presumably line 167). Lines 176 and 178 are presumably the copy and move constructors -- confirm
 // against the real header.
118 public:
119  /// Cannot process pages larger than 1MB
120  static constexpr std::size_t kMaxPageSize = 1024 * 1024;
121 
122 private:
123  /// I/O performance counters that get registered in fMetrics
124  struct RCounters {
 // (Counter members on lines 125-141 are missing from this listing; the member index names them, e.g. fNReadV,
 // fNRead, fSzReadPayload, fSzReadOverhead, fSzUnzip, fNClusterLoaded, fNPageLoaded, fNPagePopulated.)
142  };
143  std::unique_ptr<RCounters> fCounters;
144  /// Wraps the I/O counters and is observed by the RNTupleReader metrics
 // (Documents `fMetrics`, whose declaration on line 145 is missing from this listing.)
146 
147  /// Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile
148  std::unique_ptr<RPageAllocatorFile> fPageAllocator;
149  /// The page pool might, at some point, be used by multiple page sources
150  std::shared_ptr<RPagePool> fPagePool;
151  /// The last cluster from which a page got populated. Points into fClusterPool->fPool
 // (Documents `fCurrentCluster`, whose declaration on line 152 is missing from this listing.)
153  /// Helper to unzip pages and header/footer; comprises a 16MB unzip buffer
 // (Documents `fDecompressor`, whose declaration on line 154 is missing from this listing.)
155  /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
156  std::unique_ptr<ROOT::Internal::RRawFile> fFile;
157  /// Takes the fFile to read ntuple blobs from it
 // (Documents `fReader`, whose declaration on line 158 is missing from this listing.)
159  /// The cluster pool asynchronously preloads the next few clusters
160  std::unique_ptr<RClusterPool> fClusterPool;
161 
 /// Private constructor that takes no file path (NOTE(review): presumably shared setup used by the public
 /// constructor and by Clone() -- confirm in RPageStorageFile.cxx)
162  RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
 /// Returns a page of the given column, addressed by a cluster descriptor and an index relative to that cluster
163  RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor,
164  ClusterSize_t::ValueType idxInCluster);
165 
166 protected:
 // `final` override of a virtual hook inherited from the base class.
168  void UnzipClusterImpl(RCluster *cluster) final;
169 
170 public:
 /// Construct from an ntuple name, a file path and read options
171  RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
172  /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
173  /// The meta-data (header and footer) is reread and parsed by the clone.
174  std::unique_ptr<RPageSource> Clone() const final;
175 
 // Copy assignment is disabled; move assignment is defaulted.
177  RPageSourceFile& operator=(const RPageSourceFile&) = delete;
179  RPageSourceFile& operator=(RPageSourceFile&&) = default;
180  virtual ~RPageSourceFile();
181 
 /// Allocates and fills a page that contains the index-th element (index given as a global entry index)
182  RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
 /// Overload that addresses the element by an RClusterIndex, i.e. relative to a particular cluster
183  RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) final;
 /// Every page store needs to be able to free pages it handed out
184  void ReleasePage(RPage &page) final;
185 
 /// Populates all the pages of the given cluster id and columns
186  std::unique_ptr<RCluster> LoadCluster(DescriptorId_t clusterId, const ColumnSet_t &columns) final;
187 
 /// Returns this source's own metrics object, fMetrics
188  RNTupleMetrics &GetMetrics() final { return fMetrics; }
189 };
190 
191 
192 } // namespace Detail
193 
194 } // namespace Experimental
195 } // namespace ROOT
196 
197 #endif
ROOT::Experimental::RNTupleWriteOptions
Common user-tunable settings for storing ntuples.
Definition: RNTupleOptions.hxx:46
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fTimeCpuUnzip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuUnzip
Definition: RPageStorageFile.hxx:136
ROOT::Experimental::Detail::RPageSinkFile::ReleasePage
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
Definition: RPageStorageFile.cxx:179
ROOT::Experimental::Detail::RNTupleAtomicCounter
A thread-safe integral performance counter.
Definition: RNTupleMetrics.hxx:111
RNTupleMetrics.hxx
ROOT::Experimental::Detail::RPage
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
ROOT::Experimental::Detail::RNTupleTickCounter
An either thread-safe or non thread safe counter for CPU ticks.
Definition: RNTupleMetrics.hxx:210
ROOT::Experimental::Detail::RPageAllocatorFile
Manages pages read from a file.
Definition: RPageStorageFile.hxx:103
ROOT::Experimental::Detail::RNTupleMetrics
A collection of Counter objects with a name, a unit, and a description.
Definition: RNTupleMetrics.hxx:285
ROOT::Experimental::Detail::RPageSourceFile::Clone
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
Definition: RPageStorageFile.cxx:452
ROOT::Experimental::Detail::RPageSourceFile::fMetrics
RNTupleMetrics fMetrics
Wraps the I/O counters and is observed by the RNTupleReader metrics.
Definition: RPageStorageFile.hxx:145
ROOT::Experimental::Detail::RPageAllocatorFile::NewPage
static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
Definition: RPageStorageFile.cxx:188
ROOT::Experimental::Detail::RPageSinkFile::fCompressor
RNTupleCompressor fCompressor
Helper for zipping keys and header / footer; comprises a 16MB zip buffer.
Definition: RPageStorageFile.hxx:70
ROOT::Experimental::Detail::RPageSinkFile::CommitClusterImpl
RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries) final
Definition: RPageStorageFile.cxx:144
basic_string_view
Definition: libcpp_string_view.h:199
ROOT::Experimental::Detail::RPageSourceFile::fFile
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
Definition: RPageStorageFile.hxx:156
ROOT::Experimental::DescriptorId_t
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:91
ROOT::Experimental::Detail::RPageSourceFile::ReleasePage
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
Definition: RPageStorageFile.cxx:447
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fCompressionRatio
RNTupleCalcPerf & fCompressionRatio
Definition: RPageStorageFile.hxx:141
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNPageLoaded
RNTupleAtomicCounter & fNPageLoaded
Definition: RPageStorageFile.hxx:131
ROOT::Experimental::Detail::RNTupleDecompressor
Helper class to uncompress data blocks in the ROOT compression frame format.
Definition: RNTupleZip.hxx:134
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNRead
RNTupleAtomicCounter & fNRead
Definition: RPageStorageFile.hxx:126
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:785
ROOT::Experimental::Detail::RPageSourceFile::fReader
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
Definition: RPageStorageFile.hxx:158
ROOT::Experimental::Detail::RNTupleCalcPerf
A metric element that computes its floating point value from other counters.
Definition: RNTupleMetrics.hxx:169
ROOT::Experimental::Detail::RPageSinkFile::kDefaultElementsPerPage
static constexpr std::size_t kDefaultElementsPerPage
Definition: RPageStorageFile.hxx:58
ROOT::Experimental::Detail::RPageSource::ColumnSet_t
std::unordered_set< DescriptorId_t > ColumnSet_t
Derived from the model (fields) that are actually being requested at a given point in time.
Definition: RPageStorage.hxx:196
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fBandwidthReadUncompressed
RNTupleCalcPerf & fBandwidthReadUncompressed
Definition: RPageStorageFile.hxx:137
ROOT::Internal::RRawFile
The RRawFile provides read-only access to local and remote files.
Definition: RRawFile.hxx:43
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fSzReadOverhead
RNTupleAtomicCounter & fSzReadOverhead
Definition: RPageStorageFile.hxx:128
ROOT::Experimental::NTupleSize_t
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:55
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fBandwidthReadCompressed
RNTupleCalcPerf & fBandwidthReadCompressed
Definition: RPageStorageFile.hxx:138
ROOT::Experimental::RNTupleReadOptions
Common user-tunable settings for reading ntuples.
Definition: RNTupleOptions.hxx:71
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNPagePopulated
RNTupleAtomicCounter & fNPagePopulated
Definition: RPageStorageFile.hxx:132
ROOT::Experimental::Detail::RPageSource
Abstract interface to read data from an ntuple.
Definition: RPageStorage.hxx:193
ROOT::Experimental::Detail::RPageSourceFile::PopulatePage
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
Definition: RPageStorageFile.cxx:415
ROOT::Experimental::Detail::RPageSourceFile::fClusterPool
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
Definition: RPageStorageFile.hxx:160
ROOT::Experimental::RClusterDescriptor
Meta-data for a set of ntuple clusters.
Definition: RNTupleDescriptor.hxx:153
ROOT::Experimental::Detail::RPageSourceFile::fDecompressor
RNTupleDecompressor fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB unzip buffer.
Definition: RPageStorageFile.hxx:154
ROOT::Experimental::Detail::RPageSourceFile::AttachImpl
RNTupleDescriptor AttachImpl() final
Definition: RPageStorageFile.cxx:317
RPageStorage.hxx
ROOT::Experimental::Detail::RPageSourceFile::GetMetrics
RNTupleMetrics & GetMetrics() final
Returns an empty metrics. Page storage implementations usually have their own metrics.
Definition: RPageStorageFile.hxx:188
ROOT::Experimental::Detail::RPageSinkFile
Storage provider that writes ntuple pages into a file.
Definition: RPageStorageFile.hxx:56
ROOT::Experimental::RNTupleDescriptor
The on-storage meta-data of an ntuple.
Definition: RNTupleDescriptor.hxx:279
ROOT::Experimental::Detail::RPageSinkFile::fPageAllocator
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
Definition: RPageStorageFile.hxx:62
ROOT::Experimental::Detail::RPageSourceFile::RCounters
I/O performance counters that get registered in fMetrics.
Definition: RPageStorageFile.hxx:124
ROOT::Experimental::RClusterSize::ValueType
std::uint32_t ValueType
Definition: RNTupleUtil.hxx:59
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fTimeCpuRead
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuRead
Definition: RPageStorageFile.hxx:135
RMiniFile.hxx
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fBandwidthUnzip
RNTupleCalcPerf & fBandwidthUnzip
Definition: RPageStorageFile.hxx:139
ROOT::Experimental::Detail::RPageSourceFile::LoadCluster
std::unique_ptr< RCluster > LoadCluster(DescriptorId_t clusterId, const ColumnSet_t &columns) final
Populates all the pages of the given cluster id and columns; it is possible that some columns do not ...
Definition: RPageStorageFile.cxx:461
RStringView.hxx
ROOT::Experimental::Detail::RPageStorage::RColumnHandle
Definition: RPageStorage.hxx:90
ROOT::Experimental::RNTupleModel
The RNTupleModel encapsulates the schema of an ntuple.
Definition: RNTupleModel.hxx:46
ROOT::Experimental::Detail::RPageSourceFile::fCurrentCluster
RCluster * fCurrentCluster
The last cluster from which a page got populated. Points into fClusterPool->fPool.
Definition: RPageStorageFile.hxx:152
ROOT::Experimental::Detail::RPageSourceFile::fPageAllocator
std::unique_ptr< RPageAllocatorFile > fPageAllocator
Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile.
Definition: RPageStorageFile.hxx:148
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fSzUnzip
RNTupleAtomicCounter & fSzUnzip
Definition: RPageStorageFile.hxx:129
ROOT::Experimental::Detail::RPageSourceFile::kMaxPageSize
static constexpr std::size_t kMaxPageSize
Cannot process pages larger than 1MB.
Definition: RPageStorageFile.hxx:120
ROOT::Experimental::Detail::RPageSink
Abstract interface to write data into an ntuple.
Definition: RPageStorage.hxx:129
TFile
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
ROOT::Experimental::Detail::RPageSourceFile::fPagePool
std::shared_ptr< RPagePool > fPagePool
The page pool might, at some point, be used by multiple page sources.
Definition: RPageStorageFile.hxx:150
ROOT::Experimental::Detail::RPageSinkFile::fWriter
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
Definition: RPageStorageFile.hxx:64
ROOT::Experimental::Detail::RPageSourceFile::UnzipClusterImpl
void UnzipClusterImpl(RCluster *cluster) final
Definition: RPageStorageFile.cxx:590
ROOT::Experimental::Detail::RPageSinkFile::fMetrics
RNTupleMetrics fMetrics
Definition: RPageStorageFile.hxx:61
ROOT::Experimental::RClusterIndex
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:95
ROOT::Experimental::Detail::RPageSourceFile::PopulatePageFromCluster
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType idxInCluster)
Definition: RPageStorageFile.cxx:338
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fTimeWallUnzip
RNTupleAtomicCounter & fTimeWallUnzip
Definition: RPageStorageFile.hxx:134
RNTupleZip.hxx
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNClusterLoaded
RNTupleAtomicCounter & fNClusterLoaded
Definition: RPageStorageFile.hxx:130
ROOT::Experimental::Detail::RPageSinkFile::CommitDatasetImpl
void CommitDatasetImpl() final
Definition: RPageStorageFile.cxx:155
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fTimeWallRead
RNTupleAtomicCounter & fTimeWallRead
Definition: RPageStorageFile.hxx:133
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fSzReadPayload
RNTupleAtomicCounter & fSzReadPayload
Definition: RPageStorageFile.hxx:127
ROOT::Experimental::Detail::RPageSourceFile
Storage provider that reads ntuple pages from a file.
Definition: RPageStorageFile.hxx:117
file
Definition: file.py:1
ROOT::Experimental::Detail::RPageSinkFile::CreateImpl
void CreateImpl(const RNTupleModel &model) final
Definition: RPageStorageFile.cxx:90
ROOT::Experimental::Detail::RPageSourceFile::fCounters
std::unique_ptr< RCounters > fCounters
Definition: RPageStorageFile.hxx:143
ROOT::Experimental::RClusterDescriptor::RLocator
Generic information about the physical location of data.
Definition: RNTupleDescriptor.hxx:160
ROOT::Experimental::Detail::RPageSourceFile::RPageSourceFile
RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options)
Definition: RPageStorageFile.cxx:207
make_cnn_model.model
model
Definition: make_cnn_model.py:6
ROOT::Experimental::Detail::RPageSinkFile::fClusterMinOffset
std::uint64_t fClusterMinOffset
Byte offset of the first page of the current cluster.
Definition: RPageStorageFile.hxx:66
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNReadV
RNTupleAtomicCounter & fNReadV
Definition: RPageStorageFile.hxx:125
ROOT::Experimental::Internal::RMiniFileReader
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition: RMiniFile.hxx:108
ROOT::Experimental::Detail::RPageAllocatorFile::DeletePage
static void DeletePage(const RPage &page)
Definition: RPageStorageFile.cxx:196
ROOT::Experimental::Detail::RPageSinkFile::GetMetrics
RNTupleMetrics & GetMetrics() final
Returns an empty metrics. Page storage implementations usually have their own metrics.
Definition: RPageStorageFile.hxx:92
ROOT::Experimental::Detail::RPageSinkFile::ReservePage
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0) final
Get a new, empty page for the given column that can be filled with up to nElements.
Definition: RPageStorageFile.cxx:171
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fFractionReadOverhead
RNTupleCalcPerf & fFractionReadOverhead
Definition: RPageStorageFile.hxx:140
ROOT::Experimental::ColumnId_t
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:87
ROOT::Experimental::Detail::RPageSinkFile::CommitPageImpl
RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
Definition: RPageStorageFile.cxx:105
ROOT
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: EExecutionPolicy.hxx:4
ROOT::Experimental::Detail::RNTupleCompressor
Helper class to compress data blocks in the ROOT compression frame format.
Definition: RNTupleZip.hxx:40
ROOT::Experimental::Detail::RCluster
An in-memory subset of the packed and compressed pages of a cluster.
Definition: RCluster.hxx:154
ROOT::Experimental::Detail::RPageSinkFile::fClusterMaxOffset
std::uint64_t fClusterMaxOffset
Byte offset of the end of the last page of the current cluster.
Definition: RPageStorageFile.hxx:68