Logo ROOT  
Reference Guide
RPageStorageFile.hxx
Go to the documentation of this file.
1 /// \file ROOT/RPageStorageFile.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2019-11-21
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RPageStorageFile
17 #define ROOT7_RPageStorageFile
18 
19 #include <ROOT/RPageStorage.hxx>
20 #include <ROOT/RMiniFile.hxx>
21 #include <ROOT/RNTupleMetrics.hxx>
22 #include <ROOT/RNTupleZip.hxx>
23 #include <ROOT/RStringView.hxx>
24 
25 #include <array>
26 #include <cstdio>
27 #include <memory>
28 #include <string>
29 
30 class TFile;
31 
32 namespace ROOT {
33 
34 namespace Internal {
35 class RRawFile;
36 }
37 
38 namespace Experimental {
39 namespace Detail {
40 
41 class RCluster;
42 class RClusterPool;
43 class RPageAllocatorHeap;
44 class RPagePool;
45 
46 
47 // clang-format off
48 /**
49 \class ROOT::Experimental::Detail::RPageSinkFile
50 \ingroup NTuple
51 \brief Storage provider that writes ntuple pages into a file
52 
53 The written file can be either in ROOT format or in RNTuple bare format.
54 */
55 // clang-format on
// NOTE(review): listing lines 61, 70 and 75 were dropped from this extract and are restored below from the
// symbol index at the end of this page: fMetrics (RPageStorageFile.hxx:61), fCompressor (RPageStorageFile.hxx:70)
// and CommitClusterImpl. The index only gives the .cxx location of CommitClusterImpl; it is placed on the single
// free listing line (75) among the other *Impl overrides -- confirm against the real header.
56 class RPageSinkFile : public RPageSink {
57 public:
58  static constexpr std::size_t kDefaultElementsPerPage = 10000;
59 
60 private:
61  RNTupleMetrics fMetrics;
62  std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
63 
64  std::unique_ptr<Internal::RNTupleFileWriter> fWriter;
65  /// Byte offset of the first page of the current cluster
66  std::uint64_t fClusterMinOffset = std::uint64_t(-1);
67  /// Byte offset of the end of the last page of the current cluster
68  std::uint64_t fClusterMaxOffset = 0;
69  /// Helper for zipping keys and header / footer; comprises a 16MB zip buffer
70  RNTupleCompressor fCompressor;
71 
72 protected:
73  void CreateImpl(const RNTupleModel &model) final;
74  RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
75  RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries) final;
76  void CommitDatasetImpl() final;
77 
78 public:
79  RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
80  RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options,
81  std::unique_ptr<TFile> &file);
82  RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
83  virtual ~RPageSinkFile();
84 
    /// Get a new, empty page for the given column that can be filled with up to nElements.
85  RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements = 0) final;
    /// Every page store needs to be able to free pages it handed out.
86  void ReleasePage(RPage &page) final;
87 
    /// Gives access to this sink's own fMetrics member.
88  RNTupleMetrics &GetMetrics() final { return fMetrics; }
89 };
90 
91 
92 // clang-format off
93 /**
94 \class ROOT::Experimental::Detail::RPageAllocatorFile
95 \ingroup NTuple
96 \brief Manages pages read from a file
97 */
98 // clang-format on
// NOTE(review): listing line 99 (the class head) was dropped from this extract, leaving `public:` ... `};`
// dangling at namespace scope. The symbol index at the end of this page places the definition of
// RPageAllocatorFile at RPageStorageFile.hxx:99, so the head is restored here. A base-class list, if the real
// header has one, is not recoverable from the index -- confirm against the real header.
99 class RPageAllocatorFile {
100 public:
     // presumably tags the caller-provided buffer `mem` as a page of `columnId` -- TODO confirm in the .cxx
101  static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements);
     // presumably the counterpart of NewPage for disposing of a page -- TODO confirm in the .cxx
102  static void DeletePage(const RPage& page);
103 };
104 
105 
106 // clang-format off
107 /**
108 \class ROOT::Experimental::Detail::RPageSourceFile
109 \ingroup NTuple
110 \brief Storage provider that reads ntuple pages from a file
111 */
112 // clang-format on
// NOTE(review): listing lines 121-132, 136, 143, 145, 149 and 158 were dropped from this extract and are restored
// below from the symbol index at the end of this page (RCounters members, fMetrics, fCurrentCluster,
// fDecompressor, fReader, AttachImpl). The index does not show initializers: `= nullptr` on fCurrentCluster is a
// defensive choice for a non-owning raw pointer. The index only gives the .cxx location of AttachImpl; it is
// placed on the single free line (158) of the otherwise empty protected section. Confirm both against the header.
113 class RPageSourceFile : public RPageSource {
114 public:
115  /// Cannot process pages larger than 1MB
116  static constexpr std::size_t kMaxPageSize = 1024 * 1024;
117 
118 private:
119  /// I/O performance counters that get registered in fMetrics
120  struct RCounters {
121  RNTupleAtomicCounter &fNReadV;
122  RNTupleAtomicCounter &fNRead;
123  RNTupleAtomicCounter &fSzReadPayload;
124  RNTupleAtomicCounter &fSzReadOverhead;
125  RNTuplePlainCounter &fSzUnzip;
126  RNTupleAtomicCounter &fNClusterLoaded;
127  RNTuplePlainCounter &fNPageLoaded;
128  RNTuplePlainCounter &fNPagePopulated;
129  RNTupleAtomicCounter &fTimeWallRead;
130  RNTuplePlainCounter &fTimeWallUnzip;
131  RNTupleTickCounter<RNTupleAtomicCounter> &fTimeCpuRead;
132  RNTupleTickCounter<RNTuplePlainCounter> &fTimeCpuUnzip;
133  };
134  std::unique_ptr<RCounters> fCounters;
135  /// Wraps the I/O counters and is observed by the RNTupleReader metrics
136  RNTupleMetrics fMetrics;
137 
138  /// Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile
139  std::unique_ptr<RPageAllocatorFile> fPageAllocator;
140  /// The page pool might, at some point, be used by multiple page sources
141  std::shared_ptr<RPagePool> fPagePool;
142  /// The last cluster from which a page got populated. Points into fClusterPool->fPool
143  RCluster *fCurrentCluster = nullptr;
144  /// Helper to unzip pages and header/footer; comprises a 16MB unzip buffer
145  RNTupleDecompressor fDecompressor;
146  /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
147  std::unique_ptr<ROOT::Internal::RRawFile> fFile;
148  /// Takes the fFile to read ntuple blobs from it
149  Internal::RMiniFileReader fReader;
150  /// The cluster pool asynchronously preloads the next few clusters
151  std::unique_ptr<RClusterPool> fClusterPool;
152 
153  RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
154  RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor,
155  ClusterSize_t::ValueType clusterIndex);
156 
157 protected:
158  RNTupleDescriptor AttachImpl() final;
159 
160 public:
161  RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
162  /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
163  /// The meta-data (header and footer) is reread and parsed by the clone.
164  std::unique_ptr<RPageSource> Clone() const final;
165  virtual ~RPageSourceFile();
166 
     /// Allocates and fills a page that contains the index-th element.
167  RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
168  RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) final;
     /// Every page store needs to be able to free pages it handed out.
169  void ReleasePage(RPage &page) final;
170 
171  std::unique_ptr<RCluster> LoadCluster(DescriptorId_t clusterId, const ColumnSet_t &columns) final;
172 
     /// Gives access to this source's own fMetrics member.
173  RNTupleMetrics &GetMetrics() final { return fMetrics; }
174 };
175 
176 
177 } // namespace Detail
178 
179 } // namespace Experimental
180 } // namespace ROOT
181 
182 #endif
ROOT::Experimental::RNTupleWriteOptions
Common user-tunable settings for storing ntuples.
Definition: RNTupleOptions.hxx:58
ROOT::Experimental::Detail::RPageSinkFile::ReleasePage
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
Definition: RPageStorageFile.cxx:173
ROOT::Experimental::Detail::RNTupleAtomicCounter
A thread-safe integral performance counter.
Definition: RNTupleMetrics.hxx:125
RNTupleMetrics.hxx
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fTimeWallUnzip
RNTuplePlainCounter & fTimeWallUnzip
Definition: RPageStorageFile.hxx:130
ROOT::Experimental::Detail::RPage
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:59
ROOT::Experimental::Detail::RNTupleTickCounter
An either thread-safe or non thread safe counter for CPU ticks.
Definition: RNTupleMetrics.hxx:186
ROOT::Experimental::Detail::RPageAllocatorFile
Manages pages read from a file.
Definition: RPageStorageFile.hxx:99
ROOT::Experimental::Detail::RNTupleMetrics
A collection of Counter objects with a name, a unit, and a description.
Definition: RNTupleMetrics.hxx:261
ROOT::Experimental::Detail::RPageSourceFile::Clone
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
Definition: RPageStorageFile.cxx:372
ROOT::Experimental::Detail::RPageSourceFile::fMetrics
RNTupleMetrics fMetrics
Wraps the I/O counters and is observed by the RNTupleReader metrics.
Definition: RPageStorageFile.hxx:136
ROOT::Experimental::Detail::RPageAllocatorFile::NewPage
static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
Definition: RPageStorageFile.cxx:182
ROOT::Experimental::Detail::RPageSinkFile::fCompressor
RNTupleCompressor fCompressor
Helper for zipping keys and header / footer; comprises a 16MB zip buffer.
Definition: RPageStorageFile.hxx:70
ROOT::Experimental::Detail::RPageSinkFile::CommitClusterImpl
RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries) final
Definition: RPageStorageFile.cxx:138
basic_string_view
Definition: libcpp_string_view.h:199
ROOT::Experimental::Detail::RPageSourceFile::fFile
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
Definition: RPageStorageFile.hxx:147
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fSzUnzip
RNTuplePlainCounter & fSzUnzip
Definition: RPageStorageFile.hxx:125
ROOT::Experimental::DescriptorId_t
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:90
ROOT::Experimental::Detail::RPageSourceFile::ReleasePage
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
Definition: RPageStorageFile.cxx:367
ROOT::Experimental::Detail::RNTupleDecompressor
Helper class to uncompress data blocks in the ROOT compression frame format.
Definition: RNTupleZip.hxx:150
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNRead
RNTupleAtomicCounter & fNRead
Definition: RPageStorageFile.hxx:122
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:785
ROOT::Experimental::Detail::RPageSourceFile::fReader
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
Definition: RPageStorageFile.hxx:149
ROOT::Experimental::Detail::RPageSinkFile::kDefaultElementsPerPage
static constexpr std::size_t kDefaultElementsPerPage
Definition: RPageStorageFile.hxx:58
ROOT::Experimental::Detail::RPageSource::ColumnSet_t
std::unordered_set< DescriptorId_t > ColumnSet_t
Derived from the model (fields) that are actually being requested at a given point in time.
Definition: RPageStorage.hxx:183
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fSzReadOverhead
RNTupleAtomicCounter & fSzReadOverhead
Definition: RPageStorageFile.hxx:124
ROOT::Experimental::NTupleSize_t
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:54
ROOT::Experimental::RNTupleReadOptions
Common user-tunable settings for reading ntuples.
Definition: RNTupleOptions.hxx:83
ROOT::Experimental::Detail::RPageSource
Abstract interface to read data from an ntuple.
Definition: RPageStorage.hxx:180
ROOT::Experimental::Detail::RPageSourceFile::PopulatePage
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
Definition: RPageStorageFile.cxx:335
ROOT::Experimental::Detail::RPageSourceFile::fClusterPool
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
Definition: RPageStorageFile.hxx:151
ROOT::Experimental::RClusterDescriptor
Meta-data for a set of ntuple clusters.
Definition: RNTupleDescriptor.hxx:160
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fTimeCpuUnzip
RNTupleTickCounter< RNTuplePlainCounter > & fTimeCpuUnzip
Definition: RPageStorageFile.hxx:132
ROOT::Experimental::Detail::RPageSourceFile::fDecompressor
RNTupleDecompressor fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB unzip buffer.
Definition: RPageStorageFile.hxx:145
ROOT::Experimental::Detail::RPageSourceFile::AttachImpl
RNTupleDescriptor AttachImpl() final
Definition: RPageStorageFile.cxx:243
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNPageLoaded
RNTuplePlainCounter & fNPageLoaded
Definition: RPageStorageFile.hxx:127
RPageStorage.hxx
ROOT::Experimental::Detail::RPageSourceFile::GetMetrics
RNTupleMetrics & GetMetrics() final
Returns an empty metrics. Page storage implementations usually have their own metrics.
Definition: RPageStorageFile.hxx:173
ROOT::Experimental::Detail::RPageSinkFile
Storage provider that writes ntuple pages into a file.
Definition: RPageStorageFile.hxx:56
ROOT::Experimental::RNTupleDescriptor
The on-storage meta-data of an ntuple.
Definition: RNTupleDescriptor.hxx:286
ROOT::Experimental::Detail::RPageSinkFile::fPageAllocator
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
Definition: RPageStorageFile.hxx:62
ROOT::Experimental::Detail::RPageSourceFile::RCounters
I/O performance counters that get registered in fMetrics.
Definition: RPageStorageFile.hxx:120
ROOT::Experimental::RClusterSize::ValueType
std::uint32_t ValueType
Definition: RNTupleUtil.hxx:58
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fTimeCpuRead
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuRead
Definition: RPageStorageFile.hxx:131
RMiniFile.hxx
ROOT::Experimental::Detail::RPageSourceFile::LoadCluster
std::unique_ptr< RCluster > LoadCluster(DescriptorId_t clusterId, const ColumnSet_t &columns) final
Populates all the pages of the given cluster id and columns; it is possible that some columns do not ...
Definition: RPageStorageFile.cxx:381
RStringView.hxx
ROOT::Experimental::Detail::RPageStorage::RColumnHandle
Definition: RPageStorage.hxx:85
ROOT::Experimental::RNTupleModel
The RNTupleModel encapsulates the schema of an ntuple.
Definition: RNTupleModel.hxx:58
ROOT::Experimental::Detail::RPageSourceFile::fCurrentCluster
RCluster * fCurrentCluster
The last cluster from which a page got populated. Points into fClusterPool->fPool.
Definition: RPageStorageFile.hxx:143
ROOT::Experimental::Detail::RPageSourceFile::fPageAllocator
std::unique_ptr< RPageAllocatorFile > fPageAllocator
Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile.
Definition: RPageStorageFile.hxx:139
ROOT::Experimental::Detail::RPageSourceFile::kMaxPageSize
static constexpr std::size_t kMaxPageSize
Cannot process pages larger than 1MB.
Definition: RPageStorageFile.hxx:116
ROOT::Experimental::Detail::RPageSink
Abstract interface to write data into an ntuple.
Definition: RPageStorage.hxx:122
TFile
Definition: TFile.h:54
ROOT::Experimental::Detail::RPageSourceFile::fPagePool
std::shared_ptr< RPagePool > fPagePool
The page pool might, at some point, be used by multiple page sources.
Definition: RPageStorageFile.hxx:141
ROOT::Experimental::Detail::RPageSinkFile::fWriter
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
Definition: RPageStorageFile.hxx:64
ROOT::Experimental::Detail::RPageSinkFile::fMetrics
RNTupleMetrics fMetrics
Definition: RPageStorageFile.hxx:61
ROOT::Experimental::RClusterIndex
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:94
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNPagePopulated
RNTuplePlainCounter & fNPagePopulated
Definition: RPageStorageFile.hxx:128
RNTupleZip.hxx
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNClusterLoaded
RNTupleAtomicCounter & fNClusterLoaded
Definition: RPageStorageFile.hxx:126
ROOT::Experimental::Detail::RPageSinkFile::CommitDatasetImpl
void CommitDatasetImpl() final
Definition: RPageStorageFile.cxx:149
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fTimeWallRead
RNTupleAtomicCounter & fTimeWallRead
Definition: RPageStorageFile.hxx:129
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fSzReadPayload
RNTupleAtomicCounter & fSzReadPayload
Definition: RPageStorageFile.hxx:123
ROOT::Experimental::Detail::RPageSourceFile
Storage provider that reads ntuple pages from a file.
Definition: RPageStorageFile.hxx:113
file
Definition: file.py:1
ROOT::Experimental::Detail::RPageSinkFile::CreateImpl
void CreateImpl(const RNTupleModel &model) final
Definition: RPageStorageFile.cxx:84
ROOT::Experimental::Detail::RPageSourceFile::fCounters
std::unique_ptr< RCounters > fCounters
Definition: RPageStorageFile.hxx:134
ROOT::Experimental::RClusterDescriptor::RLocator
Generic information about the physical location of data.
Definition: RNTupleDescriptor.hxx:167
ROOT::Experimental::Detail::RPageSourceFile::RPageSourceFile
RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options)
Definition: RPageStorageFile.cxx:201
make_cnn_model.model
model
Definition: make_cnn_model.py:6
ROOT::Experimental::Detail::RPageSinkFile::fClusterMinOffset
std::uint64_t fClusterMinOffset
Byte offset of the first page of the current cluster.
Definition: RPageStorageFile.hxx:66
ROOT::Experimental::Detail::RPageSourceFile::RCounters::fNReadV
RNTupleAtomicCounter & fNReadV
Definition: RPageStorageFile.hxx:121
ROOT::Experimental::Internal::RMiniFileReader
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition: RMiniFile.hxx:102
ROOT::Experimental::Detail::RPageAllocatorFile::DeletePage
static void DeletePage(const RPage &page)
Definition: RPageStorageFile.cxx:190
ROOT::Experimental::Detail::RPageSinkFile::GetMetrics
RNTupleMetrics & GetMetrics() final
Returns an empty metrics. Page storage implementations usually have their own metrics.
Definition: RPageStorageFile.hxx:88
ROOT::Experimental::Detail::RPageSinkFile::ReservePage
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0) final
Get a new, empty page for the given column that can be filled with up to nElements.
Definition: RPageStorageFile.cxx:165
ROOT::Experimental::ColumnId_t
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:86
ROOT::Experimental::Detail::RPageSinkFile::CommitPageImpl
RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
Definition: RPageStorageFile.cxx:99
ROOT
VSD Structures.
Definition: StringConv.hxx:21
ROOT::Experimental::Detail::RNTupleCompressor
Helper class to compress data blocks in the ROOT compression frame format.
Definition: RNTupleZip.hxx:58
ROOT::Experimental::Detail::RCluster
An in-memory subset of the packed and compressed pages of a cluster.
Definition: RCluster.hxx:154
ROOT::Experimental::Detail::RPageSourceFile::PopulatePageFromCluster
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
Definition: RPageStorageFile.cxx:264
ROOT::Experimental::Detail::RPageSinkFile::fClusterMaxOffset
std::uint64_t fClusterMaxOffset
Byte offset of the end of the last page of the current cluster.
Definition: RPageStorageFile.hxx:68
ROOT::Experimental::Detail::RNTuplePlainCounter
A non thread-safe integral performance counter.
Definition: RNTupleMetrics.hxx:98