Logo ROOT  
Reference Guide
RPageStorage.hxx
Go to the documentation of this file.
1 /// \file ROOT/RPageStorage.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2018-07-19
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RPageStorage
17 #define ROOT7_RPageStorage
18 
20 #include <ROOT/RNTupleOptions.hxx>
21 #include <ROOT/RNTupleUtil.hxx>
22 #include <ROOT/RPage.hxx>
23 #include <ROOT/RPageAllocator.hxx>
24 #include <ROOT/RStringView.hxx>
25 
26 #include <atomic>
27 #include <cstddef>
28 #include <memory>
29 #include <unordered_set>
30 
31 namespace ROOT {
32 namespace Experimental {
33 
34 class RNTupleModel;
35 // TODO(jblomer): factory methods to create tree sinks and sources outside Detail namespace
36 
37 namespace Detail {
38 
39 class RCluster;
40 class RColumn;
41 class RPagePool;
42 class RFieldBase;
43 class RNTupleMetrics;
44 
45 enum class EPageStorageType {
46  kSink, ///< The storage is a page sink, i.e. an ntuple is written to it (cf. RPageSink)
47  kSource, ///< The storage is a page source, i.e. an ntuple is read from it (cf. RPageSource)
48 };
49 
50 // clang-format off
51 /**
52 \class ROOT::Experimental::Detail::RPageStorage
53 \ingroup NTuple
54 \brief Common functionality of an ntuple storage for both reading and writing
55 
56 The RPageStorage provides access to a storage container that keeps the bits of pages and clusters comprising
57 an ntuple. Concrete implementations can use a TFile, a raw file, an object store, and so on.
58 */
59 // clang-format on
60 class RPageStorage {
61 protected:
62  std::string fNTupleName;
63 
64 public:
66  RPageStorage(const RPageStorage &other) = delete; // page storages cannot be copy-constructed
67  RPageStorage& operator =(const RPageStorage &other) = delete; // ... nor copy-assigned
68  virtual ~RPageStorage();
69 
70  /// Whether the concrete implementation is a sink or a source
71  virtual EPageStorageType GetType() = 0;
72 
73  struct RColumnHandle { // NOTE(review): fId, used below, is declared on a listing line missing from this extract
75  const RColumn *fColumn = nullptr;
76 
77  /// Returns true for a valid column handle; fColumn and fId should always either both
78  /// be valid or both be invalid.
79  operator bool() const { return fId != kInvalidDescriptorId && fColumn; }
80  };
81  /// The column handle identifies a column within the currently open page storage
82  using ColumnHandle_t = RColumnHandle;
83 
84  /// Register a new column. When reading, the column must exist in the ntuple on disk corresponding to the meta-data.
85  /// When writing, every column can only be attached once.
86  virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) = 0;
87  /// Unregisters a column. A page source decreases the reference counter for the corresponding active column.
88  /// For a page sink, dropping columns is currently a no-op.
89  virtual void DropColumn(ColumnHandle_t columnHandle) = 0;
90 
91  /// Every page store needs to be able to free pages it handed out. But sinks and sources have different means
92  /// of allocating pages.
93  virtual void ReleasePage(RPage &page) = 0;
94 
95  /// Returns an empty metrics object. Page storage implementations usually have their own metrics.
96  virtual RNTupleMetrics &GetMetrics();
97 };
98 
99 // clang-format off
100 /**
101 \class ROOT::Experimental::Detail::RPageSink
102 \ingroup NTuple
103 \brief Abstract interface to write data into an ntuple
104 
105 The page sink takes the list of columns and afterwards a series of page commits and cluster commits.
106 The user is responsible for committing clusters at a consistent point, i.e. when all pages corresponding to data
107 up to the given entry number are committed.
108 */
109 // clang-format on
110 class RPageSink : public RPageStorage {
111 protected:
113 
114  /// Building the ntuple descriptor while writing is done in the same way for all the storage sink implementations.
115  /// Field, column, cluster ids and page indexes per cluster are issued sequentially starting with 0
120  /// Keeps track of the number of elements in the currently open cluster. Indexed by column id.
121  std::vector<RClusterDescriptor::RColumnRange> fOpenColumnRanges;
122  /// Keeps track of the written pages in the currently open cluster. Indexed by column id.
123  std::vector<RClusterDescriptor::RPageRange> fOpenPageRanges;
125 
126  virtual void CreateImpl(const RNTupleModel &model) = 0;
127  virtual RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) = 0;
129  virtual void CommitDatasetImpl() = 0;
130 
131 public:
132  RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options);
133  virtual ~RPageSink();
134  /// Guess the concrete derived page sink from the file name (location)
135  static std::unique_ptr<RPageSink> Create(std::string_view ntupleName, std::string_view location,
138 
139  ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
140  void DropColumn(ColumnHandle_t /*columnHandle*/) final {}
141 
142  /// Physically creates the storage container to hold the ntuple (e.g., a key in a TFile or an S3 bucket)
143  /// To do so, Create() calls CreateImpl() after updating the descriptor.
144  /// Create() associates column handles to the columns referenced by the model
145  void Create(RNTupleModel &model);
146  /// Write a page to the storage. The column must have been added before.
147  void CommitPage(ColumnHandle_t columnHandle, const RPage &page);
148  /// Finalize the current cluster and create a new one for the following data.
149  void CommitCluster(NTupleSize_t nEntries);
150  /// Finalize the current cluster and the entire data set.
151  void CommitDataset() { CommitDatasetImpl(); }
152 
153  /// Get a new, empty page for the given column that can be filled with up to nElements. If nElements is zero,
154  /// the page sink picks an appropriate size.
155  virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements = 0) = 0;
156 };
157 
158 // clang-format off
159 /**
160 \class ROOT::Experimental::Detail::RPageSource
161 \ingroup NTuple
162 \brief Abstract interface to read data from an ntuple
163 
164 The page source is initialized with the columns of interest. Pages from those columns can then be
165 mapped into memory. The page source also gives access to the ntuple's meta-data.
166 */
167 // clang-format on
168 class RPageSource : public RPageStorage {
169 public:
170  /// Derived from the model (fields) that are actually being requested at a given point in time
171  using ColumnSet_t = std::unordered_set<DescriptorId_t>;
172 
173 protected:
176  /// The active columns are implicitly defined by the model fields or views
178 
179  virtual RNTupleDescriptor AttachImpl() = 0;
180 
181 public:
183  virtual ~RPageSource();
184  /// Guess the concrete derived page source from the file name (location)
185  static std::unique_ptr<RPageSource> Create(std::string_view ntupleName, std::string_view location,
187  /// Open the same storage multiple times, e.g. for reading in multiple threads
188  virtual std::unique_ptr<RPageSource> Clone() const = 0;
189 
191  const RNTupleDescriptor &GetDescriptor() const { return fDescriptor; }
192  ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
193  void DropColumn(ColumnHandle_t columnHandle) final;
194 
195  /// Open the physical storage container for the ntuple; stores the descriptor returned by AttachImpl()
196  void Attach() { fDescriptor = AttachImpl(); }
199  ColumnId_t GetColumnId(ColumnHandle_t columnHandle);
200 
201  /// Allocates and fills a page that contains the index-th element
202  virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) = 0;
203  /// Another version of PopulatePage that allows specifying cluster-relative indexes
204  virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) = 0;
205 
206  /// Populates all the pages of the given cluster id and columns; it is possible that some columns do not
207  /// contain any pages. The page source may load more columns than the minimal necessary set from `columns`.
208  /// To indicate which columns have been loaded, LoadCluster() must mark them with SetColumnAvailable().
209  /// That includes the ones from the `columns` that don't have pages; otherwise subsequent requests
210  /// for the cluster would assume an incomplete cluster and trigger loading again.
211  /// LoadCluster() is typically called from the I/O thread of a cluster pool, i.e. the method runs
212  /// concurrently to other methods of the page source.
213  virtual std::unique_ptr<RCluster> LoadCluster(DescriptorId_t clusterId, const ColumnSet_t &columns) = 0;
214 };
215 
216 } // namespace Detail
217 
218 } // namespace Experimental
219 } // namespace ROOT
220 
221 #endif
ROOT::Experimental::RNTupleWriteOptions
Common user-tunable settings for storing ntuples.
Definition: RNTupleOptions.hxx:58
ROOT::Experimental::Detail::RPageStorage::~RPageStorage
virtual ~RPageStorage()
Definition: RPageStorage.cxx:37
ROOT::Experimental::Detail::RPageSource::~RPageSource
virtual ~RPageSource()
Definition: RPageStorage.cxx:56
ROOT::Experimental::Detail::RPageSource::AttachImpl
virtual RNTupleDescriptor AttachImpl()=0
ROOT::Experimental::Detail::RPage
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:59
ROOT::Experimental::Detail::RNTupleMetrics
A collection of Counter objects with a name, a unit, and a description.
Definition: RNTupleMetrics.hxx:261
ROOT::Experimental::Detail::RPageSource::Create
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
Definition: RPageStorage.cxx:60
ROOT::Experimental::Detail::RPageSink::CommitPage
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
Definition: RPageStorage.cxx:161
ROOT::Experimental::DescriptorId_t
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:90
ROOT::Experimental::Detail::RPageStorage
Common functionality of an ntuple storage for both reading and writing.
Definition: RPageStorage.hxx:72
RPage.hxx
ROOT::Experimental::Detail::RPageSource::fActiveColumns
ColumnSet_t fActiveColumns
The active columns are implicitly defined by the model fields or views.
Definition: RPageStorage.hxx:189
ROOT::Experimental::Detail::RPageSink::DropColumn
void DropColumn(ColumnHandle_t) final
Unregisters a column.
Definition: RPageStorage.hxx:152
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:785
ROOT::Experimental::Detail::RPageSource::AddColumn
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
Definition: RPageStorage.cxx:67
ROOT::Experimental::Detail::RPageSink::fOpenPageRanges
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
Definition: RPageStorage.hxx:135
ROOT::Experimental::Detail::RPageSink::fLastClusterId
DescriptorId_t fLastClusterId
Definition: RPageStorage.hxx:130
ROOT::Experimental::Detail::RPageSource::ColumnSet_t
std::unordered_set< DescriptorId_t > ColumnSet_t
Derived from the model (fields) that are actually being requested at a given point in time.
Definition: RPageStorage.hxx:183
ROOT::Experimental::RNTupleDescriptorBuilder
A helper class for piece-wise construction of an RNTupleDescriptor.
Definition: RNTupleDescriptor.hxx:480
ROOT::Experimental::Detail::RPageSink::CommitPageImpl
virtual RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
ROOT::Experimental::Detail::RPageSink::fDescriptorBuilder
RNTupleDescriptorBuilder fDescriptorBuilder
Definition: RPageStorage.hxx:136
ROOT::Experimental::NTupleSize_t
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:54
ROOT::Experimental::RNTupleReadOptions
Common user-tunable settings for reading ntuples.
Definition: RNTupleOptions.hxx:83
ROOT::Experimental::Detail::RPageStorage::fNTupleName
std::string fNTupleName
Definition: RPageStorage.hxx:74
ROOT::Experimental::Detail::RPageSource
Abstract interface to read data from an ntuple.
Definition: RPageStorage.hxx:180
ROOT::Experimental::Detail::RPageSink::fLastColumnId
DescriptorId_t fLastColumnId
Definition: RPageStorage.hxx:129
ROOT::Experimental::Detail::EPageStorageType::kSource
@ kSource
ROOT::Experimental::Detail::RPageSource::Attach
void Attach()
Open the physical storage container for the tree.
Definition: RPageStorage.hxx:208
ROOT::Experimental::Detail::RPageStorage::RColumnHandle::fColumn
const RColumn * fColumn
Definition: RPageStorage.hxx:87
bool
ROOT::Experimental::Detail::RPageSource::Clone
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
RFieldBase
A field translates read and write calls from/to underlying columns to/from tree values.
Definition: RField.hxx:60
ROOT::Experimental::Detail::RPageStorage::RPageStorage
RPageStorage(std::string_view name)
Definition: RPageStorage.cxx:33
ROOT::Experimental::RNTupleDescriptor
The on-storage meta-data of an ntuple.
Definition: RNTupleDescriptor.hxx:286
ROOT::Experimental::Detail::RPageStorage::ColumnHandle_t
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
Definition: RPageStorage.hxx:94
ROOT::Experimental::Detail::RPageSink::fLastFieldId
DescriptorId_t fLastFieldId
Building the ntuple descriptor while writing is done in the same way for all the storage sink impleme...
Definition: RPageStorage.hxx:128
ROOT::Experimental::Detail::RPageSource::GetNElements
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
Definition: RPageStorage.cxx:86
ROOT::Experimental::Detail::RPageSink::AddColumn
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
Definition: RPageStorage.cxx:117
ROOT::Experimental::Detail::RPageSource::RPageSource
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
Definition: RPageStorage.cxx:51
ROOT::Experimental::Detail::RPageSink::CommitClusterImpl
virtual RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries)=0
ROOT::Experimental::Detail::EPageStorageType
EPageStorageType
Definition: RPageStorage.hxx:57
RStringView.hxx
ROOT::Experimental::Detail::RPageSource::GetDescriptor
const RNTupleDescriptor & GetDescriptor() const
Definition: RPageStorage.hxx:203
ROOT::Experimental::Detail::RPageStorage::RColumnHandle
Definition: RPageStorage.hxx:85
ROOT::Experimental::RNTupleModel
The RNTupleModel encapulates the schema of an ntuple.
Definition: RNTupleModel.hxx:58
ROOT::Experimental::Detail::RPageSink::Create
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
Definition: RPageStorage.cxx:110
ROOT::Experimental::Detail::RPageSink::CreateImpl
virtual void CreateImpl(const RNTupleModel &model)=0
ROOT::Experimental::Detail::RPageSink
Abstract interface to write data into an ntuple.
Definition: RPageStorage.hxx:122
ROOT::Experimental::Detail::RPageStorage::GetType
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
RNTupleUtil.hxx
ROOT::Experimental::Detail::RPageSink::RPageSink
RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
Definition: RPageStorage.cxx:101
ROOT::Experimental::RClusterIndex
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:94
ROOT::Experimental::Detail::RPageSource::GetNEntries
NTupleSize_t GetNEntries()
Definition: RPageStorage.cxx:81
ROOT::Experimental::Detail::RPageSource::DropColumn
void DropColumn(ColumnHandle_t columnHandle) final
Unregisters a column.
Definition: RPageStorage.cxx:76
ROOT::Experimental::Detail::RPageSource::GetColumnId
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
Definition: RPageStorage.cxx:91
RPageAllocator.hxx
ROOT::Experimental::Detail::RPageStorage::operator=
RPageStorage & operator=(const RPageStorage &other)=delete
ROOT::Experimental::Detail::RPageStorage::ReleasePage
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
ROOT::Experimental::Detail::RPageSource::fDescriptor
RNTupleDescriptor fDescriptor
Definition: RPageStorage.hxx:187
ROOT::Experimental::Detail::RColumn
Definition: RColumn.hxx:61
ROOT::Experimental::Detail::RPageSink::CommitCluster
void CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
Definition: RPageStorage.cxx:174
ROOT::Experimental::RClusterDescriptor::RLocator
Generic information about the physical location of data.
Definition: RNTupleDescriptor.hxx:167
name
char name[80]
Definition: TGX11.cxx:110
ROOT::Experimental::Detail::RPageSink::fOptions
RNTupleWriteOptions fOptions
Definition: RPageStorage.hxx:124
ROOT::Experimental::Detail::RPageStorage::RColumnHandle::fId
DescriptorId_t fId
Definition: RPageStorage.hxx:86
make_cnn_model.model
model
Definition: make_cnn_model.py:6
ROOT::Experimental::Detail::RPageSink::ReservePage
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0)=0
Get a new, empty page for the given column that can be filled with up to nElements.
ROOT::Experimental::kInvalidDescriptorId
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:91
ROOT::Experimental::Detail::RPageSink::GetType
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
Definition: RPageStorage.hxx:149
ROOT::Experimental::Detail::RPageSink::CommitDatasetImpl
virtual void CommitDatasetImpl()=0
RColumn
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
ROOT::Experimental::Detail::RPageSource::PopulatePage
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
ROOT::Experimental::Detail::RPageStorage::AddColumn
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
ROOT::Experimental::Detail::RPageSource::LoadCluster
virtual std::unique_ptr< RCluster > LoadCluster(DescriptorId_t clusterId, const ColumnSet_t &columns)=0
Populates all the pages of the given cluster id and columns; it is possible that some columns do not ...
RNTupleOptions.hxx
ROOT::Experimental::ColumnId_t
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:86
ROOT::Experimental::Detail::EPageStorageType::kSink
@ kSink
ROOT
VSD Structures.
Definition: StringConv.hxx:21
ROOT::Experimental::Detail::RPageSource::fOptions
RNTupleReadOptions fOptions
Definition: RPageStorage.hxx:186
ROOT::Experimental::Detail::RPageSource::GetType
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
Definition: RPageStorage.hxx:202
ROOT::Experimental::Detail::RPageSink::fPrevClusterNEntries
NTupleSize_t fPrevClusterNEntries
Definition: RPageStorage.hxx:131
ROOT::Experimental::Detail::RPageStorage::DropColumn
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
ROOT::Experimental::Detail::RPageSink::fOpenColumnRanges
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
Definition: RPageStorage.hxx:133
ROOT::Experimental::Detail::RPageSink::CommitDataset
void CommitDataset()
Finalize the current cluster and the entire data set.
Definition: RPageStorage.hxx:163
ROOT::Experimental::Detail::RPageStorage::GetMetrics
virtual RNTupleMetrics & GetMetrics()
Returns an empty metrics. Page storage implementations usually have their own metrics.
Definition: RPageStorage.cxx:41
ROOT::Experimental::Detail::RPageSink::~RPageSink
virtual ~RPageSink()
Definition: RPageStorage.cxx:106
RNTupleDescriptor.hxx