Logo ROOT  
Reference Guide
RPageStorage.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorage.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorage
17#define ROOT7_RPageStorage
18
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
24#include <ROOT/RStringView.hxx>
25
26#include <atomic>
27#include <cstddef>
28#include <memory>
29
30namespace ROOT {
31namespace Experimental {
32
33class RNTupleModel;
34// TODO(jblomer): factory methods to create tree sinks and sources outside Detail namespace
35
36namespace Detail {
37
38class RColumn;
39class RPagePool;
40class RFieldBase;
41class RNTupleMetrics;
42
/// Tells whether a concrete page storage implementation is a sink or a source.
43enum class EPageStorageType {
44 kSink, ///< The storage is a page sink (see RPageSink, the interface to write data into an ntuple)
45 kSource, ///< The storage is a page source (see RPageSource, the interface to read data from an ntuple)
46};
47
48// clang-format off
49/**
50\class ROOT::Experimental::Detail::RPageStorage
51\ingroup NTuple
52\brief Common functionality of an ntuple storage for both reading and writing
53
54The RPageStorage provides access to a storage container that keeps the bits of pages and clusters comprising
55an ntuple. Concrete implementations can use a TFile, a raw file, an object store, and so on.
56*/
57// clang-format on
59protected:
60 std::string fNTupleName;
61
62public:
64 RPageStorage(const RPageStorage &other) = delete;
65 RPageStorage& operator =(const RPageStorage &other) = delete;
66 virtual ~RPageStorage();
67
69 RColumnHandle() : fId(-1), fColumn(nullptr) {}
70 RColumnHandle(int id, const RColumn *column) : fId(id), fColumn(column) {}
71 int fId;
73 };
74 /// The column handle identifies a column with the current open page storage
76
77 /// Register a new column. When reading, the column must exist in the ntuple on disk corresponding to the meta-data.
78 /// When writing, every column can only be attached once.
79 virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) = 0;
80 /// Whether the concrete implementation is a sink or a source
82
83 /// Every page store needs to be able to free pages it handed out, but sinks and sources have different means
84 /// of allocating pages.
85 virtual void ReleasePage(RPage &page) = 0;
86
87 /// Page storage implementations usually have their own metrics
88 virtual RNTupleMetrics &GetMetrics() = 0;
89};
90
91// clang-format off
92/**
93\class ROOT::Experimental::Detail::RPageSink
94\ingroup NTuple
95\brief Abstract interface to write data into an ntuple
96
97The page sink takes the list of columns and afterwards a series of page commits and cluster commits.
98The user is responsible to commit clusters at a consistent point, i.e. when all pages corresponding to data
99up to the given entry number are committed.
100*/
101// clang-format on
/// Abstract page sink: the public Create()/CommitPage()/CommitCluster() entry points are declared here, while
/// concrete storage backends provide the pure virtual Do*() hooks and ReservePage().
102class RPageSink : public RPageStorage {
103protected:
105
106 /// Building the ntuple descriptor while writing is done in the same way for all the storage sink implementations.
107 /// Field, column, cluster ids and page indexes per cluster are issued sequentially starting with 0.
112 /// Keeps track of the number of elements in the currently open cluster. Indexed by column id.
113 std::vector<RClusterDescriptor::RColumnRange> fOpenColumnRanges;
114 /// Keeps track of the written pages in the currently open cluster. Indexed by column id.
115 std::vector<RClusterDescriptor::RPageRange> fOpenPageRanges;
117
/// Storage-specific hook: Create() calls DoCreate() after updating the descriptor.
118 virtual void DoCreate(const RNTupleModel &model) = 0;
/// Storage-specific hook that returns an RClusterDescriptor::RLocator.
/// NOTE(review): presumably invoked by CommitPage() and returning the location of the written page -- confirm
/// against the implementation file.
119 virtual RClusterDescriptor::RLocator DoCommitPage(ColumnHandle_t columnHandle, const RPage &page) = 0;
/// Storage-specific hook for finalizing the data set.
/// NOTE(review): presumably invoked when the whole data set is committed -- confirm against the implementation file.
121 virtual void DoCommitDataset() = 0;
122
123public:
/// Takes the name of the ntuple and the write options.
124 RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options);
125 virtual ~RPageSink();
126 /// Guess the concrete derived page sink from the file name (location)
127 static std::unique_ptr<RPageSink> Create(std::string_view ntupleName, std::string_view location,
128 const RNTupleWriteOptions &options = RNTupleWriteOptions());
130
/// Register a new column. When writing, every column can only be attached once. Declared final here.
131 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
132
133 /// Physically creates the storage container to hold the ntuple (e.g., keys in a TFile or an S3 bucket).
134 /// To do so, Create() calls DoCreate() after updating the descriptor.
135 /// Create() associates column handles to the columns referenced by the model.
136 void Create(RNTupleModel &model);
137 /// Write a page to the storage. The column must have been added before.
138 void CommitPage(ColumnHandle_t columnHandle, const RPage &page);
139 /// Finalize the current cluster and create a new one for the following data.
140 void CommitCluster(NTupleSize_t nEntries);
141 /// Finalize the current cluster and the entire data set.
143
144 /// Get a new, empty page for the given column that can be filled with up to nElements. If nElements is zero,
145 /// the page sink picks an appropriate size.
146 virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements = 0) = 0;
147};
148
149// clang-format off
150/**
151\class ROOT::Experimental::Detail::RPageSource
152\ingroup NTuple
153\brief Abstract interface to read data from an ntuple
154
155The page source is initialized with the columns of interest. Pages from those columns can then be
156mapped into memory. The page source also gives access to the ntuple's meta-data.
157*/
158// clang-format on
/// Abstract page source: concrete storage backends provide Clone() and the two PopulatePage() overloads.
159class RPageSource : public RPageStorage {
160protected:
163
165
166public:
168 virtual ~RPageSource();
169 /// Guess the concrete derived page source from the file name (location)
170 static std::unique_ptr<RPageSource> Create(std::string_view ntupleName, std::string_view location,
171 const RNTupleReadOptions &options = RNTupleReadOptions());
172 /// Open the same storage multiple times, e.g. for reading in multiple threads
173 virtual std::unique_ptr<RPageSource> Clone() const = 0;
174
/// Read-only access to the ntuple descriptor held by this page source (fDescriptor).
176 const RNTupleDescriptor &GetDescriptor() const { return fDescriptor; }
/// Register a new column. When reading, the column must exist in the ntuple on disk. Declared final here.
177 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
178
179 /// Open the physical storage container for the ntuple
184
185 /// Allocates and fills a page that contains the index-th element
186 virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) = 0;
187 /// Another version of PopulatePage that allows specifying cluster-relative indexes
188 virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) = 0;
189};
190
191} // namespace Detail
192
193} // namespace Experimental
194} // namespace ROOT
195
196#endif
XFontStruct * id
Definition: TGX11.cxx:108
char name[80]
Definition: TGX11.cxx:109
A collection of Counter objects with a name, a unit, and a description.
Abstract interface to write data into an ntuple.
void CommitDataset()
Finalize the current cluster and the entire data set.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0)=0
Get a new, empty page for the given column that can be filled with up to nElements.
RNTupleDescriptorBuilder fDescriptorBuilder
virtual void DoCreate(const RNTupleModel &model)=0
void CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
virtual RClusterDescriptor::RLocator DoCommitPage(ColumnHandle_t columnHandle, const RPage &page)=0
virtual RClusterDescriptor::RLocator DoCommitCluster(NTupleSize_t nEntries)=0
DescriptorId_t fLastFieldId
Building the ntuple descriptor while writing is done in the same way for all the storage sink implementations.
const RNTupleWriteOptions fOptions
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
Abstract interface to read data from an ntuple.
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
void Attach()
Open the physical storage container for the tree.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
virtual RNTupleDescriptor DoAttach()=0
virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)=0
Another version of PopulatePage that allows to specify cluster-relative indexes.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
const RNTupleDescriptor & GetDescriptor() const
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
Common functionality of an ntuple storage for both reading and writing.
RPageStorage(const RPageStorage &other)=delete
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
virtual RNTupleMetrics & GetMetrics()=0
Page storage implementations usually have their own metrics.
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
RPageStorage & operator=(const RPageStorage &other)=delete
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:83
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
A field translates read and write calls from/to underlying columns to/from tree values.
Definition: RField.hxx:60
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:43
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:79
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:75
VSD Structures.
Definition: StringConv.hxx:21
Generic information about the physical location of data.