Logo ROOT   master
Reference Guide
RPageStorage.hxx
Go to the documentation of this file.
1 /// \file ROOT/RPageStorage.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2018-07-19
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RPageStorage
17 #define ROOT7_RPageStorage
18 
20 #include <ROOT/RNTupleOptions.hxx>
21 #include <ROOT/RNTupleUtil.hxx>
22 #include <ROOT/RPage.hxx>
23 #include <ROOT/RPageAllocator.hxx>
24 #include <ROOT/RStringView.hxx>
25 
26 #include <atomic>
27 #include <cstddef>
28 #include <memory>
29 
30 namespace ROOT {
31 namespace Experimental {
32 
33 class RNTupleModel;
34 // TODO(jblomer): factory methods to create tree sinks and sources outside Detail namespace
35 
36 namespace Detail {
37 
38 class RColumn;
39 class RPagePool;
40 class RFieldBase;
41 class RNTupleMetrics;
42 
43 enum class EPageStorageType {
44  kSink, ///< The storage is a sink, i.e. it is used for writing (see RPageSink)
45  kSource, ///< The storage is a source, i.e. it is used for reading (see RPageSource)
46 };
47 
48 // clang-format off
49 /**
50 \class ROOT::Experimental::Detail::RPageStorage
51 \ingroup NTuple
52 \brief Common functionality of an ntuple storage for both reading and writing
53 
54 The RPageStorage provides access to a storage container that keeps the bits of pages and clusters comprising
55 an ntuple. Concrete implementations can use a TFile, a raw file, an object store, and so on.
56 */
57 // clang-format on
58 class RPageStorage {
59 protected:
60  std::string fNTupleName; ///< Name of the ntuple (presumably taken from the constructor's name argument)
61 
62 public:
63  explicit RPageStorage(std::string_view name);
64  RPageStorage(const RPageStorage &other) = delete; ///< Page storages cannot be copy-constructed
65  RPageStorage& operator =(const RPageStorage &other) = delete; ///< Page storages cannot be copy-assigned
66  virtual ~RPageStorage();
67 
68  struct RColumnHandle {
69  RColumnHandle() : fId(-1), fColumn(nullptr) {} ///< Default handle: id -1 and no column attached
70  RColumnHandle(int id, const RColumn *column) : fId(id), fColumn(column) {}
71  int fId; ///< Numeric id of the column; -1 for a default-constructed handle
72  const RColumn *fColumn; ///< Non-owning pointer to the column; nullptr for a default-constructed handle
73  };
74  /// The column handle identifies a column within the currently open page storage
76 
77  /// Register a new column. When reading, the column must exist in the ntuple on disk corresponding to the meta-data.
78  /// When writing, every column can only be attached once.
79  virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) = 0;
80  /// Whether the concrete implementation is a sink or a source
81  virtual EPageStorageType GetType() = 0;
82 
83  /// Every page store needs to be able to free the pages it handed out. However, sinks and sources have
84  /// different means of allocating pages.
85  virtual void ReleasePage(RPage &page) = 0;
86 
87  /// Page storage implementations usually have their own metrics
88  virtual RNTupleMetrics &GetMetrics() = 0;
89 };
90 
91 // clang-format off
92 /**
93 \class ROOT::Experimental::Detail::RPageSink
94 \ingroup NTuple
95 \brief Abstract interface to write data into an ntuple
96 
97 The page sink takes the list of columns and afterwards a series of page commits and cluster commits.
98 The user is responsible for committing clusters at a consistent point, i.e. when all pages corresponding to data
99 up to the given entry number are committed.
100 */
101 // clang-format on
102 class RPageSink : public RPageStorage {
103 protected:
105 
106  /// Building the ntuple descriptor while writing is done in the same way for all the storage sink implementations.
107  /// Field, column, and cluster ids as well as page indexes per cluster are issued sequentially, starting with 0.
112  /// Keeps track of the number of elements in the currently open cluster. Indexed by column id.
113  std::vector<RClusterDescriptor::RColumnRange> fOpenColumnRanges;
114  /// Keeps track of the written pages in the currently open cluster. Indexed by column id.
115  std::vector<RClusterDescriptor::RPageRange> fOpenPageRanges;
117 
118  virtual void CreateImpl(const RNTupleModel &model) = 0; ///< Implementation hook called by Create(RNTupleModel &)
119  virtual RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) = 0;
121  virtual void CommitDatasetImpl() = 0;
122 
123 public:
124  RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options);
125  virtual ~RPageSink();
126  /// Guess the concrete derived page sink from the file name (location)
127  static std::unique_ptr<RPageSink> Create(std::string_view ntupleName, std::string_view location,
128  const RNTupleWriteOptions &options = RNTupleWriteOptions());
130 
131  ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
132 
133  /// Physically creates the storage container to hold the ntuple (e.g., keys in a TFile or an S3 bucket).
134  /// To do so, Create() calls CreateImpl() after updating the descriptor.
135  /// Create() associates column handles with the columns referenced by the model.
136  void Create(RNTupleModel &model);
137  /// Write a page to the storage. The column must have been added before.
138  void CommitPage(ColumnHandle_t columnHandle, const RPage &page);
139  /// Finalize the current cluster and create a new one for the following data.
140  void CommitCluster(NTupleSize_t nEntries);
141  /// Finalize the current cluster and the entire data set.
143 
144  /// Get a new, empty page for the given column that can be filled with up to nElements. If nElements is zero,
145  /// the page sink picks an appropriate size.
146  virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements = 0) = 0;
147 };
148 
149 // clang-format off
150 /**
151 \class ROOT::Experimental::Detail::RPageSource
152 \ingroup NTuple
153 \brief Abstract interface to read data from an ntuple
154 
155 The page source is initialized with the columns of interest. Pages from those columns can then be
156 mapped into memory. The page source also gives access to the ntuple's meta-data.
157 */
158 // clang-format on
159 class RPageSource : public RPageStorage {
160 protected:
163 
164  virtual RNTupleDescriptor AttachImpl() = 0; ///< Implementation hook called by Attach(); returns the ntuple descriptor
165 
166 public:
167  // NOTE(review): the options parameter is named fOptions like a data member -- confirm whether this is intended
168  RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions);
169  virtual ~RPageSource();
170  /// Guess the concrete derived page source from the file name (location)
171  static std::unique_ptr<RPageSource> Create(std::string_view ntupleName, std::string_view location,
172  const RNTupleReadOptions &options = RNTupleReadOptions());
173  /// Open the same storage multiple times, e.g. for reading in multiple threads
174  virtual std::unique_ptr<RPageSource> Clone() const = 0;
175 
176  const RNTupleDescriptor &GetDescriptor() const { return fDescriptor; } ///< Descriptor as last set by Attach()
177  ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
178 
179  /// Open the physical storage container for the ntuple and store the descriptor returned by AttachImpl()
180  void Attach() { fDescriptor = AttachImpl(); }
183  ColumnId_t GetColumnId(ColumnHandle_t columnHandle);
184 
185  /// Allocates and fills a page that contains the index-th element
186  virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) = 0;
187  /// Another version of PopulatePage that allows specifying cluster-relative indexes
188  virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) = 0;
189 };
190 
191 } // namespace Detail
192 
193 } // namespace Experimental
194 } // namespace ROOT
195 
196 #endif
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
virtual void CreateImpl(const RNTupleModel &model)=0
const RNTupleWriteOptions fOptions
Returns the available number of logical cores.
Definition: StringConv.hxx:21
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
The RNTupleModel encapsulates the schema of an ntuple.
void CommitDataset()
Finalize the current cluster and the entire data set.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages...
Definition: RNTupleUtil.hxx:74
Abstract interface to write data into an ntuple
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:78
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:42
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
A helper class for piece-wise construction of an RNTupleDescriptor
virtual RNTupleMetrics & GetMetrics()=0
Page storage implementations usually have their own metrics.
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
virtual RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
Abstract interface to read data from an ntuple
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
A collection of Counter objects with a name, a unit, and a description.
XFontStruct * id
Definition: TGX11.cxx:108
Common functionality of an ntuple storage for both reading and writing
Generic information about the physical location of data.
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
void Attach()
Open the physical storage container for the tree.
Common user-tunable settings for reading ntuples
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id...
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id...
const RNTupleDescriptor & GetDescriptor() const
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
virtual RNTupleDescriptor AttachImpl()=0
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0)=0
Get a new, empty page for the given column that can be filled with up to nElements.
void CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
RNTupleDescriptorBuilder fDescriptorBuilder
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
The on-storage meta-data of an ntuple
Common user-tunable settings for storing ntuples
A page is a slice of a column that is mapped into memory
Definition: RPage.hxx:41
virtual RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries)=0
DescriptorId_t fLastFieldId
Building the ntuple descriptor while writing is done in the same way for all the storage sink impleme...
RPageStorage & operator=(const RPageStorage &other)=delete
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:82
char name[80]
Definition: TGX11.cxx:109