Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RColumn
17#define ROOT7_RColumn
18
19#include <ROOT/RConfig.hxx> // for R__likely
21#include <ROOT/RColumnModel.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RPage.hxx>
24#include <ROOT/RPageStorage.hxx>
25
26#include <TError.h>
27
28#include <cstring> // for memcpy
29#include <memory>
30#include <utility>
31
32namespace ROOT {
33namespace Experimental {
34namespace Internal {
35
36// clang-format off
37/**
38\class ROOT::Experimental::Internal::RColumn
39\ingroup NTuple
40\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
41*/
42// clang-format on
43class RColumn {
44private:
46 /**
47 * Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
48 * the offset column with index 0 and the character value column with index 1.
49 */
50 std::uint32_t fIndex;
51 RPageSink *fPageSink = nullptr;
55 /// A set of open pages into which new elements are being written. The pages are used
56 /// in rotation. If tail page optimization is enabled, they are 50% bigger than the target size
57 /// given by the write options. The current page is filled until the target size, but it is only
58 /// committed once the other write page is filled at least 50%. If a flush occurs earlier, a
59 /// slightly oversized, single page will be committed.
60 /// Without tail page optimization, only one page is allocated equal to the target size.
62 /// Index of the current write page
64 /// For writing, the targeted number of elements, given by `fApproxNElementsPerPage` (in the write options) and the element size.
65 /// We ensure this value to be >= 2 in Connect() so that we have meaningful
66 /// "page full" and "page half full" events when writing the page.
67 std::uint32_t fApproxNElementsPerPage = 0;
68 /// The number of elements written (when writing) or available (when reading) in the column
70 /// The currently mapped page for reading
72 /// The column id is used to find matching pages with content when reading
74 /// Global index of the first element in this column; usually == 0, unless it is a deferred column
76 /// Used to pack and unpack pages on writing/reading
77 std::unique_ptr<RColumnElementBase> fElement;
78
79 RColumn(const RColumnModel &model, std::uint32_t index);
80
81 /// Used in Append() and AppendV() to handle the case when the main page reached the target size.
82 /// If tail page optimization is enabled, switch the pages; the other page has been flushed when
83 /// the main page reached 50%.
84 /// Without tail page optimization, flush the current page to make room for future writes.
86 {
88 return;
89
90 auto otherIdx = 1 - fWritePageIdx; // == (fWritePageIdx + 1) % 2
91 if (fWritePage[otherIdx].IsNull()) {
92 // There is only this page; we have to flush it now to make room for future writes.
95 } else {
96 fWritePageIdx = otherIdx;
99 }
100 }
101
102 /// When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed
104 auto otherIdx = 1 - fWritePageIdx;
105 if (fWritePage[otherIdx].IsEmpty())
106 return;
108 // Mark the page as flushed; the rangeFirst is zero for now but will be reset to
109 // fNElements in SwapWritePagesIfFull() when the pages swap
110 fWritePage[otherIdx].Reset(0);
111 }
112
113public:
114 template <typename CppT>
115 static std::unique_ptr<RColumn> Create(const RColumnModel &model, std::uint32_t index)
116 {
117 auto column = std::unique_ptr<RColumn>(new RColumn(model, index));
118 column->fElement = RColumnElementBase::Generate<CppT>(model.GetType());
119 return column;
120 }
121
122 RColumn(const RColumn&) = delete;
123 RColumn &operator =(const RColumn&) = delete;
124 ~RColumn();
125
126 /// Connect the column to a page sink. `firstElementIndex` can be used to specify the first column element index
127 /// with backing storage for this column. On read back, elements before `firstElementIndex` will cause the zero page
128 /// to be mapped.
129 void ConnectPageSink(DescriptorId_t fieldId, RPageSink &pageSink, NTupleSize_t firstElementIndex = 0U);
130 /// Connect the column to a page source.
131 void ConnectPageSource(DescriptorId_t fieldId, RPageSource &pageSource);
132
133 void Append(const void *from)
134 {
135 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(1);
136
139 }
140
141 std::memcpy(dst, from, fElement->GetSize());
142 fNElements++;
143
145 }
146
147 void AppendV(const void *from, std::size_t count)
148 {
149 // We might not have enough space in the current page. In this case, fall back to one by one filling.
151 // TODO(jblomer): use (fewer) calls to AppendV to write the data page-by-page
152 for (unsigned i = 0; i < count; ++i) {
153 Append(static_cast<const unsigned char *>(from) + fElement->GetSize() * i);
154 }
155 return;
156 }
157
158 // The check for flushing the shadow page is more complicated than for the Append() case
159 // because we don't necessarily fill up to exactly fApproxNElementsPerPage / 2 elements;
160 // we might instead jump over the 50% fill level.
161 // This check should be done before calling `RPage::GrowUnchecked()` as the latter affects the return value of
162 // `RPage::GetNElements()`.
165 {
167 }
168
169 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(count);
170
171 std::memcpy(dst, from, fElement->GetSize() * count);
172 fNElements += count;
173
174 // Note that by the very first check in AppendV, we cannot have filled more than fApproxNElementsPerPage elements
176 }
177
178 void Read(const NTupleSize_t globalIndex, void *to)
179 {
180 if (!fReadPage.Contains(globalIndex)) {
181 MapPage(globalIndex);
182 }
183 const auto elemSize = fElement->GetSize();
184 void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
185 (globalIndex - fReadPage.GetGlobalRangeFirst()) * elemSize;
186 std::memcpy(to, from, elemSize);
187 }
188
189 void Read(RClusterIndex clusterIndex, void *to)
190 {
191 if (!fReadPage.Contains(clusterIndex)) {
192 MapPage(clusterIndex);
193 }
194 const auto elemSize = fElement->GetSize();
195 void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
196 (clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst()) * elemSize;
197 std::memcpy(to, from, elemSize);
198 }
199
200 void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, void *to)
201 {
202 if (!fReadPage.Contains(globalIndex)) {
203 MapPage(globalIndex);
204 }
205 NTupleSize_t idxInPage = globalIndex - fReadPage.GetGlobalRangeFirst();
206
207 const auto elemSize = fElement->GetSize();
208 const void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemSize;
209 if (globalIndex + count <= fReadPage.GetGlobalRangeLast() + 1) {
210 std::memcpy(to, from, elemSize * count);
211 } else {
212 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
213 std::memcpy(to, from, elemSize * nBatch);
214 auto tail = static_cast<unsigned char *>(to) + nBatch * elemSize;
215 ReadV(globalIndex + nBatch, count - nBatch, tail);
216 }
217 }
218
219 void ReadV(RClusterIndex clusterIndex, const ClusterSize_t::ValueType count, void *to)
220 {
221 if (!fReadPage.Contains(clusterIndex)) {
222 MapPage(clusterIndex);
223 }
224 NTupleSize_t idxInPage = clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst();
225
226 const auto elemSize = fElement->GetSize();
227 const void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemSize;
228 if (clusterIndex.GetIndex() + count <= fReadPage.GetClusterRangeLast() + 1) {
229 std::memcpy(to, from, elemSize * count);
230 } else {
231 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
232 std::memcpy(to, from, elemSize * nBatch);
233 auto tail = static_cast<unsigned char *>(to) + nBatch * elemSize;
234 ReadV(RClusterIndex(clusterIndex.GetClusterId(), clusterIndex.GetIndex() + nBatch), count - nBatch, tail);
235 }
236 }
237
238 template <typename CppT>
239 CppT *Map(const NTupleSize_t globalIndex) {
240 NTupleSize_t nItems;
241 return MapV<CppT>(globalIndex, nItems);
242 }
243
244 template <typename CppT>
245 CppT *Map(RClusterIndex clusterIndex)
246 {
247 NTupleSize_t nItems;
248 return MapV<CppT>(clusterIndex, nItems);
249 }
250
251 template <typename CppT>
252 CppT *MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems) {
253 if (R__unlikely(!fReadPage.Contains(globalIndex))) {
254 MapPage(globalIndex);
255 }
256 // +1 to go from 0-based indexing to 1-based number of items
257 nItems = fReadPage.GetGlobalRangeLast() - globalIndex + 1;
258 return reinterpret_cast<CppT*>(
259 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
261 }
262
263 template <typename CppT>
264 CppT *MapV(RClusterIndex clusterIndex, NTupleSize_t &nItems)
265 {
266 if (!fReadPage.Contains(clusterIndex)) {
267 MapPage(clusterIndex);
268 }
269 // +1 to go from 0-based indexing to 1-based number of items
270 nItems = fReadPage.GetClusterRangeLast() - clusterIndex.GetIndex() + 1;
271 return reinterpret_cast<CppT*>(
272 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
274 }
275
277 {
278 if (!fReadPage.Contains(clusterIndex)) {
279 MapPage(clusterIndex);
280 }
281 return fReadPage.GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndex();
282 }
283
285 if (!fReadPage.Contains(globalIndex)) {
286 MapPage(globalIndex);
287 }
289 globalIndex - fReadPage.GetClusterInfo().GetIndexOffset());
290 }
291
292 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
293 void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
294 {
295 NTupleSize_t idxStart = 0;
296 NTupleSize_t idxEnd;
297 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
298 if (R__likely(globalIndex > 0)) {
299 if (R__likely(fReadPage.Contains(globalIndex - 1))) {
300 idxStart = *Map<ClusterSize_t>(globalIndex - 1);
301 idxEnd = *Map<ClusterSize_t>(globalIndex);
302 if (R__unlikely(fReadPage.GetClusterInfo().GetIndexOffset() == globalIndex))
303 idxStart = 0;
304 } else {
305 idxEnd = *Map<ClusterSize_t>(globalIndex);
306 auto selfOffset = fReadPage.GetClusterInfo().GetIndexOffset();
307 idxStart = (globalIndex == selfOffset) ? 0 : *Map<ClusterSize_t>(globalIndex - 1);
308 }
309 } else {
310 idxEnd = *Map<ClusterSize_t>(globalIndex);
311 }
312 *collectionSize = idxEnd - idxStart;
313 *collectionStart = RClusterIndex(fReadPage.GetClusterInfo().GetId(), idxStart);
314 }
315
316 void GetCollectionInfo(RClusterIndex clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
317 {
318 auto index = clusterIndex.GetIndex();
319 auto idxStart = (index == 0) ? 0 : *Map<ClusterSize_t>(clusterIndex - 1);
320 auto idxEnd = *Map<ClusterSize_t>(clusterIndex);
321 *collectionSize = idxEnd - idxStart;
322 *collectionStart = RClusterIndex(clusterIndex.GetClusterId(), idxStart);
323 }
324
325 /// Get the currently active cluster id
326 void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag) {
327 auto varSwitch = Map<RColumnSwitch>(globalIndex);
328 *varIndex = RClusterIndex(fReadPage.GetClusterInfo().GetId(), varSwitch->GetIndex());
329 *tag = varSwitch->GetTag();
330 }
331
332 void Flush();
333 void MapPage(const NTupleSize_t index);
334 void MapPage(RClusterIndex clusterIndex);
336 RColumnElementBase *GetElement() const { return fElement.get(); }
337 const RColumnModel &GetModel() const { return fModel; }
338 std::uint32_t GetIndex() const { return fIndex; }
342 RPageSink *GetPageSink() const { return fPageSink; }
345}; // class RColumn
346
347} // namespace Internal
348} // namespace Experimental
349} // namespace ROOT
350
351#endif
#define R__likely(expr)
Definition RConfig.hxx:579
#define R__unlikely(expr)
Definition RConfig.hxx:578
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A column element encapsulates the translation between basic C++ types and their column representation...
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:43
RColumn(const RColumn &)=delete
RPageStorage::ColumnHandle_t GetHandleSink() const
Definition RColumn.hxx:344
void Read(RClusterIndex clusterIndex, void *to)
Definition RColumn.hxx:189
void ConnectPageSink(DescriptorId_t fieldId, RPageSink &pageSink, NTupleSize_t firstElementIndex=0U)
Connect the column to a page sink.
Definition RColumn.cxx:42
std::unique_ptr< RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition RColumn.hxx:77
void HandleWritePageIfFull()
Used in Append() and AppendV() to handle the case when the main page reached the target size.
Definition RColumn.hxx:85
NTupleSize_t GetGlobalIndex(RClusterIndex clusterIndex)
Definition RColumn.hxx:276
RColumnElementBase * GetElement() const
Definition RColumn.hxx:336
void GetCollectionInfo(RClusterIndex clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
Definition RColumn.hxx:316
ColumnId_t fColumnIdSource
The column id is used to find matching pages with content when reading.
Definition RColumn.hxx:73
void ReadV(RClusterIndex clusterIndex, const ClusterSize_t::ValueType count, void *to)
Definition RColumn.hxx:219
int fWritePageIdx
Index of the current write page.
Definition RColumn.hxx:63
CppT * Map(const NTupleSize_t globalIndex)
Definition RColumn.hxx:239
RPageSource * GetPageSource() const
Definition RColumn.hxx:341
void AppendV(const void *from, std::size_t count)
Definition RColumn.hxx:147
void Append(const void *from)
Definition RColumn.hxx:133
CppT * Map(RClusterIndex clusterIndex)
Definition RColumn.hxx:245
CppT * MapV(RClusterIndex clusterIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:264
RColumn & operator=(const RColumn &)=delete
RPageStorage::ColumnHandle_t fHandleSource
Definition RColumn.hxx:54
NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition RColumn.hxx:69
void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, void *to)
Definition RColumn.hxx:200
void Read(const NTupleSize_t globalIndex, void *to)
Definition RColumn.hxx:178
void MapPage(const NTupleSize_t index)
Definition RColumn.cxx:96
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition RColumn.hxx:50
void ConnectPageSource(DescriptorId_t fieldId, RPageSource &pageSource)
Connect the column to a page source.
Definition RColumn.cxx:63
std::uint32_t fApproxNElementsPerPage
For writing, the targeted number of elements, given by fApproxNElementsPerPage (in the write options)...
Definition RColumn.hxx:67
void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:293
CppT * MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:252
static std::unique_ptr< RColumn > Create(const RColumnModel &model, std::uint32_t index)
Definition RColumn.hxx:115
const RColumnModel & GetModel() const
Definition RColumn.hxx:337
void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag)
Get the currently active cluster id.
Definition RColumn.hxx:326
RPageStorage::ColumnHandle_t fHandleSink
Definition RColumn.hxx:53
void FlushShadowWritePage()
When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed.
Definition RColumn.hxx:103
RPage fWritePage[2]
A set of open pages into which new elements are being written.
Definition RColumn.hxx:61
NTupleSize_t GetNElements() const
Definition RColumn.hxx:335
NTupleSize_t GetFirstElementIndex() const
Definition RColumn.hxx:340
RPageStorage::ColumnHandle_t GetHandleSource() const
Definition RColumn.hxx:343
RClusterIndex GetClusterIndex(NTupleSize_t globalIndex)
Definition RColumn.hxx:284
RPage fReadPage
The currently mapped page for reading.
Definition RColumn.hxx:71
NTupleSize_t fFirstElementIndex
Global index of the first element in this column; usually == 0, unless it is a deferred column.
Definition RColumn.hxx:75
Abstract interface to write data into an ntuple.
virtual void CommitPage(ColumnHandle_t columnHandle, const RPage &page)=0
Write a page to the storage. The column must have been added before.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
void Reset(NTupleSize_t rangeFirst)
Forget all currently stored elements (size == 0) and set a new starting index.
Definition RPage.hxx:122
void * GrowUnchecked(ClusterSize_t::ValueType nElements)
Called during writing: returns a pointer after the last element and increases the element counter in ...
Definition RPage.hxx:111
ClusterSize_t::ValueType GetClusterRangeFirst() const
Definition RPage.hxx:88
NTupleSize_t GetGlobalRangeFirst() const
Definition RPage.hxx:86
const RClusterInfo & GetClusterInfo() const
Definition RPage.hxx:92
std::uint32_t GetNElements() const
Definition RPage.hxx:84
bool Contains(NTupleSize_t globalIndex) const
Definition RPage.hxx:94
ClusterSize_t::ValueType GetClusterRangeLast() const
Definition RPage.hxx:89
NTupleSize_t GetGlobalRangeLast() const
Definition RPage.hxx:87
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
Holds the static meta-data of an RNTuple column.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr ColumnId_t kInvalidColumnId
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.