Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RColumn
17#define ROOT7_RColumn
18
19#include <ROOT/RConfig.hxx> // for R__likely
21#include <ROOT/RColumnModel.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RPage.hxx>
24#include <ROOT/RPageStorage.hxx>
25
26#include <TError.h>
27
28#include <cstring> // for memcpy
29#include <memory>
30#include <utility>
31
32namespace ROOT {
33namespace Experimental {
34namespace Internal {
35
36// clang-format off
37/**
38\class ROOT::Experimental::Internal::RColumn
39\ingroup NTuple
40\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
41*/
42// clang-format on
43class RColumn {
44private:
46 /**
47 * Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
48 * the offset column with index 0 and the character value column with index 1.
49 */
50 std::uint32_t fIndex;
51 RPageSink *fPageSink = nullptr;
55 /// A set of open pages into which new elements are being written. The pages are used
56 /// in rotation. They are 50% bigger than the target size given by the write options.
57 /// The current page is filled until the target size, but it is only committed once the other
58 /// write page is filled at least 50%. If a flush occurs earlier, a slightly oversized, single
59 /// page will be committed.
61 /// Index of the current write page
63 /// For writing, the targeted number of elements, given by `fApproxNElementsPerPage` (in the write options) and the element size.
64 /// We ensure this value to be >= 2 in Connect() so that we have meaningful
65 /// "page full" and "page half full" events when writing the page.
66 std::uint32_t fApproxNElementsPerPage = 0;
67 /// The number of elements written resp. available in the column
69 /// The currently mapped page for reading
71 /// The column id is used to find matching pages with content when reading
73 /// Global index of the first element in this column; usually == 0, unless it is a deferred column
75 /// Used to pack and unpack pages on writing/reading
76 std::unique_ptr<RColumnElementBase> fElement;
77
78 RColumn(const RColumnModel &model, std::uint32_t index);
79
80 /// Used in Append() and AppendV() to switch pages when the main page reached the target size
81 /// The other page has been flushed when the main page reached 50%.
84 return;
85
86 fWritePageIdx = 1 - fWritePageIdx; // == (fWritePageIdx + 1) % 2
89 }
90
91 /// When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed
93 auto otherIdx = 1 - fWritePageIdx;
94 if (fWritePage[otherIdx].IsEmpty())
95 return;
97 // Mark the page as flushed; the rangeFirst is zero for now but will be reset to
98 // fNElements in SwapWritePagesIfFull() when the pages swap
99 fWritePage[otherIdx].Reset(0);
100 }
101
102public:
103 template <typename CppT>
104 static std::unique_ptr<RColumn> Create(const RColumnModel &model, std::uint32_t index)
105 {
106 auto column = std::unique_ptr<RColumn>(new RColumn(model, index));
107 column->fElement = RColumnElementBase::Generate<CppT>(model.GetType());
108 return column;
109 }
110
111 RColumn(const RColumn&) = delete;
112 RColumn &operator =(const RColumn&) = delete;
113 ~RColumn();
114
115 /// Connect the column to a page sink. `firstElementIndex` can be used to specify the first column element index
116 /// with backing storage for this column. On read back, elements before `firstElementIndex` will cause the zero page
117 /// to be mapped.
118 void ConnectPageSink(DescriptorId_t fieldId, RPageSink &pageSink, NTupleSize_t firstElementIndex = 0U);
119 /// Connect the column to a page source.
120 void ConnectPageSource(DescriptorId_t fieldId, RPageSource &pageSource);
121
/// Append one element to the current write page.
/// Visible steps: reserve a slot with `GrowUnchecked(1)` on `fWritePage[fWritePageIdx]`, copy
/// `fElement->GetSize()` bytes from `from` into that slot, and increment `fNElements`.
/// \param from Address of the element to copy; it is read but not modified.
/// NOTE(review): listing lines 126-127 and 133 are absent from this rendering, so the statement closed by
/// the brace on listing line 128 and whatever follows the counter increment cannot be seen here — confirm
/// against the original header before relying on this description.
122 void Append(const void *from)
123 {
124 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(1);
125
128 }
129
130 std::memcpy(dst, from, fElement->GetSize());
131 fNElements++;
132
134 }
135
/// Append `count` consecutive elements starting at `from`.
/// Two paths are visible: a fallback that calls Append() once per element (advancing the source by
/// `fElement->GetSize()` bytes each time) and returns, and a bulk path that reserves `count` slots with
/// `GrowUnchecked(count)`, copies `fElement->GetSize() * count` bytes in one memcpy and adds `count` to
/// `fNElements`.
/// \param from  Address of the first element to copy.
/// \param count Number of elements to append.
/// NOTE(review): listing lines 139, 152-153, 155 and 164 are absent from this rendering; the conditions that
/// select the fallback path and guard the braces on listing lines 154-156, as well as the final statement,
/// are therefore not visible here.
/// NOTE(review): the fallback loop counter is `unsigned` while `count` is `std::size_t`; where std::size_t is
/// wider than unsigned, a count above UINT_MAX would wrap the counter and never terminate — consider a
/// std::size_t counter.
136 void AppendV(const void *from, std::size_t count)
137 {
138 // We might not have enough space in the current page. In this case, fall back to one by one filling.
140 // TODO(jblomer): use (fewer) calls to AppendV to write the data page-by-page
141 for (unsigned i = 0; i < count; ++i) {
142 Append(static_cast<const unsigned char *>(from) + fElement->GetSize() * i);
143 }
144 return;
145 }
146
147 // The check for flushing the shadow page is more complicated than for the Append() case
148 // because we don't necessarily fill up to exactly fApproxNElementsPerPage / 2 elements;
149 // we might instead jump over the 50% fill level.
150 // This check should be done before calling `RPage::GrowUnchecked()` as the latter affects the return value of
151 // `RPage::GetNElements()`.
154 {
156 }
157
158 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(count);
159
160 std::memcpy(dst, from, fElement->GetSize() * count);
161 fNElements += count;
162
163 // Note that by the very first check in AppendV, we cannot have filled more than fApproxNElementsPerPage elements
165 }
166
167 void Read(const NTupleSize_t globalIndex, void *to)
168 {
169 if (!fReadPage.Contains(globalIndex)) {
170 MapPage(globalIndex);
171 }
172 const auto elemSize = fElement->GetSize();
173 void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
174 (globalIndex - fReadPage.GetGlobalRangeFirst()) * elemSize;
175 std::memcpy(to, from, elemSize);
176 }
177
178 void Read(RClusterIndex clusterIndex, void *to)
179 {
180 if (!fReadPage.Contains(clusterIndex)) {
181 MapPage(clusterIndex);
182 }
183 const auto elemSize = fElement->GetSize();
184 void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
185 (clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst()) * elemSize;
186 std::memcpy(to, from, elemSize);
187 }
188
189 void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, void *to)
190 {
191 if (!fReadPage.Contains(globalIndex)) {
192 MapPage(globalIndex);
193 }
194 NTupleSize_t idxInPage = globalIndex - fReadPage.GetGlobalRangeFirst();
195
196 const auto elemSize = fElement->GetSize();
197 const void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemSize;
198 if (globalIndex + count <= fReadPage.GetGlobalRangeLast() + 1) {
199 std::memcpy(to, from, elemSize * count);
200 } else {
201 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
202 std::memcpy(to, from, elemSize * nBatch);
203 auto tail = static_cast<unsigned char *>(to) + nBatch * elemSize;
204 ReadV(globalIndex + nBatch, count - nBatch, tail);
205 }
206 }
207
208 void ReadV(RClusterIndex clusterIndex, const ClusterSize_t::ValueType count, void *to)
209 {
210 if (!fReadPage.Contains(clusterIndex)) {
211 MapPage(clusterIndex);
212 }
213 NTupleSize_t idxInPage = clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst();
214
215 const auto elemSize = fElement->GetSize();
216 const void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemSize;
217 if (clusterIndex.GetIndex() + count <= fReadPage.GetClusterRangeLast() + 1) {
218 std::memcpy(to, from, elemSize * count);
219 } else {
220 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
221 std::memcpy(to, from, elemSize * nBatch);
222 auto tail = static_cast<unsigned char *>(to) + nBatch * elemSize;
223 ReadV(RClusterIndex(clusterIndex.GetClusterId(), clusterIndex.GetIndex() + nBatch), count - nBatch, tail);
224 }
225 }
226
227 template <typename CppT>
228 CppT *Map(const NTupleSize_t globalIndex) {
229 NTupleSize_t nItems;
230 return MapV<CppT>(globalIndex, nItems);
231 }
232
233 template <typename CppT>
234 CppT *Map(RClusterIndex clusterIndex)
235 {
236 NTupleSize_t nItems;
237 return MapV<CppT>(clusterIndex, nItems);
238 }
239
/// Return a typed pointer derived from the mapped page's buffer for the element at `globalIndex`, and
/// report in `nItems` how many elements are available from `globalIndex` up to and including the last
/// element of that page. The page is mapped first if the current read page does not contain the index.
/// \param globalIndex Global element index to look up.
/// \param[out] nItems Set to `GetGlobalRangeLast() - globalIndex + 1` of the mapped page.
/// NOTE(review): listing line 249, which holds the offset term added to the buffer address, is absent from
/// this rendering — confirm the exact pointer arithmetic against the original header.
240 template <typename CppT>
241 CppT *MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems) {
242 if (R__unlikely(!fReadPage.Contains(globalIndex))) {
243 MapPage(globalIndex);
244 }
245 // +1 to go from 0-based indexing to 1-based number of items
246 nItems = fReadPage.GetGlobalRangeLast() - globalIndex + 1;
247 return reinterpret_cast<CppT*>(
248 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
250 }
251
/// Return a typed pointer derived from the mapped page's buffer for the element addressed by
/// `clusterIndex`, and report in `nItems` how many elements are available from that index up to and
/// including the last in-cluster element of the page. The page is mapped first if the current read page
/// does not contain the index.
/// \param clusterIndex Cluster-relative element index to look up.
/// \param[out] nItems Set to `GetClusterRangeLast() - clusterIndex.GetIndex() + 1` of the mapped page.
/// NOTE(review): listing line 262, which holds the offset term added to the buffer address, is absent from
/// this rendering — confirm the exact pointer arithmetic against the original header.
252 template <typename CppT>
253 CppT *MapV(RClusterIndex clusterIndex, NTupleSize_t &nItems)
254 {
255 if (!fReadPage.Contains(clusterIndex)) {
256 MapPage(clusterIndex);
257 }
258 // +1 to go from 0-based indexing to 1-based number of items
259 nItems = fReadPage.GetClusterRangeLast() - clusterIndex.GetIndex() + 1;
260 return reinterpret_cast<CppT*>(
261 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
263 }
264
266 {
267 if (!fReadPage.Contains(clusterIndex)) {
268 MapPage(clusterIndex);
269 }
270 return fReadPage.GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndex();
271 }
272
274 if (!fReadPage.Contains(globalIndex)) {
275 MapPage(globalIndex);
276 }
278 globalIndex - fReadPage.GetClusterInfo().GetIndexOffset());
279 }
280
281 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
282 void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
283 {
284 NTupleSize_t idxStart = 0;
285 NTupleSize_t idxEnd;
286 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
287 if (R__likely(globalIndex > 0)) {
288 if (R__likely(fReadPage.Contains(globalIndex - 1))) {
289 idxStart = *Map<ClusterSize_t>(globalIndex - 1);
290 idxEnd = *Map<ClusterSize_t>(globalIndex);
291 if (R__unlikely(fReadPage.GetClusterInfo().GetIndexOffset() == globalIndex))
292 idxStart = 0;
293 } else {
294 idxEnd = *Map<ClusterSize_t>(globalIndex);
295 auto selfOffset = fReadPage.GetClusterInfo().GetIndexOffset();
296 idxStart = (globalIndex == selfOffset) ? 0 : *Map<ClusterSize_t>(globalIndex - 1);
297 }
298 } else {
299 idxEnd = *Map<ClusterSize_t>(globalIndex);
300 }
301 *collectionSize = idxEnd - idxStart;
302 *collectionStart = RClusterIndex(fReadPage.GetClusterInfo().GetId(), idxStart);
303 }
304
305 void GetCollectionInfo(RClusterIndex clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
306 {
307 auto index = clusterIndex.GetIndex();
308 auto idxStart = (index == 0) ? 0 : *Map<ClusterSize_t>(clusterIndex - 1);
309 auto idxEnd = *Map<ClusterSize_t>(clusterIndex);
310 *collectionSize = idxEnd - idxStart;
311 *collectionStart = RClusterIndex(clusterIndex.GetClusterId(), idxStart);
312 }
313
314 /// Get the currently active cluster id
315 void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag) {
316 auto varSwitch = Map<RColumnSwitch>(globalIndex);
317 *varIndex = RClusterIndex(fReadPage.GetClusterInfo().GetId(), varSwitch->GetIndex());
318 *tag = varSwitch->GetTag();
319 }
320
// Declared here only; the definitions are not part of this header listing.
321 void Flush();
// Map the page containing the given global element index; called by the Read/ReadV/MapV accessors when the
// current read page does not contain the requested index.
322 void MapPage(const NTupleSize_t index);
// Same as above for a cluster-relative index.
323 void MapPage(RClusterIndex clusterIndex);
// Non-owning pointer to the column element held in fElement.
325 RColumnElementBase *GetElement() const { return fElement.get(); }
// The column model stored in fModel.
326 const RColumnModel &GetModel() const { return fModel; }
// The column's order within its field (see fIndex).
327 std::uint32_t GetIndex() const { return fIndex; }
// The page sink pointer stored in fPageSink; the member is initialized to nullptr.
331 RPageSink *GetPageSink() const { return fPageSink; }
334}; // class RColumn
335
336} // namespace Internal
337} // namespace Experimental
338} // namespace ROOT
339
340#endif
#define R__likely(expr)
Definition RConfig.hxx:604
#define R__unlikely(expr)
Definition RConfig.hxx:603
#define R__ASSERT(e)
Definition TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A column element encapsulates the translation between basic C++ types and their column representation...
RColumn(const RColumn &)=delete
RPageStorage::ColumnHandle_t GetHandleSink() const
Definition RColumn.hxx:333
void Read(RClusterIndex clusterIndex, void *to)
Definition RColumn.hxx:178
void ConnectPageSink(DescriptorId_t fieldId, RPageSink &pageSink, NTupleSize_t firstElementIndex=0U)
Connect the column to a page sink.
Definition RColumn.cxx:42
std::unique_ptr< RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition RColumn.hxx:76
NTupleSize_t GetGlobalIndex(RClusterIndex clusterIndex)
Definition RColumn.hxx:265
RColumnElementBase * GetElement() const
Definition RColumn.hxx:325
void GetCollectionInfo(RClusterIndex clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
Definition RColumn.hxx:305
ColumnId_t fColumnIdSource
The column id is used to find matching pages with content when reading.
Definition RColumn.hxx:72
void ReadV(RClusterIndex clusterIndex, const ClusterSize_t::ValueType count, void *to)
Definition RColumn.hxx:208
int fWritePageIdx
Index of the current write page.
Definition RColumn.hxx:62
CppT * Map(const NTupleSize_t globalIndex)
Definition RColumn.hxx:228
RPageSource * GetPageSource() const
Definition RColumn.hxx:330
void AppendV(const void *from, std::size_t count)
Definition RColumn.hxx:136
void Append(const void *from)
Definition RColumn.hxx:122
CppT * Map(RClusterIndex clusterIndex)
Definition RColumn.hxx:234
CppT * MapV(RClusterIndex clusterIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:253
RColumn & operator=(const RColumn &)=delete
RPageStorage::ColumnHandle_t fHandleSource
Definition RColumn.hxx:54
void SwapWritePagesIfFull()
Used in Append() and AppendV() to switch pages when the main page reached the target size The other p...
Definition RColumn.hxx:82
NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition RColumn.hxx:68
void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, void *to)
Definition RColumn.hxx:189
void Read(const NTupleSize_t globalIndex, void *to)
Definition RColumn.hxx:167
void MapPage(const NTupleSize_t index)
Definition RColumn.cxx:88
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition RColumn.hxx:50
void ConnectPageSource(DescriptorId_t fieldId, RPageSource &pageSource)
Connect the column to a page source.
Definition RColumn.cxx:56
std::uint32_t fApproxNElementsPerPage
For writing, the targeted number of elements, given by fApproxNElementsPerPage (in the write options)...
Definition RColumn.hxx:66
void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:282
CppT * MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:241
static std::unique_ptr< RColumn > Create(const RColumnModel &model, std::uint32_t index)
Definition RColumn.hxx:104
const RColumnModel & GetModel() const
Definition RColumn.hxx:326
void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag)
Get the currently active cluster id.
Definition RColumn.hxx:315
RPageStorage::ColumnHandle_t fHandleSink
Definition RColumn.hxx:53
void FlushShadowWritePage()
When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed.
Definition RColumn.hxx:92
RPage fWritePage[2]
A set of open pages into which new elements are being written.
Definition RColumn.hxx:60
NTupleSize_t GetNElements() const
Definition RColumn.hxx:324
NTupleSize_t GetFirstElementIndex() const
Definition RColumn.hxx:329
RPageStorage::ColumnHandle_t GetHandleSource() const
Definition RColumn.hxx:332
RClusterIndex GetClusterIndex(NTupleSize_t globalIndex)
Definition RColumn.hxx:273
RPage fReadPage
The currently mapped page for reading.
Definition RColumn.hxx:70
NTupleSize_t fFirstElementIndex
Global index of the first element in this column; usually == 0, unless it is a deferred column.
Definition RColumn.hxx:74
Abstract interface to write data into an ntuple.
virtual void CommitPage(ColumnHandle_t columnHandle, const RPage &page)=0
Write a page to the storage. The column must have been added before.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
void Reset(NTupleSize_t rangeFirst)
Forget all currently stored elements (size == 0) and set a new starting index.
Definition RPage.hxx:122
void * GrowUnchecked(ClusterSize_t::ValueType nElements)
Called during writing: returns a pointer after the last element and increases the element counter in ...
Definition RPage.hxx:111
ClusterSize_t::ValueType GetClusterRangeFirst() const
Definition RPage.hxx:88
NTupleSize_t GetGlobalRangeFirst() const
Definition RPage.hxx:86
const RClusterInfo & GetClusterInfo() const
Definition RPage.hxx:92
std::uint32_t GetNElements() const
Definition RPage.hxx:84
bool Contains(NTupleSize_t globalIndex) const
Definition RPage.hxx:94
ClusterSize_t::ValueType GetClusterRangeLast() const
Definition RPage.hxx:89
NTupleSize_t GetGlobalRangeLast() const
Definition RPage.hxx:87
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
Holds the static meta-data of an RNTuple column.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr ColumnId_t kInvalidColumnId
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.