Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RColumn
17#define ROOT7_RColumn
18
19#include <ROOT/RConfig.hxx> // for R__likely
21#include <ROOT/RColumnModel.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RPage.hxx>
24#include <ROOT/RPageStorage.hxx>
25
26#include <TError.h>
27
28#include <memory>
29
30namespace ROOT {
31namespace Experimental {
32namespace Detail {
33
34// clang-format off
35/**
36\class ROOT::Experimental::RColumn
37\ingroup NTuple
38\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
39
40On the primitives data layer, the RColumn and RColumnElement are the equivalents to RField and RFieldValue on the
41logical data layer.
42*/
43// clang-format on
44class RColumn {
45private:
47 /**
48 * Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
49 * the offset column with index 0 and the character value column with index 1.
50 */
51 std::uint32_t fIndex;
52 RPageSink *fPageSink = nullptr;
56 /// A set of open pages into which new elements are being written. The pages are used
57 /// in rotation. They are 50% bigger than the target size given by the write options.
58 /// The current page is filled until the target size, but it is only committed once the other
59 /// write page is filled at least 50%. If a flush occurs earlier, a slightly oversized, single
60 /// page will be committed.
62 /// Index of the current write page
64 /// For writing, the targeted number of elements, given by `fApproxNElementsPerPage` (in the write options) and the element size.
65 /// We ensure this value to be >= 2 in Connect() so that we have meaningful
66 /// "page full" and "page half full" events when writing the page.
67 std::uint32_t fApproxNElementsPerPage = 0;
68 /// The number of elements written resp. available in the column
70 /// The currently mapped page for reading
72 /// The column id is used to find matching pages with content when reading
74 /// Used to pack and unpack pages on writing/reading
75 std::unique_ptr<RColumnElementBase> fElement;
76
77 RColumn(const RColumnModel &model, std::uint32_t index);
78
79 /// Used in Append() and AppendV() to switch pages when the main page reached the target size.
80 /// The other page has been flushed when the main page reached 50%.
83 return;
84
85 fWritePageIdx = 1 - fWritePageIdx; // == (fWritePageIdx + 1) % 2
88 }
89
90 /// When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed
92 auto otherIdx = 1 - fWritePageIdx;
93 if (fWritePage[otherIdx].IsEmpty())
94 return;
96 // Mark the page as flushed; the rangeFirst is zero for now but will be reset to
97 // fNElements in SwapWritePagesIfFull() when the pages swap
98 fWritePage[otherIdx].Reset(0);
99 }
100
101public:
102 template <typename CppT, EColumnType ColumnT>
103 static RColumn *Create(const RColumnModel &model, std::uint32_t index) {
104 R__ASSERT(model.GetType() == ColumnT);
105 auto column = new RColumn(model, index);
106 column->fElement = std::unique_ptr<RColumnElementBase>(new RColumnElement<CppT, ColumnT>(nullptr));
107 return column;
108 }
109
110 RColumn(const RColumn&) = delete;
111 RColumn &operator =(const RColumn&) = delete;
112 ~RColumn();
113
114 void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage);
115
116 void Append(const RColumnElementBase &element) {
117 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(1);
118
121 }
122
123 element.WriteTo(dst, 1);
124 fNElements++;
125
127 }
128
129 void AppendV(const RColumnElementBase &elemArray, std::size_t count) {
130 // We might not have enough space in the current page. In this case, fall back to one by one filling.
132 // TODO(jblomer): use (fewer) calls to AppendV to write the data page-by-page
133 for (unsigned i = 0; i < count; ++i) {
134 Append(RColumnElementBase(elemArray, i));
135 }
136 return;
137 }
138
139 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(count);
140
141 // The check for flushing the shadow page is more complicated than for the Append() case
142 // because we don't necessarily fill up to exactly fApproxNElementsPerPage / 2 elements;
143 // we might instead jump over the 50% fill level
146 {
148 }
149
150 elemArray.WriteTo(dst, count);
151 fNElements += count;
152
153 // Note that by the very first check in AppendV, we cannot have filled more than fApproxNElementsPerPage elements
155 }
156
157 void Read(const NTupleSize_t globalIndex, RColumnElementBase *element) {
158 if (!fReadPage.Contains(globalIndex)) {
159 MapPage(globalIndex);
160 R__ASSERT(fReadPage.Contains(globalIndex));
161 }
162 void *src = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
163 (globalIndex - fReadPage.GetGlobalRangeFirst()) * element->GetSize();
164 element->ReadFrom(src, 1);
165 }
166
167 void Read(const RClusterIndex &clusterIndex, RColumnElementBase *element) {
168 if (!fReadPage.Contains(clusterIndex)) {
169 MapPage(clusterIndex);
170 }
171 void *src = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
172 (clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst()) * element->GetSize();
173 element->ReadFrom(src, 1);
174 }
175
176 void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray) {
177 R__ASSERT(count > 0);
178 if (!fReadPage.Contains(globalIndex)) {
179 MapPage(globalIndex);
180 }
181 NTupleSize_t idxInPage = globalIndex - fReadPage.GetGlobalRangeFirst();
182
183 void *src = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemArray->GetSize();
184 if (globalIndex + count <= fReadPage.GetGlobalRangeLast() + 1) {
185 elemArray->ReadFrom(src, count);
186 } else {
187 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
188 elemArray->ReadFrom(src, nBatch);
189 RColumnElementBase elemTail(*elemArray, nBatch);
190 ReadV(globalIndex + nBatch, count - nBatch, &elemTail);
191 }
192 }
193
194 void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
195 {
196 if (!fReadPage.Contains(clusterIndex)) {
197 MapPage(clusterIndex);
198 }
199 NTupleSize_t idxInPage = clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst();
200
201 void* src = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemArray->GetSize();
202 if (clusterIndex.GetIndex() + count <= fReadPage.GetClusterRangeLast() + 1) {
203 elemArray->ReadFrom(src, count);
204 } else {
205 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
206 elemArray->ReadFrom(src, nBatch);
207 RColumnElementBase elemTail(*elemArray, nBatch);
208 ReadV(RClusterIndex(clusterIndex.GetClusterId(), clusterIndex.GetIndex() + nBatch), count - nBatch, &elemTail);
209 }
210 }
211
212 template <typename CppT>
213 CppT *Map(const NTupleSize_t globalIndex) {
214 NTupleSize_t nItems;
215 return MapV<CppT>(globalIndex, nItems);
216 }
217
218 template <typename CppT>
219 CppT *Map(const RClusterIndex &clusterIndex) {
220 NTupleSize_t nItems;
221 return MapV<CppT>(clusterIndex, nItems);
222 }
223
224 template <typename CppT>
225 CppT *MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems) {
226 if (R__unlikely(!fReadPage.Contains(globalIndex))) {
227 MapPage(globalIndex);
228 }
229 // +1 to go from 0-based indexing to 1-based number of items
230 nItems = fReadPage.GetGlobalRangeLast() - globalIndex + 1;
231 return reinterpret_cast<CppT*>(
232 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
234 }
235
236 template <typename CppT>
237 CppT *MapV(const RClusterIndex &clusterIndex, NTupleSize_t &nItems) {
238 if (!fReadPage.Contains(clusterIndex)) {
239 MapPage(clusterIndex);
240 }
241 // +1 to go from 0-based indexing to 1-based number of items
242 nItems = fReadPage.GetClusterRangeLast() - clusterIndex.GetIndex() + 1;
243 return reinterpret_cast<CppT*>(
244 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
246 }
247
249 if (!fReadPage.Contains(clusterIndex)) {
250 MapPage(clusterIndex);
251 }
252 return fReadPage.GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndex();
253 }
254
256 if (!fReadPage.Contains(globalIndex)) {
257 MapPage(globalIndex);
258 }
260 globalIndex - fReadPage.GetClusterInfo().GetIndexOffset());
261 }
262
263 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
 /// The value at globalIndex is taken as the collection's end; the value at globalIndex - 1 is taken as its start,
 /// unless globalIndex is 0 or is the first element of its cluster, in which case the start is 0.
 /// \param globalIndex Global index of the offset element to inspect
 /// \param collectionStart Output: cluster id of the read page mapped at the end of the call, paired with the start value
 /// \param collectionSize Output: end value minus start value
264 void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
265 {
266 NTupleSize_t idxStart = 0;
267 NTupleSize_t idxEnd;
268 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
269 if (R__likely(globalIndex > 0)) {
270 if (R__likely(fReadPage.Contains(globalIndex - 1))) {
 // The predecessor is already mapped: read it first, then read globalIndex (which may map the next page)
271 idxStart = *Map<ClusterSize_t>(globalIndex - 1);
272 idxEnd = *Map<ClusterSize_t>(globalIndex);
 // If globalIndex turned out to be the first element of its cluster, the predecessor's value is discarded
273 if (R__unlikely(fReadPage.GetClusterInfo().GetIndexOffset() == globalIndex))
274 idxStart = 0;
275 } else {
 // The predecessor is not mapped: read globalIndex first and only map the predecessor
 // if globalIndex is not the first element of its cluster
276 idxEnd = *Map<ClusterSize_t>(globalIndex);
277 auto selfOffset = fReadPage.GetClusterInfo().GetIndexOffset();
278 idxStart = (globalIndex == selfOffset) ? 0 : *Map<ClusterSize_t>(globalIndex - 1);
279 }
280 } else {
 // Very first element of the column: the start stays at 0
281 idxEnd = *Map<ClusterSize_t>(globalIndex);
282 }
283 *collectionSize = idxEnd - idxStart;
284 *collectionStart = RClusterIndex(fReadPage.GetClusterInfo().GetId(), idxStart);
285 }
286
 /// Cluster-relative variant: look at the two adjacent values that define a collection's coordinates.
 /// The start is 0 for index 0 of the cluster, otherwise the value stored at the preceding index;
 /// the end is the value stored at clusterIndex.
 /// \param clusterIndex Cluster id and index within the cluster of the offset element to inspect
 /// \param collectionStart Output: the cluster id of clusterIndex paired with the start value
 /// \param collectionSize Output: end value minus start value
287 void GetCollectionInfo(const RClusterIndex &clusterIndex,
288 RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
289 {
290 auto index = clusterIndex.GetIndex();
 // The predecessor is read before the element itself
291 auto idxStart = (index == 0) ? 0 : *Map<ClusterSize_t>(clusterIndex - 1);
292 auto idxEnd = *Map<ClusterSize_t>(clusterIndex);
293 *collectionSize = idxEnd - idxStart;
294 *collectionStart = RClusterIndex(clusterIndex.GetClusterId(), idxStart);
295 }
296
297 /// Read the element at globalIndex as an RColumnSwitch and return its two parts through the output pointers
 /// \param globalIndex Global index of the switch element to read
 /// \param varIndex Output: the switch's index, paired with the cluster id of the read page mapped for globalIndex
 /// \param tag Output: the switch's tag
298 void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag) {
 // Map() makes sure the page containing globalIndex is the current read page
299 auto varSwitch = Map<RColumnSwitch>(globalIndex);
300 *varIndex = RClusterIndex(fReadPage.GetClusterInfo().GetId(), varSwitch->GetIndex());
301 *tag = varSwitch->GetTag();
302 }
303
304 void Flush();
305 void MapPage(const NTupleSize_t index);
306 void MapPage(const RClusterIndex &clusterIndex);
308 RColumnElementBase *GetElement() const { return fElement.get(); }
309 const RColumnModel &GetModel() const { return fModel; }
310 std::uint32_t GetIndex() const { return fIndex; }
316};
317
318} // namespace Detail
319
320} // namespace Experimental
321} // namespace ROOT
322
323#endif
#define R__likely(expr)
Definition RConfig.hxx:599
#define R__unlikely(expr)
Definition RConfig.hxx:598
#define R__ASSERT(e)
Definition TError.h:118
void WriteTo(void *destination, std::size_t count) const
Write one or multiple column elements into destination.
void ReadFrom(void *source, std::size_t count)
Set the column element or an array of elements from the memory location source.
Pairs of C++ type and column type, like float and EColumnType::kReal32.
static RColumn * Create(const RColumnModel &model, std::uint32_t index)
Definition RColumn.hxx:103
RPageStorage::ColumnHandle_t GetHandleSource() const
Definition RColumn.hxx:313
void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
Definition RColumn.hxx:176
RColumn & operator=(const RColumn &)=delete
void GetCollectionInfo(const RClusterIndex &clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
Definition RColumn.hxx:287
const RColumnModel & GetModel() const
Definition RColumn.hxx:309
ColumnId_t GetColumnIdSource() const
Definition RColumn.hxx:311
std::uint32_t GetIndex() const
Definition RColumn.hxx:310
RPage fReadPage
The currently mapped page for reading.
Definition RColumn.hxx:71
CppT * MapV(const RClusterIndex &clusterIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:237
void AppendV(const RColumnElementBase &elemArray, std::size_t count)
Definition RColumn.hxx:129
RColumnElementBase * GetElement() const
Definition RColumn.hxx:308
void Append(const RColumnElementBase &element)
Definition RColumn.hxx:116
RNTupleVersion GetVersion() const
Definition RColumn.hxx:315
CppT * MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:225
void FlushShadowWritePage()
When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed.
Definition RColumn.hxx:91
void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage)
Definition RColumn.cxx:43
void MapPage(const NTupleSize_t index)
Definition RColumn.cxx:87
void SwapWritePagesIfFull()
Used in Append() and AppendV() to switch pages when the main page reached the target size. The other p...
Definition RColumn.hxx:81
void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag)
Read the RColumnSwitch at globalIndex and return its index (within the cluster of the mapped page) and its tag.
Definition RColumn.hxx:298
CppT * Map(const NTupleSize_t globalIndex)
Definition RColumn.hxx:213
std::uint32_t fApproxNElementsPerPage
For writing, the targeted number of elements, given by fApproxNElementsPerPage (in the write options)...
Definition RColumn.hxx:67
void Read(const NTupleSize_t globalIndex, RColumnElementBase *element)
Definition RColumn.hxx:157
CppT * Map(const RClusterIndex &clusterIndex)
Definition RColumn.hxx:219
NTupleSize_t GetGlobalIndex(const RClusterIndex &clusterIndex)
Definition RColumn.hxx:248
std::unique_ptr< RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition RColumn.hxx:75
int fWritePageIdx
Index of the current write page.
Definition RColumn.hxx:63
NTupleSize_t GetNElements() const
Definition RColumn.hxx:307
RColumn(const RColumn &)=delete
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition RColumn.hxx:51
RPageStorage::ColumnHandle_t GetHandleSink() const
Definition RColumn.hxx:314
RPageSource * GetPageSource() const
Definition RColumn.hxx:312
NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition RColumn.hxx:69
ColumnId_t fColumnIdSource
The column id is used to find matching pages with content when reading.
Definition RColumn.hxx:73
void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
Definition RColumn.hxx:194
RPageStorage::ColumnHandle_t fHandleSink
Definition RColumn.hxx:54
void Read(const RClusterIndex &clusterIndex, RColumnElementBase *element)
Definition RColumn.hxx:167
RPageStorage::ColumnHandle_t fHandleSource
Definition RColumn.hxx:55
void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:264
RClusterIndex GetClusterIndex(NTupleSize_t globalIndex)
Definition RColumn.hxx:255
RPage fWritePage[2]
A set of open pages into which new elements are being written.
Definition RColumn.hxx:61
Abstract interface to write data into an ntuple.
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
Abstract interface to read data from an ntuple.
Common functionality of an ntuple storage for both reading and writing.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
ClusterSize_t::ValueType GetClusterRangeLast() const
Definition RPage.hxx:88
ClusterSize_t::ValueType GetNElements() const
Definition RPage.hxx:83
bool Contains(NTupleSize_t globalIndex) const
Definition RPage.hxx:93
void * GrowUnchecked(ClusterSize_t::ValueType nElements)
Called during writing: returns a pointer after the last element and increases the element counter in ...
Definition RPage.hxx:109
const RClusterInfo & GetClusterInfo() const
Definition RPage.hxx:91
void Reset(NTupleSize_t rangeFirst)
Forget all currently stored elements (size == 0) and set a new starting index.
Definition RPage.hxx:120
NTupleSize_t GetGlobalRangeFirst() const
Definition RPage.hxx:85
NTupleSize_t GetGlobalRangeLast() const
Definition RPage.hxx:86
ClusterSize_t::ValueType GetClusterRangeFirst() const
Definition RPage.hxx:87
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
Holds the static meta-data of a column in a tree.
For forward and backward compatibility, attach version information to the constituents of the file f...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr ColumnId_t kInvalidColumnId
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...