Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RColumn
17#define ROOT7_RColumn
18
19#include <ROOT/RConfig.hxx> // for R__likely
21#include <ROOT/RColumnModel.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RPage.hxx>
24#include <ROOT/RPageStorage.hxx>
25
26#include <TError.h>
27
28#include <memory>
29#include <utility>
30
31namespace ROOT {
32namespace Experimental {
33namespace Detail {
34
35// clang-format off
36/**
37\class ROOT::Experimental::Detail::RColumn
38\ingroup NTuple
39\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
40
41On the primitives data layer, the RColumn and RColumnElement are the equivalents to RField and RFieldValue on the
42logical data layer.
43*/
44// clang-format on
45class RColumn {
46private:
48 /**
49 * Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
50 * the offset column with index 0 and the character value column with index 1.
51 */
52 std::uint32_t fIndex;
53 RPageSink *fPageSink = nullptr;
57 /// A set of open pages into which new elements are being written. The pages are used
58 /// in rotation. They are 50% bigger than the target size given by the write options.
59 /// The current page is filled until the target size, but it is only committed once the other
60 /// write page is filled at least 50%. If a flush occurs earlier, a slightly oversized, single
61 /// page will be committed.
63 /// Index of the current write page
65 /// For writing, the targeted number of elements, given by `fApproxNElementsPerPage` (in the write options) and the element size.
66 /// We ensure this value to be >= 2 in Connect() so that we have meaningful
67 /// "page full" and "page half full" events when writing the page.
68 std::uint32_t fApproxNElementsPerPage = 0;
69 /// The number of elements written to the column (when writing) or available in it (when reading)
71 /// The currently mapped page for reading
73 /// The column id is used to find matching pages with content when reading
75 /// Used to pack and unpack pages on writing/reading
76 std::unique_ptr<RColumnElementBase> fElement;
77
78 RColumn(const RColumnModel &model, std::uint32_t index);
79
80 /// Used in Append() and AppendV() to switch pages when the main page has reached the target size.
81 /// The other page has been flushed when the main page reached 50%.
84 return;
85
86 fWritePageIdx = 1 - fWritePageIdx; // == (fWritePageIdx + 1) % 2
89 }
90
91 /// When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed
93 auto otherIdx = 1 - fWritePageIdx;
94 if (fWritePage[otherIdx].IsEmpty())
95 return;
97 // Mark the page as flushed; the rangeFirst is zero for now but will be reset to
98 // fNElements in SwapWritePagesIfFull() when the pages swap
99 fWritePage[otherIdx].Reset(0);
100 }
101
102public:
103 template <typename CppT>
104 static std::unique_ptr<RColumn> Create(const RColumnModel &model, std::uint32_t index)
105 {
106 auto column = std::unique_ptr<RColumn>(new RColumn(model, index));
107 column->fElement = RColumnElementBase::Generate<CppT>(model.GetType());
108 return column;
109 }
110
111 RColumn(const RColumn&) = delete;
112 RColumn &operator =(const RColumn&) = delete;
113 ~RColumn();
114
115 void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage);
116
117 void Append(const RColumnElementBase &element) {
118 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(1);
119
122 }
123
124 element.WriteTo(dst, 1);
125 fNElements++;
126
128 }
129
130 void AppendV(const RColumnElementBase &elemArray, std::size_t count) {
131 // We might not have enough space in the current page. In this case, fall back to one by one filling.
133 // TODO(jblomer): use (fewer) calls to AppendV to write the data page-by-page
134 for (unsigned i = 0; i < count; ++i) {
135 Append(RColumnElementBase(elemArray, i));
136 }
137 return;
138 }
139
140 // The check for flushing the shadow page is more complicated than for the Append() case
141 // because we don't necessarily fill up to exactly fApproxNElementsPerPage / 2 elements;
142 // we might instead jump over the 50% fill level.
143 // This check should be done before calling `RPage::GrowUnchecked()` as the latter affects the return value of
144 // `RPage::GetNElements()`.
147 {
149 }
150
151 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(count);
152
153 elemArray.WriteTo(dst, count);
154 fNElements += count;
155
156 // Note that by the very first check in AppendV, we cannot have filled more than fApproxNElementsPerPage elements
158 }
159
160 void Read(const NTupleSize_t globalIndex, RColumnElementBase *element) {
161 if (!fReadPage.Contains(globalIndex)) {
162 MapPage(globalIndex);
163 R__ASSERT(fReadPage.Contains(globalIndex));
164 }
165 void *src = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
166 (globalIndex - fReadPage.GetGlobalRangeFirst()) * element->GetSize();
167 element->ReadFrom(src, 1);
168 }
169
170 void Read(const RClusterIndex &clusterIndex, RColumnElementBase *element) {
171 if (!fReadPage.Contains(clusterIndex)) {
172 MapPage(clusterIndex);
173 }
174 void *src = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
175 (clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst()) * element->GetSize();
176 element->ReadFrom(src, 1);
177 }
178
179 void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray) {
180 R__ASSERT(count > 0);
181 if (!fReadPage.Contains(globalIndex)) {
182 MapPage(globalIndex);
183 }
184 NTupleSize_t idxInPage = globalIndex - fReadPage.GetGlobalRangeFirst();
185
186 void *src = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemArray->GetSize();
187 if (globalIndex + count <= fReadPage.GetGlobalRangeLast() + 1) {
188 elemArray->ReadFrom(src, count);
189 } else {
190 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
191 elemArray->ReadFrom(src, nBatch);
192 RColumnElementBase elemTail(*elemArray, nBatch);
193 ReadV(globalIndex + nBatch, count - nBatch, &elemTail);
194 }
195 }
196
197 void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
198 {
199 if (!fReadPage.Contains(clusterIndex)) {
200 MapPage(clusterIndex);
201 }
202 NTupleSize_t idxInPage = clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst();
203
204 void* src = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemArray->GetSize();
205 if (clusterIndex.GetIndex() + count <= fReadPage.GetClusterRangeLast() + 1) {
206 elemArray->ReadFrom(src, count);
207 } else {
208 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
209 elemArray->ReadFrom(src, nBatch);
210 RColumnElementBase elemTail(*elemArray, nBatch);
211 ReadV(RClusterIndex(clusterIndex.GetClusterId(), clusterIndex.GetIndex() + nBatch), count - nBatch, &elemTail);
212 }
213 }
214
215 template <typename CppT>
216 CppT *Map(const NTupleSize_t globalIndex) {
217 NTupleSize_t nItems;
218 return MapV<CppT>(globalIndex, nItems);
219 }
220
221 template <typename CppT>
222 CppT *Map(const RClusterIndex &clusterIndex) {
223 NTupleSize_t nItems;
224 return MapV<CppT>(clusterIndex, nItems);
225 }
226
227 template <typename CppT>
228 CppT *MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems) {
229 if (R__unlikely(!fReadPage.Contains(globalIndex))) {
230 MapPage(globalIndex);
231 }
232 // +1 to go from 0-based indexing to 1-based number of items
233 nItems = fReadPage.GetGlobalRangeLast() - globalIndex + 1;
234 return reinterpret_cast<CppT*>(
235 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
237 }
238
239 template <typename CppT>
240 CppT *MapV(const RClusterIndex &clusterIndex, NTupleSize_t &nItems) {
241 if (!fReadPage.Contains(clusterIndex)) {
242 MapPage(clusterIndex);
243 }
244 // +1 to go from 0-based indexing to 1-based number of items
245 nItems = fReadPage.GetClusterRangeLast() - clusterIndex.GetIndex() + 1;
246 return reinterpret_cast<CppT*>(
247 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
249 }
250
252 if (!fReadPage.Contains(clusterIndex)) {
253 MapPage(clusterIndex);
254 }
255 return fReadPage.GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndex();
256 }
257
259 if (!fReadPage.Contains(globalIndex)) {
260 MapPage(globalIndex);
261 }
263 globalIndex - fReadPage.GetClusterInfo().GetIndexOffset());
264 }
265
266 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
267 void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
268 {
269 NTupleSize_t idxStart = 0;
270 NTupleSize_t idxEnd;
271 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
272 if (R__likely(globalIndex > 0)) {
273 if (R__likely(fReadPage.Contains(globalIndex - 1))) {
274 idxStart = *Map<ClusterSize_t>(globalIndex - 1);
275 idxEnd = *Map<ClusterSize_t>(globalIndex);
276 if (R__unlikely(fReadPage.GetClusterInfo().GetIndexOffset() == globalIndex))
277 idxStart = 0;
278 } else {
279 idxEnd = *Map<ClusterSize_t>(globalIndex);
280 auto selfOffset = fReadPage.GetClusterInfo().GetIndexOffset();
281 idxStart = (globalIndex == selfOffset) ? 0 : *Map<ClusterSize_t>(globalIndex - 1);
282 }
283 } else {
284 idxEnd = *Map<ClusterSize_t>(globalIndex);
285 }
286 *collectionSize = idxEnd - idxStart;
287 *collectionStart = RClusterIndex(fReadPage.GetClusterInfo().GetId(), idxStart);
288 }
289
290 void GetCollectionInfo(const RClusterIndex &clusterIndex,
291 RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
292 {
293 auto index = clusterIndex.GetIndex();
294 auto idxStart = (index == 0) ? 0 : *Map<ClusterSize_t>(clusterIndex - 1);
295 auto idxEnd = *Map<ClusterSize_t>(clusterIndex);
296 *collectionSize = idxEnd - idxStart;
297 *collectionStart = RClusterIndex(clusterIndex.GetClusterId(), idxStart);
298 }
299
300 /// Get the currently active cluster id
301 void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag) {
302 auto varSwitch = Map<RColumnSwitch>(globalIndex);
303 *varIndex = RClusterIndex(fReadPage.GetClusterInfo().GetId(), varSwitch->GetIndex());
304 *tag = varSwitch->GetTag();
305 }
306
307 void Flush();
308 void MapPage(const NTupleSize_t index);
309 void MapPage(const RClusterIndex &clusterIndex);
311 RColumnElementBase *GetElement() const { return fElement.get(); }
312 const RColumnModel &GetModel() const { return fModel; }
313 std::uint32_t GetIndex() const { return fIndex; }
318};
319
320} // namespace Detail
321
322} // namespace Experimental
323} // namespace ROOT
324
325#endif
#define R__likely(expr)
Definition RConfig.hxx:609
#define R__unlikely(expr)
Definition RConfig.hxx:608
#define R__ASSERT(e)
Definition TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
void WriteTo(void *destination, std::size_t count) const
Write one or multiple column elements into destination.
void ReadFrom(void *source, std::size_t count)
Set the column element or an array of elements from the memory location source.
A column element points either to the content of an RFieldValue or into a memory mapped page.
RPageStorage::ColumnHandle_t GetHandleSource() const
Definition RColumn.hxx:316
void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
Definition RColumn.hxx:179
RColumn & operator=(const RColumn &)=delete
void GetCollectionInfo(const RClusterIndex &clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
Definition RColumn.hxx:290
const RColumnModel & GetModel() const
Definition RColumn.hxx:312
ColumnId_t GetColumnIdSource() const
Definition RColumn.hxx:314
std::uint32_t GetIndex() const
Definition RColumn.hxx:313
RPage fReadPage
The currently mapped page for reading.
Definition RColumn.hxx:72
CppT * MapV(const RClusterIndex &clusterIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:240
void AppendV(const RColumnElementBase &elemArray, std::size_t count)
Definition RColumn.hxx:130
RColumnElementBase * GetElement() const
Definition RColumn.hxx:311
void Append(const RColumnElementBase &element)
Definition RColumn.hxx:117
CppT * MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:228
void FlushShadowWritePage()
When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed.
Definition RColumn.hxx:92
void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage)
Definition RColumn.cxx:43
void MapPage(const NTupleSize_t index)
Definition RColumn.cxx:87
void SwapWritePagesIfFull()
Used in Append() and AppendV() to switch pages when the main page reached the target size The other p...
Definition RColumn.hxx:82
void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag)
Get the currently active cluster id.
Definition RColumn.hxx:301
CppT * Map(const NTupleSize_t globalIndex)
Definition RColumn.hxx:216
std::uint32_t fApproxNElementsPerPage
For writing, the targeted number of elements, given by fApproxNElementsPerPage (in the write options)...
Definition RColumn.hxx:68
void Read(const NTupleSize_t globalIndex, RColumnElementBase *element)
Definition RColumn.hxx:160
CppT * Map(const RClusterIndex &clusterIndex)
Definition RColumn.hxx:222
NTupleSize_t GetGlobalIndex(const RClusterIndex &clusterIndex)
Definition RColumn.hxx:251
std::unique_ptr< RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition RColumn.hxx:76
int fWritePageIdx
Index of the current write page.
Definition RColumn.hxx:64
NTupleSize_t GetNElements() const
Definition RColumn.hxx:310
RColumn(const RColumn &)=delete
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition RColumn.hxx:52
static std::unique_ptr< RColumn > Create(const RColumnModel &model, std::uint32_t index)
Definition RColumn.hxx:104
RPageStorage::ColumnHandle_t GetHandleSink() const
Definition RColumn.hxx:317
RPageSource * GetPageSource() const
Definition RColumn.hxx:315
NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition RColumn.hxx:70
ColumnId_t fColumnIdSource
The column id is used to find matching pages with content when reading.
Definition RColumn.hxx:74
void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
Definition RColumn.hxx:197
RPageStorage::ColumnHandle_t fHandleSink
Definition RColumn.hxx:55
void Read(const RClusterIndex &clusterIndex, RColumnElementBase *element)
Definition RColumn.hxx:170
RPageStorage::ColumnHandle_t fHandleSource
Definition RColumn.hxx:56
void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:267
RClusterIndex GetClusterIndex(NTupleSize_t globalIndex)
Definition RColumn.hxx:258
RPage fWritePage[2]
A set of open pages into which new elements are being written.
Definition RColumn.hxx:62
Abstract interface to write data into an ntuple.
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
Abstract interface to read data from an ntuple.
Common functionality of an ntuple storage for both reading and writing.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
ClusterSize_t::ValueType GetClusterRangeLast() const
Definition RPage.hxx:88
bool Contains(NTupleSize_t globalIndex) const
Definition RPage.hxx:93
void * GrowUnchecked(ClusterSize_t::ValueType nElements)
Called during writing: returns a pointer after the last element and increases the element counter in ...
Definition RPage.hxx:109
const RClusterInfo & GetClusterInfo() const
Definition RPage.hxx:91
std::uint32_t GetNElements() const
Definition RPage.hxx:83
void Reset(NTupleSize_t rangeFirst)
Forget all currently stored elements (size == 0) and set a new starting index.
Definition RPage.hxx:120
NTupleSize_t GetGlobalRangeFirst() const
Definition RPage.hxx:85
NTupleSize_t GetGlobalRangeLast() const
Definition RPage.hxx:86
ClusterSize_t::ValueType GetClusterRangeFirst() const
Definition RPage.hxx:87
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
Holds the static meta-data of an RNTuple column.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr ColumnId_t kInvalidColumnId
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Wrap the integer in a struct in order to avoid template specialization clash with std::uint32_t.