Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RColumn
17#define ROOT7_RColumn
18
19#include <ROOT/RConfig.hxx> // for R__likely
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
23#include <ROOT/RPageStorage.hxx>
24
25#include <TError.h>
26
27#include <cstring> // for memcpy
28#include <memory>
29#include <utility>
30
32
33// clang-format off
34/**
35\class ROOT::Experimental::Internal::RColumn
36\ingroup NTuple
37\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
38*/
39// clang-format on
40class RColumn {
41private:
43 /// Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
44 /// the offset column with index 0 and the character value column with index 1.
45 std::uint32_t fIndex;
46 /// Fields can have multiple column representations, distinguished by representation index
47 std::uint16_t fRepresentationIndex;
48 RPageSink *fPageSink = nullptr;
52 /// The page into which new elements are being written. The page will initially be small
53 /// (RNTupleWriteOptions::fInitialUnzippedPageSize, which corresponds to fInitialElements) and expand as needed and
54 /// as memory for page buffers is still available (RNTupleWriteOptions::fPageBufferBudget) or the maximum page
55 /// size is reached (RNTupleWriteOptions::fMaxUnzippedPageSize).
57 /// The initial number of elements in a page
59 /// The number of elements written resp. available in the column
61 /// The currently mapped page for reading
63 /// The column id in the column descriptor, once connected to a sink or source
65 /// Global index of the first element in this column; usually == 0, unless it is a deferred column
67 /// Used to pack and unpack pages on writing/reading
68 std::unique_ptr<RColumnElementBase> fElement;
69 /// The column team is a set of columns that serve the same column index for different representation IDs.
70 /// Initially, the team has only one member, the very column it belongs to. Through MergeTeams(), two columns
71 /// can join forces. The team is used to react on suppressed columns: if the current team member has a suppressed
72 /// column for a MapPage() call, it gets the page from the active column in the corresponding cluster.
73 std::vector<RColumn *> fTeam;
74 /// Points into fTeam to the column that successfully returned the last page.
75 std::size_t fLastGoodTeamIdx = 0;
76
77 RColumn(EColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex);
78
79 /// Used when trying to append to a full write page. If possible, expand the page. Otherwise, flush and reset
80 /// to the minimal size.
82 {
83 auto newMaxElements = fWritePage.GetMaxElements() * 2;
84 if (newMaxElements * fElement->GetSize() > fPageSink->GetWriteOptions().GetMaxUnzippedPageSize()) {
85 newMaxElements = fPageSink->GetWriteOptions().GetMaxUnzippedPageSize() / fElement->GetSize();
86 }
87
88 if (newMaxElements == fWritePage.GetMaxElements()) {
89 // Maximum page size reached, flush and reset
90 Flush();
91 } else {
92 auto expandedPage = fPageSink->ReservePage(fHandleSink, newMaxElements);
93 if (expandedPage.IsNull()) {
94 Flush();
95 } else {
96 memcpy(expandedPage.GetBuffer(), fWritePage.GetBuffer(), fWritePage.GetNBytes());
97 expandedPage.Reset(fNElements);
98 expandedPage.GrowUnchecked(fWritePage.GetNElements());
99 fWritePage = std::move(expandedPage);
100 }
101 }
102
104 }
105
106public:
/// Factory for columns: constructs an RColumn from the given column type, column index and representation index
/// and installs the column element returned by RColumnElementBase::Generate<CppT>(type).
/// \tparam CppT C++ type handed to RColumnElementBase::Generate() to select the column element
/// \param type column type, passed both to the constructor and to the element generator
/// \param columnIdx column index passed to the constructor
/// \param representationIdx representation index passed to the constructor
/// \return the newly created column, owned by the returned unique_ptr
107 template <typename CppT>
108 static std::unique_ptr<RColumn> Create(EColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx)
109 {
// The constructor is declared in the private section, so std::make_unique cannot reach it; use new directly.
110 auto column = std::unique_ptr<RColumn>(new RColumn(type, columnIdx, representationIdx));
111 column->fElement = RColumnElementBase::Generate<CppT>(type);
112 return column;
113 }
114
115 RColumn(const RColumn &) = delete;
116 RColumn &operator=(const RColumn &) = delete;
117 ~RColumn();
118
119 /// Connect the column to a page sink. `firstElementIndex` can be used to specify the first column element index
120 /// with backing storage for this column. On read back, elements before `firstElementIndex` will cause the zero page
121 /// to be mapped.
122 void ConnectPageSink(DescriptorId_t fieldId, RPageSink &pageSink, NTupleSize_t firstElementIndex = 0U);
123 /// Connect the column to a page source.
124 void ConnectPageSource(DescriptorId_t fieldId, RPageSource &pageSource);
125
126 void Append(const void *from)
127 {
130 }
131
132 void *dst = fWritePage.GrowUnchecked(1);
133
134 std::memcpy(dst, from, fElement->GetSize());
135 fNElements++;
136 }
137
138 void AppendV(const void *from, std::size_t count)
139 {
140 auto src = reinterpret_cast<const unsigned char *>(from);
141 // TODO(jblomer): A future optimization should grow the page in one go, up to the maximum unzipped page size
142 while (count > 0) {
143 std::size_t nElementsRemaining = fWritePage.GetMaxElements() - fWritePage.GetNElements();
144 if (nElementsRemaining == 0) {
146 nElementsRemaining = fWritePage.GetMaxElements() - fWritePage.GetNElements();
147 }
148
149 assert(nElementsRemaining > 0);
150 auto nBatch = std::min(count, nElementsRemaining);
151
152 void *dst = fWritePage.GrowUnchecked(nBatch);
153 std::memcpy(dst, src, nBatch * fElement->GetSize());
154 src += nBatch * fElement->GetSize();
155 count -= nBatch;
156 fNElements += nBatch;
157 }
158 }
159
/// Copies the single element with global index `globalIndex` into the memory pointed to by `to`.
/// If the currently mapped read page does not contain `globalIndex`, MapPage() is called first; MapPage() asserts
/// that the mapping succeeded.
/// \param globalIndex global index of the element to read
/// \param to destination; fElement->GetSize() bytes are written to it
160 void Read(const NTupleSize_t globalIndex, void *to)
161 {
162 if (!fReadPageRef.Get().Contains(globalIndex)) {
163 MapPage(globalIndex);
164 }
165 const auto elemSize = fElement->GetSize();
// Source address: start of the page buffer plus the element's position relative to the first global index of the
// page, scaled by the element size.
166 void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
167 (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * elemSize;
168 std::memcpy(to, from, elemSize);
169 }
170
/// Copies the single element addressed by `clusterIndex` into the memory pointed to by `to`.
/// If the currently mapped read page does not contain `clusterIndex`, MapPage() is called first; MapPage() asserts
/// that the mapping succeeded.
/// \param clusterIndex cluster-relative address of the element to read
/// \param to destination; fElement->GetSize() bytes are written to it
171 void Read(RClusterIndex clusterIndex, void *to)
172 {
173 if (!fReadPageRef.Get().Contains(clusterIndex)) {
174 MapPage(clusterIndex);
175 }
176 const auto elemSize = fElement->GetSize();
// Source address: start of the page buffer plus the element's position relative to the first in-cluster index of
// the page, scaled by the element size.
177 void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
178 (clusterIndex.GetIndex() - fReadPageRef.Get().GetClusterRangeFirst()) * elemSize;
179 std::memcpy(to, from, elemSize);
180 }
181
/// Copies `count` consecutive elements, starting at global index `globalIndex`, into the memory pointed to by `to`.
/// The copy proceeds page by page: each iteration maps the page containing the current index if it is not the
/// mapped one, then copies the elements from the current index to the end of that page, capped at the remaining
/// count.
/// \param globalIndex global index of the first element to read
/// \param count number of elements to read; nothing is copied when it is 0
/// \param to destination; count * fElement->GetSize() bytes are written to it in total
182 void ReadV(NTupleSize_t globalIndex, NTupleSize_t count, void *to)
183 {
184 const auto elemSize = fElement->GetSize();
// Write cursor into the destination buffer, advanced after every batch
185 auto tail = static_cast<unsigned char *>(to);
186
187 while (count > 0) {
188 if (!fReadPageRef.Get().Contains(globalIndex)) {
189 MapPage(globalIndex);
190 }
// Position of the current element relative to the first element of the mapped page
191 const NTupleSize_t idxInPage = globalIndex - fReadPageRef.Get().GetGlobalRangeFirst();
192
193 const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
// Elements left in this page from idxInPage onwards, but never more than still requested
194 const NTupleSize_t nBatch = std::min(fReadPageRef.Get().GetNElements() - idxInPage, count);
195
196 std::memcpy(tail, from, elemSize * nBatch);
197
198 tail += nBatch * elemSize;
199 count -= nBatch;
200 globalIndex += nBatch;
201 }
202 }
203
/// Copies `count` consecutive elements, starting at `clusterIndex`, into the memory pointed to by `to`.
/// The copy proceeds page by page: each iteration maps the page containing the current index if it is not the
/// mapped one, then copies the elements from the current index to the end of that page, capped at the remaining
/// count. The cluster id of `clusterIndex` is kept throughout; only the in-cluster index advances.
/// \param clusterIndex cluster-relative address of the first element to read
/// \param count number of elements to read; nothing is copied when it is 0
/// \param to destination; count * fElement->GetSize() bytes are written to it in total
204 void ReadV(RClusterIndex clusterIndex, NTupleSize_t count, void *to)
205 {
206 const auto elemSize = fElement->GetSize();
// Write cursor into the destination buffer, advanced after every batch
207 auto tail = static_cast<unsigned char *>(to);
208
209 while (count > 0) {
210 if (!fReadPageRef.Get().Contains(clusterIndex)) {
211 MapPage(clusterIndex);
212 }
// Position of the current element relative to the first in-cluster index of the mapped page
213 NTupleSize_t idxInPage = clusterIndex.GetIndex() - fReadPageRef.Get().GetClusterRangeFirst();
214
215 const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
// Elements left in this page from idxInPage onwards, but never more than still requested
216 const NTupleSize_t nBatch = std::min(count, fReadPageRef.Get().GetNElements() - idxInPage);
217
218 std::memcpy(tail, from, elemSize * nBatch);
219
220 tail += nBatch * elemSize;
221 count -= nBatch;
// Same cluster, in-cluster index moved forward by the number of elements just copied
222 clusterIndex = RClusterIndex(clusterIndex.GetClusterId(), clusterIndex.GetIndex() + nBatch);
223 }
224 }
225
/// Returns a pointer to the element with global index `globalIndex` inside the buffer of the read page.
/// Convenience wrapper around MapV() for callers that do not need the number of items available in the page.
/// \tparam CppT type of the returned pointer, forwarded to MapV()
226 template <typename CppT>
227 CppT *Map(const NTupleSize_t globalIndex)
228 {
// Only needed as the output argument of MapV(); the value is discarded
229 NTupleSize_t nItems;
230 return MapV<CppT>(globalIndex, nItems);
231 }
232
/// Returns a pointer to the element addressed by `clusterIndex` inside the buffer of the read page.
/// Convenience wrapper around MapV() for callers that do not need the number of items available in the page.
/// \tparam CppT type of the returned pointer, forwarded to MapV()
233 template <typename CppT>
234 CppT *Map(RClusterIndex clusterIndex)
235 {
// Only needed as the output argument of MapV(); the value is discarded
236 NTupleSize_t nItems;
237 return MapV<CppT>(clusterIndex, nItems);
238 }
239
/// Returns a pointer into the buffer of the read page at the element with global index `globalIndex`. The page
/// containing `globalIndex` is mapped first if it is not the currently mapped one.
/// \tparam CppT type of the returned pointer; the offset into the buffer is computed with sizeof(CppT), not with
/// the size reported by fElement. NOTE(review): presumably CppT has to match the column's in-memory element
/// type -- confirm.
/// \param globalIndex global index of the first element of interest
/// \param[out] nItems number of elements from `globalIndex` up to and including the last element of the page
/// \return pointer to the element inside the page buffer. NOTE(review): presumably valid only while this page
/// stays mapped -- confirm.
240 template <typename CppT>
241 CppT *MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems)
242 {
243 if (R__unlikely(!fReadPageRef.Get().Contains(globalIndex))) {
244 MapPage(globalIndex);
245 }
246 // +1 to go from 0-based indexing to 1-based number of items
247 nItems = fReadPageRef.Get().GetGlobalRangeLast() - globalIndex + 1;
248 return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
249 (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * sizeof(CppT));
250 }
251
/// Returns a pointer into the buffer of the read page at the element addressed by `clusterIndex`. The page
/// containing `clusterIndex` is mapped first if it is not the currently mapped one.
/// \tparam CppT type of the returned pointer; the offset into the buffer is computed with sizeof(CppT), not with
/// the size reported by fElement. NOTE(review): presumably CppT has to match the column's in-memory element
/// type -- confirm.
/// \param clusterIndex cluster-relative address of the first element of interest
/// \param[out] nItems number of elements from `clusterIndex` up to and including the last element of the page
/// \return pointer to the element inside the page buffer. NOTE(review): presumably valid only while this page
/// stays mapped -- confirm.
252 template <typename CppT>
253 CppT *MapV(RClusterIndex clusterIndex, NTupleSize_t &nItems)
254 {
255 if (!fReadPageRef.Get().Contains(clusterIndex)) {
256 MapPage(clusterIndex);
257 }
258 // +1 to go from 0-based indexing to 1-based number of items
259 nItems = fReadPageRef.Get().GetClusterRangeLast() - clusterIndex.GetIndex() + 1;
260 return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
261 (clusterIndex.GetIndex() - fReadPageRef.Get().GetClusterRangeFirst()) *
262 sizeof(CppT));
263 }
264
266 {
267 if (!fReadPageRef.Get().Contains(clusterIndex)) {
268 MapPage(clusterIndex);
269 }
270 return fReadPageRef.Get().GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndex();
271 }
272
274 {
275 if (!fReadPageRef.Get().Contains(globalIndex)) {
276 MapPage(globalIndex);
277 }
279 globalIndex - fReadPageRef.Get().GetClusterInfo().GetIndexOffset());
280 }
281
282 /// For offset columns only, look at the two adjacent values that define a collection's coordinates.
/// The value stored at `globalIndex` is used as the end offset of the collection and the value stored at
/// `globalIndex - 1` as its start offset. The start offset is 0 instead when `globalIndex` is 0 or when
/// `globalIndex` equals the index offset of the cluster of the mapped page.
/// \param globalIndex global index of the offset element describing the collection
/// \param[out] collectionStart cluster id of the read page mapped when the function returns, paired with the
/// start offset
/// \param[out] collectionSize end offset minus start offset
283 void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, NTupleSize_t *collectionSize)
284 {
285 NTupleSize_t idxStart = 0;
286 NTupleSize_t idxEnd;
287 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
288 if (R__likely(globalIndex > 0)) {
289 if (R__likely(fReadPageRef.Get().Contains(globalIndex - 1))) {
// The preceding element is on the page that is already mapped: read it before Map(globalIndex) possibly
// replaces the mapped page.
290 idxStart = *Map<RColumnIndex>(globalIndex - 1);
291 idxEnd = *Map<RColumnIndex>(globalIndex);
// If globalIndex sits exactly at the index offset of the cluster now mapped, the value read from
// globalIndex - 1 is discarded and the collection starts at 0.
292 if (R__unlikely(fReadPageRef.Get().GetClusterInfo().GetIndexOffset() == globalIndex))
293 idxStart = 0;
294 } else {
// The preceding element is not on the mapped page: map globalIndex first and go back to the preceding
// element only when globalIndex is not at the cluster's index offset.
295 idxEnd = *Map<RColumnIndex>(globalIndex);
296 auto selfOffset = fReadPageRef.Get().GetClusterInfo().GetIndexOffset();
297 idxStart = (globalIndex == selfOffset) ? 0 : *Map<RColumnIndex>(globalIndex - 1);
298 }
299 } else {
// globalIndex == 0: there is no preceding element, idxStart keeps its initial value 0
300 idxEnd = *Map<RColumnIndex>(globalIndex);
301 }
302 *collectionSize = idxEnd - idxStart;
303 *collectionStart = RClusterIndex(fReadPageRef.Get().GetClusterInfo().GetId(), idxStart);
304 }
305
/// Cluster-relative overload: reads the RColumnIndex value stored at `clusterIndex` as the end offset of the
/// collection and the value stored at the preceding in-cluster index as its start offset. The start offset is 0
/// when `clusterIndex` has in-cluster index 0.
/// \param clusterIndex cluster-relative address of the offset element describing the collection
/// \param[out] collectionStart cluster id of `clusterIndex`, paired with the start offset
/// \param[out] collectionSize end offset minus start offset
306 void GetCollectionInfo(RClusterIndex clusterIndex, RClusterIndex *collectionStart, NTupleSize_t *collectionSize)
307 {
308 auto index = clusterIndex.GetIndex();
// In-cluster index 0 has no predecessor; the preceding element is not read in that case
309 auto idxStart = (index == 0) ? 0 : *Map<RColumnIndex>(clusterIndex - 1);
310 auto idxEnd = *Map<RColumnIndex>(clusterIndex);
311 *collectionSize = idxEnd - idxStart;
312 *collectionStart = RClusterIndex(clusterIndex.GetClusterId(), idxStart);
313 }
314
315 /// Reads the RColumnSwitch element stored at `globalIndex` and returns its index and its tag.
/// \param globalIndex global index of the switch element to read
/// \param[out] varIndex the element's index, expressed as an RClusterIndex in the cluster of the read page that
/// is mapped after the lookup
/// \param[out] tag the element's tag
316 void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag)
317 {
// Map() makes sure the page containing globalIndex is the mapped read page before its cluster id is used below
318 auto varSwitch = Map<RColumnSwitch>(globalIndex);
319 *varIndex = RClusterIndex(fReadPageRef.Get().GetClusterInfo().GetId(), varSwitch->GetIndex());
320 *tag = varSwitch->GetTag();
321 }
322
323 void Flush();
324 void CommitSuppressed();
325
/// Maps the page for `globalIndex` through TryMapPage(); a false return value is reported as a fatal error by
/// R__ASSERT.
326 void MapPage(NTupleSize_t globalIndex) { R__ASSERT(TryMapPage(globalIndex)); }
/// Maps the page for `clusterIndex` through TryMapPage(); a false return value is reported as a fatal error by
/// R__ASSERT.
327 void MapPage(RClusterIndex clusterIndex) { R__ASSERT(TryMapPage(clusterIndex)); }
/// Non-asserting counterparts of MapPage(); only declared here, the definitions are not part of this header.
/// NOTE(review): presumably the return value tells whether a page could be mapped -- confirm in RColumn.cxx.
328 bool TryMapPage(NTupleSize_t globalIndex);
329 bool TryMapPage(RClusterIndex clusterIndex);
330
/// True if the currently mapped read page contains the element with the given global index
331 bool ReadPageContains(NTupleSize_t globalIndex) const { return fReadPageRef.Get().Contains(globalIndex); }
/// True if the currently mapped read page contains the element with the given cluster-relative index
332 bool ReadPageContains(RClusterIndex clusterIndex) const { return fReadPageRef.Get().Contains(clusterIndex); }
333
334 void MergeTeams(RColumn &other);
335
// NOTE(review): assert and std::optional are used below, but <cassert> and <optional> are not among the includes
// visible in this listing -- confirm that they are provided by one of the included headers.
/// Non-owning pointer to the column element held by fElement
337 RColumnElementBase *GetElement() const { return fElement.get(); }
/// The column type stored in fType
338 EColumnType GetType() const { return fType; }
/// Bits on storage as reported by the column element, narrowed to 16 bits; asserts that fElement is set
339 std::uint16_t GetBitsOnStorage() const
340 {
341 assert(fElement);
342 return static_cast<std::uint16_t>(fElement->GetBitsOnStorage());
343 }
/// Value range as reported by the column element (an optional min/max pair of doubles); asserts that fElement is
/// set
344 std::optional<std::pair<double, double>> GetValueRange() const
345 {
346 assert(fElement);
347 return fElement->GetValueRange();
348 }
/// Order of this column among the columns belonging to the same field (see fIndex)
349 std::uint32_t GetIndex() const { return fIndex; }
/// Index of the column representation this column belongs to (see fRepresentationIndex)
350 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
/// The page sink stored in fPageSink; fPageSink is initialized to nullptr
354 RPageSink *GetPageSink() const { return fPageSink; }
357
/// Forwards to the column element. Unlike the getters above, fElement is dereferenced without an assert.
358 void SetBitsOnStorage(std::size_t bits) { fElement->SetBitsOnStorage(bits); }
/// Capacity of the current write page, as reported by RPage::GetCapacity()
359 std::size_t GetWritePageCapacity() const { return fWritePage.GetCapacity(); }
/// Forwards to the column element. Unlike the getters above, fElement is dereferenced without an assert.
360 void SetValueRange(double min, double max) { fElement->SetValueRange(min, max); }
361}; // class RColumn
362
363} // namespace ROOT::Experimental::Internal
364
365#endif
#define R__likely(expr)
Definition RConfig.hxx:603
#define R__unlikely(expr)
Definition RConfig.hxx:602
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
A column element encapsulates the translation between basic C++ types and their column representation...
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:40
RColumn(const RColumn &)=delete
std::size_t fLastGoodTeamIdx
Points into fTeam to the column that successfully returned the last page.
Definition RColumn.hxx:75
RPageStorage::ColumnHandle_t GetHandleSink() const
Definition RColumn.hxx:356
void Read(RClusterIndex clusterIndex, void *to)
Definition RColumn.hxx:171
void ReadV(NTupleSize_t globalIndex, NTupleSize_t count, void *to)
Definition RColumn.hxx:182
void ConnectPageSink(DescriptorId_t fieldId, RPageSink &pageSink, NTupleSize_t firstElementIndex=0U)
Connect the column to a page sink.
Definition RColumn.cxx:40
std::unique_ptr< RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition RColumn.hxx:68
void HandleWritePageIfFull()
Used when trying to append to a full write page.
Definition RColumn.hxx:81
NTupleSize_t GetGlobalIndex(RClusterIndex clusterIndex)
Definition RColumn.hxx:265
RColumnElementBase * GetElement() const
Definition RColumn.hxx:337
RPageRef fReadPageRef
The currently mapped page for reading.
Definition RColumn.hxx:62
void ReadV(RClusterIndex clusterIndex, NTupleSize_t count, void *to)
Definition RColumn.hxx:204
bool TryMapPage(NTupleSize_t globalIndex)
Definition RColumn.cxx:85
void GetCollectionInfo(RClusterIndex clusterIndex, RClusterIndex *collectionStart, NTupleSize_t *collectionSize)
Definition RColumn.hxx:306
void SetBitsOnStorage(std::size_t bits)
Definition RColumn.hxx:358
void MapPage(RClusterIndex clusterIndex)
Definition RColumn.hxx:327
DescriptorId_t GetOnDiskId() const
Definition RColumn.hxx:351
void MapPage(NTupleSize_t globalIndex)
Definition RColumn.hxx:326
CppT * Map(const NTupleSize_t globalIndex)
Definition RColumn.hxx:227
RPageSource * GetPageSource() const
Definition RColumn.hxx:353
static std::unique_ptr< RColumn > Create(EColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx)
Definition RColumn.hxx:108
void AppendV(const void *from, std::size_t count)
Definition RColumn.hxx:138
std::optional< std::pair< double, double > > GetValueRange() const
Definition RColumn.hxx:344
void Append(const void *from)
Definition RColumn.hxx:126
CppT * Map(RClusterIndex clusterIndex)
Definition RColumn.hxx:234
CppT * MapV(RClusterIndex clusterIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:253
RColumn & operator=(const RColumn &)=delete
bool ReadPageContains(RClusterIndex clusterIndex) const
Definition RColumn.hxx:332
RPageStorage::ColumnHandle_t fHandleSource
Definition RColumn.hxx:51
NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition RColumn.hxx:60
std::uint16_t GetRepresentationIndex() const
Definition RColumn.hxx:350
std::vector< RColumn * > fTeam
The column team is a set of columns that serve the same column index for different representation IDs...
Definition RColumn.hxx:73
void Read(const NTupleSize_t globalIndex, void *to)
Definition RColumn.hxx:160
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition RColumn.hxx:45
std::size_t GetWritePageCapacity() const
Definition RColumn.hxx:359
void ConnectPageSource(DescriptorId_t fieldId, RPageSource &pageSource)
Connect the column to a page source.
Definition RColumn.cxx:57
void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, NTupleSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:283
CppT * MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:241
std::uint16_t fRepresentationIndex
Fields can have multiple column representations, distinguished by representation index.
Definition RColumn.hxx:47
void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag)
Reads the switch element stored at globalIndex and returns its index (as a cluster index) and its tag.
Definition RColumn.hxx:316
RPageStorage::ColumnHandle_t fHandleSink
Definition RColumn.hxx:50
void SetValueRange(double min, double max)
Definition RColumn.hxx:360
NTupleSize_t fInitialNElements
The initial number of elements in a page.
Definition RColumn.hxx:58
DescriptorId_t fOnDiskId
The column id in the column descriptor, once connected to a sink or source.
Definition RColumn.hxx:64
NTupleSize_t GetNElements() const
Definition RColumn.hxx:336
RPage fWritePage
The page into which new elements are being written.
Definition RColumn.hxx:56
bool ReadPageContains(NTupleSize_t globalIndex) const
Definition RColumn.hxx:331
NTupleSize_t GetFirstElementIndex() const
Definition RColumn.hxx:352
RPageStorage::ColumnHandle_t GetHandleSource() const
Definition RColumn.hxx:355
RClusterIndex GetClusterIndex(NTupleSize_t globalIndex)
Definition RColumn.hxx:273
NTupleSize_t fFirstElementIndex
Global index of the first element in this column; usually == 0, unless it is a deferred column.
Definition RColumn.hxx:66
std::uint16_t GetBitsOnStorage() const
Definition RColumn.hxx:339
Reference to a page stored in the page pool.
Abstract interface to write data into an ntuple.
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)
Get a new, empty page for the given column that can be filled with up to nElements; nElements must be...
const RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:47
std::size_t GetCapacity() const
Definition RPage.hxx:118
std::size_t GetNBytes() const
The space taken by column elements in the buffer.
Definition RPage.hxx:114
NTupleSize_t GetGlobalRangeFirst() const
Definition RPage.hxx:125
const RClusterInfo & GetClusterInfo() const
Definition RPage.hxx:129
void * GrowUnchecked(std::uint32_t nElements)
Increases the number elements in the page.
Definition RPage.hxx:151
NTupleSize_t GetClusterRangeFirst() const
Definition RPage.hxx:127
std::uint32_t GetNElements() const
Definition RPage.hxx:123
bool Contains(NTupleSize_t globalIndex) const
Definition RPage.hxx:131
NTupleSize_t GetGlobalRangeLast() const
Definition RPage.hxx:126
NTupleSize_t GetClusterRangeLast() const
Definition RPage.hxx:128
std::uint32_t GetMaxElements() const
Definition RPage.hxx:124
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
DescriptorId_t GetClusterId() const
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId