Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5
6/*************************************************************************
7 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef ROOT_RColumn
15#define ROOT_RColumn
16
17#include <ROOT/RConfig.hxx> // for R__likely
19#include <ROOT/RNTupleUtil.hxx>
20#include <ROOT/RPage.hxx>
21#include <ROOT/RPageStorage.hxx>
22
23#include <TError.h>
24
25#include <cstring> // for memcpy
26#include <memory>
27#include <utility>
28
29namespace ROOT::Internal {
30
31// clang-format off
32/**
33\class ROOT::Internal::RColumn
34\ingroup NTuple
35\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
36*/
37// clang-format on
38class RColumn {
 // NOTE(review): this listing skips some original line numbers (e.g. 40, 46-49, 54, 56, 58), so several member
 // declarations, function signatures and statements are not visible here. The comments added below describe only
 // what the visible lines do; anything relying on a missing line is marked as an assumption to confirm.
39private:
41 /// Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
42 /// the offset column with index 0 and the character value column with index 1.
43 std::uint32_t fIndex;
44 /// Fields can have multiple column representations, distinguished by representation index
45 std::uint16_t fRepresentationIndex;
50 /// The page into which new elements are being written. The page will initially be small
51 /// (RNTupleWriteOptions::fInitialUnzippedPageSize, which corresponds to fInitialNElements) and expand as needed and
52 /// as memory for page buffers is still available (RNTupleWriteOptions::fPageBufferBudget) or the maximum page
53 /// size is reached (RNTupleWriteOptions::fMaxUnzippedPageSize).
55 /// The initial number of elements in a page
57 /// The number of elements written resp. available in the column
59 /// The currently mapped page for reading
61 /// The column id in the column descriptor, once connected to a sink or source
63 /// Global index of the first element in this column; usually == 0, unless it is a deferred column
65 /// Used to pack and unpack pages on writing/reading
66 std::unique_ptr<ROOT::Internal::RColumnElementBase> fElement;
67 /// The column team is a set of columns that serve the same column index for different representation IDs.
68 /// Initially, the team has only one member, the very column it belongs to. Through MergeTeams(), two columns
69 /// can join forces. The team is used to react to suppressed columns: if the current team member has a suppressed
70 /// column for a MapPage() call, it gets the page from the active column in the corresponding cluster.
71 std::vector<RColumn *> fTeam;
72 /// Points into fTeam to the column that successfully returned the last page.
73 std::size_t fLastGoodTeamIdx = 0;
74
76
77 /// Used when trying to append to a full write page. If possible, expand the page. Otherwise, flush and reset
78 /// to the minimal size.
80 {
 // Cap the candidate element count so that the page never exceeds the configured maximum unzipped page size
 // in bytes (element count times element size).
82 if (newMaxElements * fElement->GetSize() > fPageSink->GetWriteOptions().GetMaxUnzippedPageSize()) {
83 newMaxElements = fPageSink->GetWriteOptions().GetMaxUnzippedPageSize() / fElement->GetSize();
84 }
85
87 // Maximum page size reached, flush and reset
88 Flush();
89 } else {
 // NOTE(review): the statement that obtains expandedPage is not visible in this listing.
91 if (expandedPage.IsNull()) {
 // No expanded page is available: flush the current write page instead.
92 Flush();
93 } else {
 // Mark as many elements used in the expanded page as the current write page holds, then make the
 // expanded page the new write page.
96 expandedPage.GrowUnchecked(fWritePage.GetNElements());
97 fWritePage = std::move(expandedPage);
98 }
99 }
100
102 }
103
104public:
 /// Factory function: allocates a column for the given type, column index and representation index and attaches
 /// the column element generated for the C++ type CppT.
105 template <typename CppT>
106 static std::unique_ptr<RColumn>
108 {
109 auto column = std::unique_ptr<RColumn>(new RColumn(type, columnIdx, representationIdx));
110 column->fElement = ROOT::Internal::RColumnElementBase::Generate<CppT>(type);
111 return column;
112 }
113
 // Columns are neither copy-constructible nor copy-assignable.
114 RColumn(const RColumn &) = delete;
115 RColumn &operator=(const RColumn &) = delete;
116 ~RColumn();
117
118 /// Connect the column to a page sink. `firstElementIndex` can be used to specify the first column element index
119 /// with backing storage for this column. On read back, elements before `firstElementIndex` will cause the zero page
120 /// to be mapped.
123 /// Connect the column to a page source.
125
 /// Appends a single element: claims one more slot in the write page, copies fElement->GetSize() bytes from
 /// `from` into it and increments the column's element counter.
126 void Append(const void *from)
127 {
 // NOTE(review): the condition guarding this closing brace is not visible in this listing.
130 }
131
132 void *dst = fWritePage.GrowUnchecked(1);
133
134 std::memcpy(dst, from, fElement->GetSize());
135 fNElements++;
136 }
137
 /// Appends `count` elements stored contiguously at `from`. The data is copied in batches; each batch is at most
 /// nElementsRemaining elements long.
138 void AppendV(const void *from, std::size_t count)
139 {
 // Byte-wise cursor into the caller's buffer so that it can be advanced by whole batches.
140 auto src = reinterpret_cast<const unsigned char *>(from);
141 // TODO(jblomer): A future optimization should grow the page in one go, up to the maximum unzipped page size
142 while (count > 0) {
 // NOTE(review): the computation of nElementsRemaining and the body of this branch are not visible here.
144 if (nElementsRemaining == 0) {
147 }
148
150 auto nBatch = std::min(count, nElementsRemaining);
151
 // NOTE(review): the statement that obtains dst is not visible in this listing.
153 std::memcpy(dst, src, nBatch * fElement->GetSize());
154 src += nBatch * fElement->GetSize();
155 count -= nBatch;
157 }
158 }
159
 /// Copies the element with the given global index from the currently mapped read page into `to`.
160 void Read(const ROOT::NTupleSize_t globalIndex, void *to)
161 {
 // NOTE(review): the body of this branch is not visible; presumably it maps the page containing globalIndex
 // before the buffer is accessed below -- confirm.
162 if (!fReadPageRef.Get().Contains(globalIndex)) {
164 }
165 const auto elemSize = fElement->GetSize();
 // Byte offset of the element relative to the first element of the mapped page.
166 void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
167 (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * elemSize;
168 std::memcpy(to, from, elemSize);
169 }
170
 // Body of Read(RNTupleLocalIndex localIndex, void *to): same as the global-index overload, but the offset into
 // the page buffer is computed from the index within the cluster relative to the page's local range start.
172 {
173 if (!fReadPageRef.Get().Contains(localIndex)) {
175 }
176 const auto elemSize = fElement->GetSize();
177 void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
178 (localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst()) * elemSize;
179 std::memcpy(to, from, elemSize);
180 }
181
 // Body of ReadV(ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t count, void *to): copies `count` elements
 // into `to`, one batch per loop iteration. A batch ends at the end of the mapped page or when `count` elements
 // have been copied, whichever comes first.
183 {
184 const auto elemSize = fElement->GetSize();
 // Write cursor into the destination buffer.
185 auto tail = static_cast<unsigned char *>(to);
186
187 while (count > 0) {
188 if (!fReadPageRef.Get().Contains(globalIndex)) {
190 }
191 const ROOT::NTupleSize_t idxInPage = globalIndex - fReadPageRef.Get().GetGlobalRangeFirst();
192
193 const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
194 const ROOT::NTupleSize_t nBatch = std::min(fReadPageRef.Get().GetNElements() - idxInPage, count);
195
196 std::memcpy(tail, from, elemSize * nBatch);
197
198 tail += nBatch * elemSize;
199 count -= nBatch;
 // NOTE(review): no statement advancing globalIndex is visible in this listing (original line 200 is
 // missing); presumably it is advanced by nBatch there -- confirm.
201 }
202 }
203
 // Body of ReadV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t count, void *to): batched copy as in the
 // global-index overload; after each batch, localIndex is advanced by nBatch within the same cluster id.
205 {
206 const auto elemSize = fElement->GetSize();
207 auto tail = static_cast<unsigned char *>(to);
208
209 while (count > 0) {
210 if (!fReadPageRef.Get().Contains(localIndex)) {
212 }
213 ROOT::NTupleSize_t idxInPage = localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst();
214
215 const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
216 const ROOT::NTupleSize_t nBatch = std::min(count, fReadPageRef.Get().GetNElements() - idxInPage);
217
218 std::memcpy(tail, from, elemSize * nBatch);
219
220 tail += nBatch * elemSize;
221 count -= nBatch;
222 localIndex = RNTupleLocalIndex(localIndex.GetClusterId(), localIndex.GetIndexInCluster() + nBatch);
223 }
224 }
225
 // NOTE(review): the declarations following the next two template headers (the two Map() overloads) are not
 // visible in this listing.
226 template <typename CppT>
232
233 template <typename CppT>
239
 // Body of MapV(const ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t &nItems): returns a typed pointer into
 // the mapped page's buffer at globalIndex and sets nItems to the number of elements from globalIndex up to and
 // including the last element of the page. The offset is computed with sizeof(CppT).
240 template <typename CppT>
242 {
243 if (R__unlikely(!fReadPageRef.Get().Contains(globalIndex))) {
245 }
246 // +1 to go from 0-based indexing to 1-based number of items
247 nItems = fReadPageRef.Get().GetGlobalRangeLast() - globalIndex + 1;
248 return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
249 (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * sizeof(CppT));
250 }
251
 // Body of MapV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t &nItems): cluster-local counterpart of the
 // overload above; indices are taken relative to the page's local range.
252 template <typename CppT>
254 {
255 if (!fReadPageRef.Get().Contains(localIndex)) {
257 }
258 // +1 to go from 0-based indexing to 1-based number of items
259 nItems = fReadPageRef.Get().GetLocalRangeLast() - localIndex.GetIndexInCluster() + 1;
260 return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
261 (localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst()) *
262 sizeof(CppT));
263 }
264
 // Body of GetGlobalIndex(RNTupleLocalIndex clusterIndex): converts a cluster-local index into a global index by
 // adding the index offset of the mapped page's cluster.
266 {
267 if (!fReadPageRef.Get().Contains(clusterIndex)) {
269 }
270 return fReadPageRef.Get().GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndexInCluster();
271 }
272
 // Body of GetClusterIndex(ROOT::NTupleSize_t globalIndex): converts a global index into a cluster-local index
 // by subtracting the index offset of the mapped page's cluster and pairing the result with that cluster's id.
274 {
275 if (!fReadPageRef.Get().Contains(globalIndex)) {
277 }
278 return RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(),
279 globalIndex - fReadPageRef.Get().GetClusterInfo().GetIndexOffset());
280 }
281
282 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
 // NOTE(review): the signature and the statements that compute idxStart are not visible in this listing; only
 // the branch structure can be described.
285 {
288 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
289 if (R__likely(globalIndex > 0)) {
290 if (R__likely(fReadPageRef.Get().Contains(globalIndex - 1))) {
 // globalIndex is the first element of its cluster: the collection starts at cluster-local index 0.
293 if (R__unlikely(fReadPageRef.Get().GetClusterInfo().GetIndexOffset() == globalIndex))
294 idxStart = 0;
295 } else {
297 auto selfOffset = fReadPageRef.Get().GetClusterInfo().GetIndexOffset();
299 }
300 } else {
302 }
 // The start is reported as a local index within the cluster of the currently mapped read page.
304 *collectionStart = RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(), idxStart);
305 }
306
316
317 /// Reports the switch element's index, paired with the cluster id of the currently mapped read page, via
 /// `varIndex`, and the switch element's tag via `tag`.
 // NOTE(review): the statement that obtains varSwitch is not visible in this listing.
319 {
321 *varIndex = RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(), varSwitch->GetIndex());
322 *tag = varSwitch->GetTag();
323 }
324
325 void Flush();
326 void CommitSuppressed();
327
332
335
336 void MergeTeams(RColumn &other);
337
 /// Returns the column element's number of bits on storage, narrowed to 16 bits.
341 std::uint16_t GetBitsOnStorage() const
342 {
344 return static_cast<std::uint16_t>(fElement->GetBitsOnStorage());
345 }
 /// Forwards to the column element's value range, which may be empty.
346 std::optional<std::pair<double, double>> GetValueRange() const
347 {
349 return fElement->GetValueRange();
350 }
 /// Returns the index that distinguishes this column among the columns of the same field.
351 std::uint32_t GetIndex() const { return fIndex; }
 /// Returns the index of the column representation this column belongs to.
352 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
359
 /// Forwards the requested bit width to the column element.
360 void SetBitsOnStorage(std::size_t bits) { fElement->SetBitsOnStorage(bits); }
 /// Returns the capacity of the current write page, as reported by the page.
361 std::size_t GetWritePageCapacity() const { return fWritePage.GetCapacity(); }
 /// Forwards the value range [min, max] to the column element.
362 void SetValueRange(double min, double max) { fElement->SetValueRange(min, max); }
363}; // class RColumn
364
365} // namespace ROOT::Internal
366
367#endif
#define R__likely(expr)
Definition RConfig.hxx:595
#define R__unlikely(expr)
Definition RConfig.hxx:594
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
The available trivial, native content types of a column.
A column element encapsulates the translation between basic C++ types and their column representation...
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:38
void GetSwitchInfo(ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *varIndex, std::uint32_t *tag)
Get the currently active cluster id.
Definition RColumn.hxx:318
void GetCollectionInfo(RNTupleLocalIndex localIndex, RNTupleLocalIndex *collectionStart, ROOT::NTupleSize_t *collectionSize)
Definition RColumn.hxx:307
void Read(RNTupleLocalIndex localIndex, void *to)
Definition RColumn.hxx:171
static std::unique_ptr< RColumn > Create(ROOT::ENTupleColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx)
Definition RColumn.hxx:107
RColumn(ROOT::ENTupleColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex)
ROOT::Internal::RPageSource * GetPageSource() const
Definition RColumn.hxx:355
ROOT::NTupleSize_t GetGlobalIndex(RNTupleLocalIndex clusterIndex)
Definition RColumn.hxx:265
void HandleWritePageIfFull()
Used when trying to append to a full write page.
Definition RColumn.hxx:79
std::optional< std::pair< double, double > > GetValueRange() const
Definition RColumn.hxx:346
void MergeTeams(RColumn &other)
Definition RColumn.cxx:115
RColumn & operator=(const RColumn &)=delete
ROOT::Internal::RPageStorage::ColumnHandle_t GetHandleSink() const
Definition RColumn.hxx:358
void ReadV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t count, void *to)
Definition RColumn.hxx:204
std::vector< RColumn * > fTeam
The column team is a set of columns that serve the same column index for different representation IDs...
Definition RColumn.hxx:71
std::uint16_t GetRepresentationIndex() const
Definition RColumn.hxx:352
bool ReadPageContains(ROOT::NTupleSize_t globalIndex) const
Definition RColumn.hxx:333
std::size_t fLastGoodTeamIdx
Points into fTeam to the column that successfully returned the last page.
Definition RColumn.hxx:73
CppT * Map(const ROOT::NTupleSize_t globalIndex)
Definition RColumn.hxx:227
CppT * MapV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t &nItems)
Definition RColumn.hxx:253
RNTupleLocalIndex GetClusterIndex(ROOT::NTupleSize_t globalIndex)
Definition RColumn.hxx:273
RColumn(const RColumn &)=delete
std::uint16_t fRepresentationIndex
Fields can have multiple column representations, distinguished by representation index.
Definition RColumn.hxx:45
void ConnectPageSource(ROOT::DescriptorId_t fieldId, ROOT::Internal::RPageSource &pageSource)
Connect the column to a page source.
Definition RColumn.cxx:57
ROOT::Internal::RPageSource * fPageSource
Definition RColumn.hxx:47
ROOT::Internal::RColumnElementBase * GetElement() const
Definition RColumn.hxx:339
ROOT::Internal::RPageStorage::ColumnHandle_t GetHandleSource() const
Definition RColumn.hxx:357
void SetBitsOnStorage(std::size_t bits)
Definition RColumn.hxx:360
void Read(const ROOT::NTupleSize_t globalIndex, void *to)
Definition RColumn.hxx:160
CppT * Map(RNTupleLocalIndex localIndex)
Definition RColumn.hxx:234
ROOT::ENTupleColumnType GetType() const
Definition RColumn.hxx:340
ROOT::NTupleSize_t GetFirstElementIndex() const
Definition RColumn.hxx:354
bool ReadPageContains(RNTupleLocalIndex localIndex) const
Definition RColumn.hxx:334
void GetCollectionInfo(const ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *collectionStart, ROOT::NTupleSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:283
void AppendV(const void *from, std::size_t count)
Definition RColumn.hxx:138
void ReadV(ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t count, void *to)
Definition RColumn.hxx:182
std::size_t GetWritePageCapacity() const
Definition RColumn.hxx:361
ROOT::NTupleSize_t GetNElements() const
Definition RColumn.hxx:338
void SetValueRange(double min, double max)
Definition RColumn.hxx:362
ROOT::DescriptorId_t GetOnDiskId() const
Definition RColumn.hxx:353
ROOT::Internal::RPageRef fReadPageRef
The currently mapped page for reading.
Definition RColumn.hxx:60
ROOT::Internal::RPage fWritePage
The page into which new elements are being written.
Definition RColumn.hxx:54
CppT * MapV(const ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t &nItems)
Definition RColumn.hxx:241
ROOT::Internal::RPageSink * fPageSink
Definition RColumn.hxx:46
ROOT::DescriptorId_t fOnDiskId
The column id in the column descriptor, once connected to a sink or source.
Definition RColumn.hxx:62
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition RColumn.hxx:43
bool TryMapPage(ROOT::NTupleSize_t globalIndex)
Definition RColumn.cxx:85
ROOT::Internal::RPageStorage::ColumnHandle_t fHandleSink
Definition RColumn.hxx:48
ROOT::NTupleSize_t fFirstElementIndex
Global index of the first element in this column; usually == 0, unless it is a deferred column.
Definition RColumn.hxx:64
ROOT::NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition RColumn.hxx:58
void MapPage(ROOT::NTupleSize_t globalIndex)
Definition RColumn.hxx:328
void ConnectPageSink(ROOT::DescriptorId_t fieldId, ROOT::Internal::RPageSink &pageSink, ROOT::NTupleSize_t firstElementIndex=0U)
Connect the column to a page sink.
Definition RColumn.cxx:40
ROOT::Internal::RPageStorage::ColumnHandle_t fHandleSource
Definition RColumn.hxx:49
std::uint16_t GetBitsOnStorage() const
Definition RColumn.hxx:341
std::uint32_t GetIndex() const
Definition RColumn.hxx:351
void Append(const void *from)
Definition RColumn.hxx:126
std::unique_ptr< ROOT::Internal::RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition RColumn.hxx:66
ROOT::Internal::RPageSink * GetPageSink() const
Definition RColumn.hxx:356
void MapPage(RNTupleLocalIndex localIndex)
Definition RColumn.hxx:329
ROOT::ENTupleColumnType fType
Definition RColumn.hxx:40
ROOT::NTupleSize_t fInitialNElements
The initial number of elements in a page.
Definition RColumn.hxx:56
Reference to a page stored in the page pool.
const RPage & Get() const
Abstract interface to write data into an ntuple.
virtual ROOT::Internal::RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)
Get a new, empty page for the given column that can be filled with up to nElements; nElements must be...
const ROOT::RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:44
std::uint32_t GetNElements() const
Definition RPage.hxx:121
std::size_t GetNBytes() const
The space taken by column elements in the buffer.
Definition RPage.hxx:112
void * GrowUnchecked(std::uint32_t nElements)
Increases the number elements in the page.
Definition RPage.hxx:150
void * GetBuffer() const
Definition RPage.hxx:143
std::uint32_t GetMaxElements() const
Definition RPage.hxx:122
std::size_t GetCapacity() const
Definition RPage.hxx:116
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId