Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RColumn
17#define ROOT7_RColumn
18
19#include <ROOT/RConfig.hxx> // for R__likely
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
23#include <ROOT/RPageStorage.hxx>
24
25#include <TError.h>
26
27#include <cstring> // for memcpy
28#include <memory>
29#include <utility>
30
32
33// clang-format off
34/**
35\class ROOT::Experimental::Internal::RColumn
36\ingroup NTuple
37\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
38*/
39// clang-format on
40class RColumn {
41private:
43 /// Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
44 /// the offset column with index 0 and the character value column with index 1.
45 std::uint32_t fIndex;
46 /// Fields can have multiple column representations, distinguished by representation index
47 std::uint16_t fRepresentationIndex;
48 RPageSink *fPageSink = nullptr;
52 /// The page into which new elements are being written. The page will initially be small
53 /// (RNTupleWriteOptions::fInitialUnzippedPageSize, which corresponds to fInitialNElements) and expand as needed,
54 /// as long as memory for page buffers is still available (RNTupleWriteOptions::fPageBufferBudget) and until the
55 /// maximum page size is reached (RNTupleWriteOptions::fMaxUnzippedPageSize).
57 /// The initial number of elements in a page
59 /// The number of elements written resp. available in the column
61 /// The currently mapped page for reading
63 /// The column id in the column descriptor, once connected to a sink or source
65 /// Global index of the first element in this column; usually == 0, unless it is a deferred column
67 /// Used to pack and unpack pages on writing/reading
68 std::unique_ptr<ROOT::Internal::RColumnElementBase> fElement;
69 /// The column team is a set of columns that serve the same column index for different representation IDs.
70 /// Initially, the team has only one member, the very column it belongs to. Through MergeTeams(), two columns
71 /// can join forces. The team is used to react on suppressed columns: if the current team member has a suppressed
72 /// column for a MapPage() call, it gets the page from the active column in the corresponding cluster.
73 std::vector<RColumn *> fTeam;
74 /// Points into fTeam to the column that successfully returned the last page.
75 std::size_t fLastGoodTeamIdx = 0;
76
78
79 /// Used when trying to append to a full write page. If possible, expand the page. Otherwise, flush and reset
80 /// to the minimal size.
82 {
84 if (newMaxElements * fElement->GetSize() > fPageSink->GetWriteOptions().GetMaxUnzippedPageSize()) {
85 newMaxElements = fPageSink->GetWriteOptions().GetMaxUnzippedPageSize() / fElement->GetSize();
86 }
87
89 // Maximum page size reached, flush and reset
90 Flush();
91 } else {
93 if (expandedPage.IsNull()) {
94 Flush();
95 } else {
98 expandedPage.GrowUnchecked(fWritePage.GetNElements());
99 fWritePage = std::move(expandedPage);
100 }
101 }
102
104 }
105
106public:
107 template <typename CppT>
108 static std::unique_ptr<RColumn>
110 {
111 auto column = std::unique_ptr<RColumn>(new RColumn(type, columnIdx, representationIdx));
112 column->fElement = ROOT::Internal::RColumnElementBase::Generate<CppT>(type);
113 return column;
114 }
115
116 RColumn(const RColumn &) = delete;
117 RColumn &operator=(const RColumn &) = delete;
118 ~RColumn();
119
120 /// Connect the column to a page sink. `firstElementIndex` can be used to specify the first column element index
121 /// with backing storage for this column. On read back, elements before `firstElementIndex` will cause the zero page
122 /// to be mapped.
124 /// Connect the column to a page source.
126
127 void Append(const void *from)
128 {
131 }
132
133 void *dst = fWritePage.GrowUnchecked(1);
134
135 std::memcpy(dst, from, fElement->GetSize());
136 fNElements++;
137 }
138
139 void AppendV(const void *from, std::size_t count)
140 {
141 auto src = reinterpret_cast<const unsigned char *>(from);
142 // TODO(jblomer): A future optimization should grow the page in one go, up to the maximum unzipped page size
143 while (count > 0) {
145 if (nElementsRemaining == 0) {
148 }
149
151 auto nBatch = std::min(count, nElementsRemaining);
152
154 std::memcpy(dst, src, nBatch * fElement->GetSize());
155 src += nBatch * fElement->GetSize();
156 count -= nBatch;
158 }
159 }
160
161 void Read(const ROOT::NTupleSize_t globalIndex, void *to)
162 {
163 if (!fReadPageRef.Get().Contains(globalIndex)) {
165 }
166 const auto elemSize = fElement->GetSize();
167 void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
168 (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * elemSize;
169 std::memcpy(to, from, elemSize);
170 }
171
173 {
174 if (!fReadPageRef.Get().Contains(localIndex)) {
176 }
177 const auto elemSize = fElement->GetSize();
178 void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
179 (localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst()) * elemSize;
180 std::memcpy(to, from, elemSize);
181 }
182
184 {
185 const auto elemSize = fElement->GetSize();
186 auto tail = static_cast<unsigned char *>(to);
187
188 while (count > 0) {
189 if (!fReadPageRef.Get().Contains(globalIndex)) {
191 }
192 const ROOT::NTupleSize_t idxInPage = globalIndex - fReadPageRef.Get().GetGlobalRangeFirst();
193
194 const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
195 const ROOT::NTupleSize_t nBatch = std::min(fReadPageRef.Get().GetNElements() - idxInPage, count);
196
197 std::memcpy(tail, from, elemSize * nBatch);
198
199 tail += nBatch * elemSize;
200 count -= nBatch;
202 }
203 }
204
206 {
207 const auto elemSize = fElement->GetSize();
208 auto tail = static_cast<unsigned char *>(to);
209
210 while (count > 0) {
211 if (!fReadPageRef.Get().Contains(localIndex)) {
213 }
214 ROOT::NTupleSize_t idxInPage = localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst();
215
216 const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
217 const ROOT::NTupleSize_t nBatch = std::min(count, fReadPageRef.Get().GetNElements() - idxInPage);
218
219 std::memcpy(tail, from, elemSize * nBatch);
220
221 tail += nBatch * elemSize;
222 count -= nBatch;
223 localIndex = RNTupleLocalIndex(localIndex.GetClusterId(), localIndex.GetIndexInCluster() + nBatch);
224 }
225 }
226
227 template <typename CppT>
233
234 template <typename CppT>
240
241 template <typename CppT>
243 {
244 if (R__unlikely(!fReadPageRef.Get().Contains(globalIndex))) {
246 }
247 // +1 to go from 0-based indexing to 1-based number of items
248 nItems = fReadPageRef.Get().GetGlobalRangeLast() - globalIndex + 1;
249 return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
250 (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * sizeof(CppT));
251 }
252
253 template <typename CppT>
255 {
256 if (!fReadPageRef.Get().Contains(localIndex)) {
258 }
259 // +1 to go from 0-based indexing to 1-based number of items
260 nItems = fReadPageRef.Get().GetLocalRangeLast() - localIndex.GetIndexInCluster() + 1;
261 return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
262 (localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst()) *
263 sizeof(CppT));
264 }
265
267 {
268 if (!fReadPageRef.Get().Contains(clusterIndex)) {
270 }
271 return fReadPageRef.Get().GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndexInCluster();
272 }
273
275 {
276 if (!fReadPageRef.Get().Contains(globalIndex)) {
278 }
279 return RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(),
280 globalIndex - fReadPageRef.Get().GetClusterInfo().GetIndexOffset());
281 }
282
283 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
286 {
289 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
290 if (R__likely(globalIndex > 0)) {
291 if (R__likely(fReadPageRef.Get().Contains(globalIndex - 1))) {
294 if (R__unlikely(fReadPageRef.Get().GetClusterInfo().GetIndexOffset() == globalIndex))
295 idxStart = 0;
296 } else {
298 auto selfOffset = fReadPageRef.Get().GetClusterInfo().GetIndexOffset();
300 }
301 } else {
303 }
305 *collectionStart = RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(), idxStart);
306 }
307
317
318 /// Get the tag and the index of a switch element; the index is returned as a local index in the currently active cluster
320 {
322 *varIndex = RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(), varSwitch->GetIndex());
323 *tag = varSwitch->GetTag();
324 }
325
326 void Flush();
327 void CommitSuppressed();
328
333
336
337 void MergeTeams(RColumn &other);
338
342 std::uint16_t GetBitsOnStorage() const
343 {
345 return static_cast<std::uint16_t>(fElement->GetBitsOnStorage());
346 }
347 std::optional<std::pair<double, double>> GetValueRange() const
348 {
350 return fElement->GetValueRange();
351 }
352 std::uint32_t GetIndex() const { return fIndex; }
353 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
357 RPageSink *GetPageSink() const { return fPageSink; }
360
361 void SetBitsOnStorage(std::size_t bits) { fElement->SetBitsOnStorage(bits); }
362 std::size_t GetWritePageCapacity() const { return fWritePage.GetCapacity(); }
363 void SetValueRange(double min, double max) { fElement->SetValueRange(min, max); }
364}; // class RColumn
365
366} // namespace ROOT::Experimental::Internal
367
368#endif
#define R__likely(expr)
Definition RConfig.hxx:603
#define R__unlikely(expr)
Definition RConfig.hxx:602
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
The available trivial, native content types of a column.
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:40
RColumn(const RColumn &)=delete
std::size_t fLastGoodTeamIdx
Points into fTeam to the column that successfully returned the last page.
Definition RColumn.hxx:75
RPageStorage::ColumnHandle_t GetHandleSink() const
Definition RColumn.hxx:359
void HandleWritePageIfFull()
Used when trying to append to a full write page.
Definition RColumn.hxx:81
void ReadV(ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t count, void *to)
Definition RColumn.hxx:183
void GetSwitchInfo(ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *varIndex, std::uint32_t *tag)
Get the tag and the index of a switch element; the index is returned as a local index in the currently active cluster.
Definition RColumn.hxx:319
bool ReadPageContains(ROOT::NTupleSize_t globalIndex) const
Definition RColumn.hxx:334
ROOT::Internal::RPageRef fReadPageRef
The currently mapped page for reading.
Definition RColumn.hxx:62
ROOT::NTupleSize_t GetGlobalIndex(RNTupleLocalIndex clusterIndex)
Definition RColumn.hxx:266
void SetBitsOnStorage(std::size_t bits)
Definition RColumn.hxx:361
void MapPage(RNTupleLocalIndex localIndex)
Definition RColumn.hxx:330
ROOT::NTupleSize_t GetNElements() const
Definition RColumn.hxx:339
ROOT::ENTupleColumnType GetType() const
Definition RColumn.hxx:341
CppT * Map(const ROOT::NTupleSize_t globalIndex)
Definition RColumn.hxx:228
void Read(const ROOT::NTupleSize_t globalIndex, void *to)
Definition RColumn.hxx:161
RPageSource * GetPageSource() const
Definition RColumn.hxx:356
void AppendV(const void *from, std::size_t count)
Definition RColumn.hxx:139
std::optional< std::pair< double, double > > GetValueRange() const
Definition RColumn.hxx:347
void Append(const void *from)
Definition RColumn.hxx:127
void ConnectPageSource(ROOT::DescriptorId_t fieldId, RPageSource &pageSource)
Connect the column to a page source.
Definition RColumn.cxx:57
RNTupleLocalIndex GetClusterIndex(ROOT::NTupleSize_t globalIndex)
Definition RColumn.hxx:274
RColumn & operator=(const RColumn &)=delete
RPageStorage::ColumnHandle_t fHandleSource
Definition RColumn.hxx:51
bool TryMapPage(ROOT::NTupleSize_t globalIndex)
Definition RColumn.cxx:85
std::uint16_t GetRepresentationIndex() const
Definition RColumn.hxx:353
void MapPage(ROOT::NTupleSize_t globalIndex)
Definition RColumn.hxx:329
std::vector< RColumn * > fTeam
The column team is a set of columns that serve the same column index for different representation IDs...
Definition RColumn.hxx:73
CppT * Map(RNTupleLocalIndex localIndex)
Definition RColumn.hxx:235
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition RColumn.hxx:45
std::size_t GetWritePageCapacity() const
Definition RColumn.hxx:362
ROOT::NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition RColumn.hxx:60
static std::unique_ptr< RColumn > Create(ROOT::ENTupleColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx)
Definition RColumn.hxx:109
ROOT::Internal::RPage fWritePage
The page into which new elements are being written.
Definition RColumn.hxx:56
ROOT::NTupleSize_t fFirstElementIndex
Global index of the first element in this column; usually == 0, unless it is a deferred column.
Definition RColumn.hxx:66
RColumn(ROOT::ENTupleColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex)
Definition RColumn.cxx:26
ROOT::NTupleSize_t fInitialNElements
The initial number of elements in a page.
Definition RColumn.hxx:58
std::uint16_t fRepresentationIndex
Fields can have multiple column representations, distinguished by representation index.
Definition RColumn.hxx:47
ROOT::DescriptorId_t GetOnDiskId() const
Definition RColumn.hxx:354
RPageStorage::ColumnHandle_t fHandleSink
Definition RColumn.hxx:50
void SetValueRange(double min, double max)
Definition RColumn.hxx:363
CppT * MapV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t &nItems)
Definition RColumn.hxx:254
bool ReadPageContains(RNTupleLocalIndex localIndex) const
Definition RColumn.hxx:335
void ConnectPageSink(ROOT::DescriptorId_t fieldId, RPageSink &pageSink, ROOT::NTupleSize_t firstElementIndex=0U)
Connect the column to a page sink.
Definition RColumn.cxx:40
ROOT::NTupleSize_t GetFirstElementIndex() const
Definition RColumn.hxx:355
void GetCollectionInfo(RNTupleLocalIndex localIndex, RNTupleLocalIndex *collectionStart, ROOT::NTupleSize_t *collectionSize)
Definition RColumn.hxx:308
CppT * MapV(const ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t &nItems)
Definition RColumn.hxx:242
void ReadV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t count, void *to)
Definition RColumn.hxx:205
std::unique_ptr< ROOT::Internal::RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition RColumn.hxx:68
RPageStorage::ColumnHandle_t GetHandleSource() const
Definition RColumn.hxx:358
void Read(RNTupleLocalIndex localIndex, void *to)
Definition RColumn.hxx:172
ROOT::ENTupleColumnType fType
Definition RColumn.hxx:42
ROOT::DescriptorId_t fOnDiskId
The column id in the column descriptor, once connected to a sink or source.
Definition RColumn.hxx:64
void GetCollectionInfo(const ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *collectionStart, ROOT::NTupleSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:284
ROOT::Internal::RColumnElementBase * GetElement() const
Definition RColumn.hxx:340
std::uint16_t GetBitsOnStorage() const
Definition RColumn.hxx:342
Abstract interface to write data into an ntuple.
const ROOT::RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
virtual ROOT::Internal::RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)
Get a new, empty page for the given column that can be filled with up to nElements; nElements must be...
Abstract interface to read data from an ntuple.
A column element encapsulates the translation between basic C++ types and their column representation...
Reference to a page stored in the page pool.
const RPage & Get() const
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:46
std::uint32_t GetNElements() const
Definition RPage.hxx:123
std::size_t GetNBytes() const
The space taken by column elements in the buffer.
Definition RPage.hxx:114
void * GrowUnchecked(std::uint32_t nElements)
Increases the number elements in the page.
Definition RPage.hxx:152
void * GetBuffer() const
Definition RPage.hxx:145
std::uint32_t GetMaxElements() const
Definition RPage.hxx:124
std::size_t GetCapacity() const
Definition RPage.hxx:118
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId