Logo ROOT  
Reference Guide
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RColumn
17#define ROOT7_RColumn
18
20#include <ROOT/RColumnModel.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
23#include <ROOT/RPageStorage.hxx>
24
25#include <TError.h>
26
27#include <memory>
28#include <vector>
29
30namespace ROOT {
31namespace Experimental {
32namespace Detail {
33
34// clang-format off
35/**
36\class ROOT::Experimental::RColumn
37\ingroup NTuple
38\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
39
40On the primitives data layer, the RColumn and RColumnElement are the equivalents to RField and RTreeValue on the
41logical data layer.
42*/
43// clang-format on
44class RColumn {
45private:
47 /**
48 * Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
49 * the offset column with index 0 and the character value column with index 1.
50 */
51 std::uint32_t fIndex;
56 /// Open page into which new elements are being written
58 /// The number of elements written (when writing) or available (when reading) in the column
60 /// The currently mapped page for reading
62 /// The column id is used to find matching pages with content when reading
64 /// Used to pack and unpack pages on writing/reading
65 std::unique_ptr<RColumnElementBase> fElement;
66
67 RColumn(const RColumnModel &model, std::uint32_t index);
68
69public:
70 template <typename CppT, EColumnType ColumnT>
71 static RColumn *Create(const RColumnModel &model, std::uint32_t index) {
72 R__ASSERT(model.GetType() == ColumnT);
73 auto column = new RColumn(model, index);
74 column->fElement = std::unique_ptr<RColumnElementBase>(new RColumnElement<CppT, ColumnT>(nullptr));
75 return column;
76 }
77
78 RColumn(const RColumn&) = delete;
79 RColumn &operator =(const RColumn&) = delete;
80 ~RColumn();
81
82 void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage);
83
84 void Append(const RColumnElementBase &element) {
85 void *dst = fHeadPage.TryGrow(1);
86 if (dst == nullptr) {
87 Flush();
88 dst = fHeadPage.TryGrow(1);
89 R__ASSERT(dst != nullptr);
90 }
91 element.WriteTo(dst, 1);
92 fNElements++;
93 }
94
95 void AppendV(const RColumnElementBase &elemArray, std::size_t count) {
96 void *dst = fHeadPage.TryGrow(count);
97 if (dst == nullptr) {
98 for (unsigned i = 0; i < count; ++i) {
99 Append(RColumnElementBase(elemArray, i));
100 }
101 return;
102 }
103 elemArray.WriteTo(dst, count);
104 fNElements += count;
105 }
106
107 void Read(const NTupleSize_t globalIndex, RColumnElementBase *element) {
108 if (!fCurrentPage.Contains(globalIndex)) {
109 MapPage(globalIndex);
110 }
111 void *src = static_cast<unsigned char *>(fCurrentPage.GetBuffer()) +
112 (globalIndex - fCurrentPage.GetGlobalRangeFirst()) * element->GetSize();
113 element->ReadFrom(src, 1);
114 }
115
116 void Read(const RClusterIndex &clusterIndex, RColumnElementBase *element) {
117 if (!fCurrentPage.Contains(clusterIndex)) {
118 MapPage(clusterIndex);
119 }
120 void *src = static_cast<unsigned char *>(fCurrentPage.GetBuffer()) +
121 (clusterIndex.GetIndex() - fCurrentPage.GetClusterRangeFirst()) * element->GetSize();
122 element->ReadFrom(src, 1);
123 }
124
125 void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray) {
126 if (!fCurrentPage.Contains(globalIndex)) {
127 MapPage(globalIndex);
128 }
129 NTupleSize_t idxInPage = globalIndex - fCurrentPage.GetGlobalRangeFirst();
130
131 void *src = static_cast<unsigned char *>(fCurrentPage.GetBuffer()) + idxInPage * elemArray->GetSize();
132 if (globalIndex + count <= fCurrentPage.GetGlobalRangeLast() + 1) {
133 elemArray->ReadFrom(src, count);
134 } else {
136 elemArray->ReadFrom(src, nBatch);
137 RColumnElementBase elemTail(*elemArray, nBatch);
138 ReadV(globalIndex + nBatch, count - nBatch, &elemTail);
139 }
140 }
141
142 void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
143 {
144 if (!fCurrentPage.Contains(clusterIndex)) {
145 MapPage(clusterIndex);
146 }
147 NTupleSize_t idxInPage = clusterIndex.GetIndex() - fCurrentPage.GetClusterRangeFirst();
148
149 void* src = static_cast<unsigned char *>(fCurrentPage.GetBuffer()) + idxInPage * elemArray->GetSize();
150 if (clusterIndex.GetIndex() + count <= fCurrentPage.GetClusterRangeLast() + 1) {
151 elemArray->ReadFrom(src, count);
152 } else {
154 elemArray->ReadFrom(src, nBatch);
155 RColumnElementBase elemTail(*elemArray, nBatch);
156 ReadV(RClusterIndex(clusterIndex.GetClusterId(), clusterIndex.GetIndex() + nBatch), count - nBatch, &elemTail);
157 }
158 }
159
160 template <typename CppT, EColumnType ColumnT>
161 CppT *Map(const NTupleSize_t globalIndex) {
162 if (!fCurrentPage.Contains(globalIndex)) {
163 MapPage(globalIndex);
164 }
165 return reinterpret_cast<CppT*>(
166 static_cast<unsigned char *>(fCurrentPage.GetBuffer()) +
168 }
169
170 template <typename CppT, EColumnType ColumnT>
171 CppT *Map(const RClusterIndex &clusterIndex) {
172 if (!fCurrentPage.Contains(clusterIndex)) {
173 MapPage(clusterIndex);
174 }
175 return reinterpret_cast<CppT*>(
176 static_cast<unsigned char *>(fCurrentPage.GetBuffer()) +
178 }
179
181 if (!fCurrentPage.Contains(clusterIndex)) {
182 MapPage(clusterIndex);
183 }
184 return fCurrentPage.GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndex();
185 }
186
188 if (!fCurrentPage.Contains(globalIndex)) {
189 MapPage(globalIndex);
190 }
192 globalIndex - fCurrentPage.GetClusterInfo().GetIndexOffset());
193 }
194
195 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
196 void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
197 {
198 auto idxStart = (globalIndex == 0) ? 0 : *Map<ClusterSize_t, EColumnType::kIndex>(globalIndex - 1);
199 auto idxEnd = *Map<ClusterSize_t, EColumnType::kIndex>(globalIndex);
200 auto selfOffset = fCurrentPage.GetClusterInfo().GetIndexOffset();
201 if (globalIndex == selfOffset) {
202 // Passed cluster boundary
203 idxStart = 0;
204 }
205 *collectionSize = idxEnd - idxStart;
206 *collectionStart = RClusterIndex(fCurrentPage.GetClusterInfo().GetId(), idxStart);
207 }
208
209 void GetCollectionInfo(const RClusterIndex &clusterIndex,
210 RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
211 {
212 auto index = clusterIndex.GetIndex();
213 auto idxStart = (index == 0) ? 0 : *Map<ClusterSize_t, EColumnType::kIndex>(clusterIndex - 1);
214 auto idxEnd = *Map<ClusterSize_t, EColumnType::kIndex>(clusterIndex);
215 *collectionSize = idxEnd - idxStart;
216 *collectionStart = RClusterIndex(clusterIndex.GetClusterId(), idxStart);
217 }
218
219 /// Get the currently active cluster id
220 void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag) {
221 auto varSwitch = Map<RColumnSwitch, EColumnType::kSwitch>(globalIndex);
222 *varIndex = RClusterIndex(fCurrentPage.GetClusterInfo().GetId(), varSwitch->GetIndex());
223 *tag = varSwitch->GetTag();
224 }
225
226 void Flush();
227 void MapPage(const NTupleSize_t index);
228 void MapPage(const RClusterIndex &clusterIndex);
230 RColumnElementBase *GetElement() const { return fElement.get(); }
231 const RColumnModel &GetModel() const { return fModel; }
232 std::uint32_t GetIndex() const { return fIndex; }
238};
239
240} // namespace Detail
241
242} // namespace Experimental
243} // namespace ROOT
244
245#endif
#define R__ASSERT(e)
Definition: TError.h:96
void WriteTo(void *destination, std::size_t count) const
Write one or multiple column elements into destination.
void ReadFrom(void *source, std::size_t count)
Set the column element or an array of elements from the memory location source.
Pairs of C++ type and column type, like float and EColumnType::kReal32.
static RColumn * Create(const RColumnModel &model, std::uint32_t index)
Definition: RColumn.hxx:71
RPageStorage::ColumnHandle_t GetHandleSource() const
Definition: RColumn.hxx:235
void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
Definition: RColumn.hxx:125
RColumn & operator=(const RColumn &)=delete
void GetCollectionInfo(const RClusterIndex &clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
Definition: RColumn.hxx:209
const RColumnModel & GetModel() const
Definition: RColumn.hxx:231
RPage fHeadPage
Open page into which new elements are being written.
Definition: RColumn.hxx:57
ColumnId_t GetColumnIdSource() const
Definition: RColumn.hxx:233
std::uint32_t GetIndex() const
Definition: RColumn.hxx:232
void AppendV(const RColumnElementBase &elemArray, std::size_t count)
Definition: RColumn.hxx:95
RColumnElementBase * GetElement() const
Definition: RColumn.hxx:230
void Append(const RColumnElementBase &element)
Definition: RColumn.hxx:84
RNTupleVersion GetVersion() const
Definition: RColumn.hxx:237
CppT * Map(const NTupleSize_t globalIndex)
Definition: RColumn.hxx:161
void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage)
Definition: RColumn.cxx:39
void MapPage(const NTupleSize_t index)
Definition: RColumn.cxx:66
void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag)
Reads the switch element at globalIndex and returns the cluster-relative index and the tag it stores.
Definition: RColumn.hxx:220
void Read(const NTupleSize_t globalIndex, RColumnElementBase *element)
Definition: RColumn.hxx:107
NTupleSize_t GetGlobalIndex(const RClusterIndex &clusterIndex)
Definition: RColumn.hxx:180
std::unique_ptr< RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition: RColumn.hxx:65
NTupleSize_t GetNElements() const
Definition: RColumn.hxx:229
RColumn(const RColumn &)=delete
RPage fCurrentPage
The currently mapped page for reading.
Definition: RColumn.hxx:61
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition: RColumn.hxx:51
RPageStorage::ColumnHandle_t GetHandleSink() const
Definition: RColumn.hxx:236
RPageSource * GetPageSource() const
Definition: RColumn.hxx:234
NTupleSize_t fNElements
The number of elements written (when writing) or available (when reading) in the column.
Definition: RColumn.hxx:59
ColumnId_t fColumnIdSource
The column id is used to find matching pages with content when reading.
Definition: RColumn.hxx:63
CppT * Map(const RClusterIndex &clusterIndex)
Definition: RColumn.hxx:171
void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
Definition: RColumn.hxx:142
RPageStorage::ColumnHandle_t fHandleSink
Definition: RColumn.hxx:54
RColumn(const RColumnModel &model, std::uint32_t index)
Definition: RColumn.cxx:24
void Read(const RClusterIndex &clusterIndex, RColumnElementBase *element)
Definition: RColumn.hxx:116
RPageStorage::ColumnHandle_t fHandleSource
Definition: RColumn.hxx:55
void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition: RColumn.hxx:196
RClusterIndex GetClusterIndex(NTupleSize_t globalIndex)
Definition: RColumn.hxx:187
Abstract interface to write data into an ntuple.
Abstract interface to read data from an ntuple.
Common functionality of an ntuple storage for both reading and writing.
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
ClusterSize_t::ValueType GetClusterRangeLast() const
Definition: RPage.hxx:87
ClusterSize_t::ValueType GetNElements() const
Definition: RPage.hxx:83
bool Contains(NTupleSize_t globalIndex) const
Definition: RPage.hxx:92
const RClusterInfo & GetClusterInfo() const
Definition: RPage.hxx:90
NTupleSize_t GetGlobalRangeFirst() const
Definition: RPage.hxx:84
NTupleSize_t GetGlobalRangeLast() const
Definition: RPage.hxx:85
void * TryGrow(ClusterSize_t::ValueType nElements)
Return a pointer after the last element that has space for nElements new elements.
Definition: RPage.hxx:107
ClusterSize_t::ValueType GetClusterRangeFirst() const
Definition: RPage.hxx:86
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:83
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
Holds the static meta-data of a column in a tree.
For forward and backward compatibility, attach version information to the constituents of the file f...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:43
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:79
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:75
VSD Structures.
Definition: StringConv.hxx:21
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Definition: RNTupleUtil.hxx:46