Logo ROOT   6.18/05
Reference Guide
RPageStorageRoot.cxx
Go to the documentation of this file.
1/// \file RPageStorageRoot.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RField.hxx>
18#include <ROOT/RNTupleModel.hxx>
20#include <ROOT/RPage.hxx>
21#include <ROOT/RPagePool.hxx>
22#include <ROOT/RLogger.hxx>
23
24#include <TKey.h>
25
26#include <cstdlib>
27#include <iostream>
28#include <utility>
29
30
32 : ROOT::Experimental::Detail::RPageSink(ntupleName)
33 , fNTupleName(ntupleName)
34 , fDirectory(nullptr)
35 , fSettings(settings)
36 , fPrevClusterNEntries(0)
37{
38 R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
39 "Do not store real data with this version of RNTuple!";
40}
41
43 : ROOT::Experimental::Detail::RPageSink(ntupleName)
44 , fNTupleName(ntupleName)
45 , fDirectory(nullptr)
46{
47 R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
48 "Do not store real data with this version of RNTuple!";
49 TFile *file = TFile::Open(std::string(path).c_str(), "UPDATE");
52}
53
55{
56 if (fSettings.fTakeOwnership) {
57 fSettings.fFile->Close();
58 delete fSettings.fFile;
59 }
60}
61
64{
66 columnHeader.fName = column->GetModel().GetName();
67 columnHeader.fType = column->GetModel().GetType();
68 columnHeader.fIsSorted = column->GetModel().GetIsSorted();
69 if (column->GetOffsetColumn() != nullptr) {
70 columnHeader.fOffsetColumn = column->GetOffsetColumn()->GetModel().GetName();
71 }
72 auto columnId = fNTupleHeader.fColumns.size();
73 fNTupleHeader.fColumns.emplace_back(columnHeader);
74 //printf("Added column %s type %d\n", columnHeader.fName.c_str(), (int)columnHeader.fType);
75 return ColumnHandle_t(columnId, column);
76}
77
78
80{
81 fNTupleHeader.fPageSize = kPageSize;
82 fDirectory = fSettings.fFile->mkdir(fNTupleName.c_str());
83
84 unsigned int nColumns = 0;
85 for (auto& f : *model->GetRootField()) {
86 nColumns += f.GetNColumns();
87 }
88 fPagePool = std::make_unique<RPagePool>(fNTupleHeader.fPageSize, nColumns);
89
90 for (auto& f : *model->GetRootField()) {
92 fieldHeader.fName = f.GetName();
93 fieldHeader.fType = f.GetType();
94 //printf("Added field %s type [%s]\n", f.GetName().c_str(), f.GetType().c_str());
95 if (f.GetParent()) fieldHeader.fParentName = f.GetParent()->GetName();
96 fNTupleHeader.fFields.emplace_back(fieldHeader);
97
98 f.ConnectColumns(this); // issues in turn one or several calls to AddColumn()
99 }
100 R__ASSERT(nColumns == fNTupleHeader.fColumns.size());
101
102 fCurrentCluster.fPagesPerColumn.resize(nColumns);
103 fNTupleFooter.fNElementsPerColumn.resize(nColumns, 0);
104 fDirectory->WriteObject(&fNTupleHeader, RMapper::kKeyNTupleHeader);
105}
106
108{
109 auto columnId = columnHandle.fId;
111 pagePayload.fSize = page.GetSize();
112 pagePayload.fContent = static_cast<unsigned char *>(page.GetBuffer());
113 std::string key = std::string(RMapper::kKeyPagePayload) +
114 std::to_string(fNTupleFooter.fNClusters) + RMapper::kKeySeparator +
115 std::to_string(columnId) + RMapper::kKeySeparator +
116 std::to_string(fCurrentCluster.fPagesPerColumn[columnId].fRangeStarts.size());
117 fDirectory->WriteObject(&pagePayload, key.c_str());
118 fCurrentCluster.fPagesPerColumn[columnId].fRangeStarts.push_back(page.GetRangeFirst());
119 fNTupleFooter.fNElementsPerColumn[columnId] += page.GetNElements();
120}
121
123{
124 fCurrentCluster.fNEntries = nEntries - fPrevClusterNEntries;
125 fPrevClusterNEntries = nEntries;
126 std::string key = RMapper::kKeyClusterFooter + std::to_string(fNTupleFooter.fNClusters);
127 fDirectory->WriteObject(&fCurrentCluster, key.c_str());
128 fNTupleFooter.fNClusters++;
129 fNTupleFooter.fNEntries = nEntries;
130
131 for (auto& pageInfo : fCurrentCluster.fPagesPerColumn) {
132 pageInfo.fRangeStarts.clear();
133 }
134 fCurrentCluster.fEntryRangeStart = fNTupleFooter.fNEntries;
135}
136
137
139{
140 if (fDirectory)
141 fDirectory->WriteObject(&fNTupleFooter, RMapper::kKeyNTupleFooter);
142}
143
144
145//------------------------------------------------------------------------------
146
147
149 : ROOT::Experimental::Detail::RPageSource(ntupleName)
150 , fNTupleName(ntupleName)
151 , fDirectory(nullptr)
152 , fSettings(settings)
153{
154}
155
157 : ROOT::Experimental::Detail::RPageSource(ntupleName)
158 , fNTupleName(ntupleName)
159 , fDirectory(nullptr)
160{
161 TFile *file = TFile::Open(std::string(path).c_str(), "READ");
164}
165
166
168{
169 if (fSettings.fTakeOwnership) {
170 fSettings.fFile->Close();
171 delete fSettings.fFile;
172 }
173}
174
175
178{
179 auto& model = column->GetModel();
180 auto columnId = fMapper.fColumnName2Id[model.GetName()];
181 R__ASSERT(model == *fMapper.fId2ColumnModel[columnId]);
182 //printf("Attaching column %s id %d type %d length %lu\n",
183 // column->GetModel().GetName().c_str(), columnId, (int)(column->GetModel().GetType()),
184 // fMapper.fColumnIndex[columnId].fNElements);
185 return ColumnHandle_t(columnId, column);
186}
187
188
190{
191 fDirectory = fSettings.fFile->GetDirectory(fNTupleName.c_str());
192 auto keyNTupleHeader = fDirectory->GetKey(RMapper::kKeyNTupleHeader);
193 auto ntupleHeader = keyNTupleHeader->ReadObject<ROOT::Experimental::Internal::RNTupleHeader>();
194 //printf("Number of fields %lu, of columns %lu\n", ntupleHeader->fFields.size(), ntupleHeader->fColumns.size());
195
196 for (auto &fieldHeader : ntupleHeader->fFields) {
197 if (fieldHeader.fParentName.empty()) {
198 fMapper.fRootFields.push_back(RMapper::RFieldDescriptor(fieldHeader.fName, fieldHeader.fType));
199 }
200 }
201
202 auto nColumns = ntupleHeader->fColumns.size();
203 fPagePool = std::make_unique<RPagePool>(ntupleHeader->fPageSize, nColumns);
204 fMapper.fColumnIndex.resize(nColumns);
205
206 std::int32_t columnId = 0;
207 for (auto &columnHeader : ntupleHeader->fColumns) {
208 auto columnModel = std::make_unique<RColumnModel>(
209 columnHeader.fName, columnHeader.fType, columnHeader.fIsSorted);
210 fMapper.fId2ColumnModel[columnId] = std::move(columnModel);
211 fMapper.fColumnName2Id[columnHeader.fName] = columnId;
212 columnId++;
213 }
214
215 /// Determine column dependencies (offset - pointee relationships)
216 for (auto &columnHeader : ntupleHeader->fColumns) {
217 if (columnHeader.fOffsetColumn.empty()) continue;
218 fMapper.fColumn2Pointee[fMapper.fColumnName2Id[columnHeader.fOffsetColumn]] =
219 fMapper.fColumnName2Id[columnHeader.fName];
220 }
221
222 auto keyNTupleFooter = fDirectory->GetKey(RMapper::kKeyNTupleFooter);
223 auto ntupleFooter = keyNTupleFooter->ReadObject<ROOT::Experimental::Internal::RNTupleFooter>();
224 //printf("Number of clusters: %d, entries %ld\n", ntupleFooter->fNClusters, ntupleFooter->fNEntries);
225
226 for (std::int32_t iCluster = 0; iCluster < ntupleFooter->fNClusters; ++iCluster) {
227 auto keyClusterFooter = fDirectory->GetKey((RMapper::kKeyClusterFooter + std::to_string(iCluster)).c_str());
228 auto clusterFooter = keyClusterFooter->ReadObject<ROOT::Experimental::Internal::RClusterFooter>();
229 R__ASSERT(clusterFooter->fPagesPerColumn.size() == nColumns);
230 for (unsigned iColumn = 0; iColumn < nColumns; ++iColumn) {
231 if (clusterFooter->fPagesPerColumn[iColumn].fRangeStarts.empty())
232 continue;
233 NTupleSize_t selfClusterOffset = clusterFooter->fPagesPerColumn[iColumn].fRangeStarts[0];
234 NTupleSize_t pointeeClusterOffset = kInvalidNTupleIndex;
235 auto itrPointee = fMapper.fColumn2Pointee.find(iColumn);
236 if (itrPointee != fMapper.fColumn2Pointee.end()) {
237 //printf("COLUMN %s wants to know pointee offset of column %s\n",
238 // fMapper.fId2ColumnModel[iColumn]->GetName().c_str(),
239 // fMapper.fId2ColumnModel[itrPointee->second]->GetName().c_str());
240 /// The pointee might not have any pages in this cluster (e.g. all empty collections)
241 if (!clusterFooter->fPagesPerColumn[itrPointee->second].fRangeStarts.empty())
242 pointeeClusterOffset = clusterFooter->fPagesPerColumn[itrPointee->second].fRangeStarts[0];
243 }
244 NTupleSize_t pageInCluster = 0;
245 for (auto rangeStart : clusterFooter->fPagesPerColumn[iColumn].fRangeStarts) {
246 fMapper.fColumnIndex[iColumn].fRangeStarts.push_back(rangeStart);
247 fMapper.fColumnIndex[iColumn].fClusterId.push_back(iCluster);
248 fMapper.fColumnIndex[iColumn].fPageInCluster.push_back(pageInCluster);
249 fMapper.fColumnIndex[iColumn].fSelfClusterOffset.push_back(selfClusterOffset);
250 fMapper.fColumnIndex[iColumn].fPointeeClusterOffset.push_back(pointeeClusterOffset);
251 pageInCluster++;
252 }
253 }
254 delete clusterFooter;
255 }
256
257 for (unsigned iColumn = 0; iColumn < nColumns; ++iColumn) {
258 fMapper.fColumnIndex[iColumn].fNElements = ntupleFooter->fNElementsPerColumn[iColumn];
259 }
260 fMapper.fNEntries = ntupleFooter->fNEntries;
261
262 delete ntupleFooter;
263 delete ntupleHeader;
264
265 // TODO(jblomer): replace RMapper by a ntuple descriptor
266 RNTupleDescriptorBuilder descBuilder;
267 descBuilder.SetNTuple(fNTupleName, RNTupleVersion());
268 fDescriptor = descBuilder.GetDescriptor();
269}
270
271
272std::unique_ptr<ROOT::Experimental::RNTupleModel> ROOT::Experimental::Detail::RPageSourceRoot::GenerateModel()
273{
274 auto model = std::make_unique<RNTupleModel>();
275 for (auto& f : fMapper.fRootFields) {
276 auto field = Detail::RFieldBase::Create(f.fFieldName, f.fTypeName);
277 model->AddField(std::unique_ptr<Detail::RFieldBase>(field));
278 }
279 return model;
280}
281
283 ColumnHandle_t columnHandle, NTupleSize_t index, RPage* page)
284{
285 auto columnId = columnHandle.fId;
286 auto nElems = fMapper.fColumnIndex[columnId].fNElements;
287 R__ASSERT(index < nElems);
288
289 NTupleSize_t firstInPage = 0;
290 NTupleSize_t firstOutsidePage = nElems;
291 NTupleSize_t pageIdx = 0;
292
293 std::size_t iLower = 0;
294 std::size_t iUpper = fMapper.fColumnIndex[columnId].fRangeStarts.size() - 1;
295 R__ASSERT(iLower <= iUpper);
296 unsigned iLast = iUpper;
297 while (iLower <= iUpper) {
298 std::size_t iPivot = (iLower + iUpper) / 2;
299 NTupleSize_t pivot = fMapper.fColumnIndex[columnId].fRangeStarts[iPivot];
300 if (pivot > index) {
301 iUpper = iPivot - 1;
302 } else {
303 auto next = nElems;
304 if (iPivot < iLast) next = fMapper.fColumnIndex[columnId].fRangeStarts[iPivot + 1];
305 if ((pivot == index) || (next > index)) {
306 firstOutsidePage = next;
307 firstInPage = pivot;
308 pageIdx = iPivot;
309 break;
310 } else {
311 iLower = iPivot + 1;
312 }
313 }
314 }
315
316 auto elemsInPage = firstOutsidePage - firstInPage;
317 void* buf = page->TryGrow(elemsInPage);
318 R__ASSERT(buf != nullptr);
319
320 auto clusterId = fMapper.fColumnIndex[columnId].fClusterId[pageIdx];
321 auto pageInCluster = fMapper.fColumnIndex[columnId].fPageInCluster[pageIdx];
322 auto selfOffset = fMapper.fColumnIndex[columnId].fSelfClusterOffset[pageIdx];
323 auto pointeeOffset = fMapper.fColumnIndex[columnId].fPointeeClusterOffset[pageIdx];
324 page->SetWindow(firstInPage, RPage::RClusterInfo(clusterId, selfOffset, pointeeOffset));
325
326 //printf("Populating page %lu/%lu [%lu] for column %d starting at %lu\n", clusterId, pageInCluster, pageIdx, columnId, firstInPage);
327
328 std::string keyName = std::string(RMapper::kKeyPagePayload) +
329 std::to_string(clusterId) + RMapper::kKeySeparator +
330 std::to_string(columnId) + RMapper::kKeySeparator +
331 std::to_string(pageInCluster);
332 auto pageKey = fDirectory->GetKey(keyName.c_str());
333 auto pagePayload = pageKey->ReadObject<ROOT::Experimental::Internal::RPagePayload>();
334 R__ASSERT(static_cast<std::size_t>(pagePayload->fSize) == page->GetSize());
335 memcpy(page->GetBuffer(), pagePayload->fContent, pagePayload->fSize);
336
337 free(pagePayload->fContent);
338 free(pagePayload);
339}
340
342{
343 return fMapper.fNEntries;
344}
345
347{
348 return fMapper.fColumnIndex[columnHandle.fId].fNElements;
349}
350
352{
353 // TODO(jblomer) distinguish trees
354 return columnHandle.fId;
355}
#define R__WARNING_HERE(GROUP)
Definition: RLogger.hxx:184
#define f(i)
Definition: RSha256.hxx:104
#define R__ASSERT(e)
Definition: TError.h:96
#define free
Definition: civetweb.c:1539
const RColumnModel & GetModel() const
Definition: RColumn.hxx:166
RColumn * GetOffsetColumn() const
Definition: RColumn.hxx:171
static RFieldBase * Create(const std::string &fieldName, const std::string &typeName)
Factory method to resurrect a field from the stored on-disk type information.
Definition: RField.cxx:48
static constexpr const char * kKeyClusterFooter
static constexpr const char * kKeyPagePayload
static constexpr const char * kKeySeparator
static constexpr const char * kKeyNTupleFooter
static constexpr const char * kKeyNTupleHeader
void CommitDataset() final
Finalize the current cluster and the entire data set.
void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final
Write a page to the storage. The column must have been added before.
RPageSinkRoot(std::string_view ntupleName, RSettings settings)
void CommitCluster(NTupleSize_t nEntries) final
Finalize the current cluster and create a new one for the following data.
void Create(RNTupleModel *model) final
Physically creates the storage container to hold the tree (e.g., a directory in a TFile or an S3 bucke...
ColumnHandle_t AddColumn(RColumn *column) final
Register a new column.
Abstract interface to write data into a tree.
RPageSourceRoot(std::string_view ntupleName, RSettings settings)
ColumnHandle_t AddColumn(RColumn *column) final
Register a new column.
std::unique_ptr< ROOT::Experimental::RNTupleModel > GenerateModel() final
void Attach() final
TODO: copy/assignment for creating clones in multiple threads.
void PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t index, RPage *page) final
Fills a page starting with index rangeStart; the corresponding column is taken from the page object.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle) final
NTupleSize_t GetNElements(ColumnHandle_t columnHandle) final
Abstract interface to read data from a tree.
Stores information about the cluster in which this page resides.
Definition: RPage.hxx:45
A page is a fixed size slice of a column that is mapped into memory.
Definition: RPage.hxx:40
std::size_t GetSize() const
The space taken by column elements in the buffer.
Definition: RPage.hxx:87
NTupleSize_t GetNElements() const
Definition: RPage.hxx:88
void SetWindow(const NTupleSize_t rangeFirst, const RClusterInfo &clusterInfo)
Seek the page to a certain position of the column.
Definition: RPage.hxx:109
NTupleSize_t GetRangeFirst() const
Definition: RPage.hxx:89
void * TryGrow(std::size_t nElements)
Return a pointer after the last element that has space for nElements new elements.
Definition: RPage.hxx:98
Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various hea...
const RNTupleDescriptor & GetDescriptor() const
void SetNTuple(std::string_view name, const RNTupleVersion &version)
The RNTupleModel encapsulates the schema of an ntuple.
For forward and backward compatibility, attach version information to the constituents of the file f...
Definition: RNTupleUtil.hxx:73
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:48
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3980
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:44
constexpr NTupleSize_t kInvalidNTupleIndex
Definition: RNTupleUtil.hxx:45
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:61
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
Definition: file.py:1