Logo ROOT  
Reference Guide
RPageStorage.cxx
Go to the documentation of this file.
1/// \file RPageStorage.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RPageStorage.hxx>
17#include <ROOT/RColumn.hxx>
18#include <ROOT/RField.hxx>
19#include <ROOT/RNTupleModel.hxx>
20#include <ROOT/RPagePool.hxx>
23#include <ROOT/RStringView.hxx>
24
25#include <Compression.h>
26#include <TError.h>
27
28#include <unordered_map>
29#include <utility>
30
31namespace {
32
/// Check whether `str` ends with `suffix`.
///
/// Both arguments are taken as std::string_view so that callers can pass string
/// literals, std::string or std::string_view directly, without having to build
/// a temporary std::string first.
///
/// \param str    the string to inspect
/// \param suffix the ending to look for; an empty suffix always matches
/// \return true if the last suffix.size() characters of `str` are equal to `suffix`
bool StrEndsWith(std::string_view str, std::string_view suffix)
{
   // A string shorter than the suffix cannot end with it; this guard also keeps
   // the offset computed below from underflowing.
   if (str.size() < suffix.size())
      return false;
   return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
}
39
40} // anonymous namespace
41
43{
44}
45
47{
48}
49
50
51//------------------------------------------------------------------------------
52
53
55 : RPageStorage(name), fOptions(options)
56{
57}
58
60{
61}
62
63std::unique_ptr<ROOT::Experimental::Detail::RPageSource> ROOT::Experimental::Detail::RPageSource::Create(
64 std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options)
65{
66 if (StrEndsWith(std::string(location), ".root"))
67 return std::make_unique<RPageSourceRoot>(ntupleName, location, options);
68 return std::make_unique<RPageSourceRaw>(ntupleName, location, options);
69}
70
73{
75 auto columnId = fDescriptor.FindColumnId(fieldId, column.GetIndex());
77 return ColumnHandle_t(columnId, &column);
78}
79
81{
82 return fDescriptor.GetNEntries();
83}
84
86{
87 return fDescriptor.GetNElements(columnHandle.fId);
88}
89
91{
92 // TODO(jblomer) distinguish trees
93 return columnHandle.fId;
94}
95
96
97//------------------------------------------------------------------------------
98
99
101 : RPageStorage(name), fOptions(options)
102{
103}
104
106{
107}
108
109std::unique_ptr<ROOT::Experimental::Detail::RPageSink> ROOT::Experimental::Detail::RPageSink::Create(
110 std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options)
111{
112 if (StrEndsWith(std::string(location), ".root"))
113 return std::make_unique<RPageSinkRoot>(ntupleName, location, options);
114 return std::make_unique<RPageSinkRaw>(ntupleName, location, options);
115}
116
119{
120 auto columnId = fLastColumnId++;
121 fDescriptorBuilder.AddColumn(columnId, fieldId, column.GetVersion(), column.GetModel(), column.GetIndex());
122 return ColumnHandle_t(columnId, &column);
123}
124
125
127{
128 fDescriptorBuilder.SetNTuple(fNTupleName, model.GetDescription(), "undefined author",
129 model.GetVersion(), model.GetUuid());
130
131 std::unordered_map<const RFieldBase *, DescriptorId_t> fieldPtr2Id; // necessary to find parent field ids
132 const auto &rootField = *model.GetRootField();
133 fDescriptorBuilder.AddField(fLastFieldId, rootField.GetFieldVersion(), rootField.GetTypeVersion(),
134 rootField.GetName(), rootField.GetType(), rootField.GetNRepetitions(), rootField.GetStructure());
135 fieldPtr2Id[&rootField] = fLastFieldId++;
136 for (auto& f : *model.GetRootField()) {
137 fDescriptorBuilder.AddField(fLastFieldId, f.GetFieldVersion(), f.GetTypeVersion(), f.GetName(), f.GetType(),
138 f.GetNRepetitions(), f.GetStructure());
139 fDescriptorBuilder.AddFieldLink(fieldPtr2Id[f.GetParent()], fLastFieldId);
140
141 Detail::RFieldFuse::Connect(fLastFieldId, *this, f); // issues in turn one or several calls to AddColumn()
142 fieldPtr2Id[&f] = fLastFieldId++;
143 }
144
145 auto nColumns = fLastColumnId;
146 for (DescriptorId_t i = 0; i < nColumns; ++i) {
148 columnRange.fColumnId = i;
149 columnRange.fFirstElementIndex = 0;
150 columnRange.fNElements = 0;
151 columnRange.fCompressionSettings = fOptions.GetCompression();
152 fOpenColumnRanges.emplace_back(columnRange);
154 pageRange.fColumnId = i;
155 fOpenPageRanges.emplace_back(std::move(pageRange));
156 }
157
158 DoCreate(model);
159}
160
161
163{
164 auto locator = DoCommitPage(columnHandle, page);
165
166 auto columnId = columnHandle.fId;
167 fOpenColumnRanges[columnId].fNElements += page.GetNElements();
169 pageInfo.fNElements = page.GetNElements();
170 pageInfo.fLocator = locator;
171 fOpenPageRanges[columnId].fPageInfos.emplace_back(pageInfo);
172}
173
174
176{
177 auto locator = DoCommitCluster(nEntries);
178
179 R__ASSERT((nEntries - fPrevClusterNEntries) < ClusterSize_t(-1));
180 fDescriptorBuilder.AddCluster(fLastClusterId, RNTupleVersion(), fPrevClusterNEntries,
181 ClusterSize_t(nEntries - fPrevClusterNEntries));
182 fDescriptorBuilder.SetClusterLocator(fLastClusterId, locator);
183 for (auto &range : fOpenColumnRanges) {
184 fDescriptorBuilder.AddClusterColumnRange(fLastClusterId, range);
185 range.fFirstElementIndex += range.fNElements;
186 range.fNElements = 0;
187 }
188 for (auto &range : fOpenPageRanges) {
190 std::swap(fullRange, range);
191 range.fColumnId = fullRange.fColumnId;
192 fDescriptorBuilder.AddClusterPageRange(fLastClusterId, std::move(fullRange));
193 }
194 ++fLastClusterId;
195 fPrevClusterNEntries = nEntries;
196}
#define f(i)
Definition: RSha256.hxx:104
#define R__ASSERT(e)
Definition: TError.h:96
char name[80]
Definition: TGX11.cxx:109
const RColumnModel & GetModel() const
Definition: RColumn.hxx:231
std::uint32_t GetIndex() const
Definition: RColumn.hxx:232
RNTupleVersion GetVersion() const
Definition: RColumn.hxx:237
static void Connect(DescriptorId_t fieldId, RPageStorage &pageStorage, RFieldBase &field)
Definition: RField.cxx:76
RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
void CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
Common functionality of an ntuple storage for both reading and writing.
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
ClusterSize_t::ValueType GetNElements() const
Definition: RPage.hxx:83
The RNTupleModel encapsulates the schema of an ntuple.
std::string GetDescription() const
RFieldRoot * GetRootField() const
RNTupleVersion GetVersion() const
Common user-tunable settings for reading ntuples.
For forward and backward compatibility, attach version information to the constituents of the file f...
Common user-tunable settings for storing ntuples.
basic_string_view< char > string_view
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:43
RClusterSize ClusterSize_t
Definition: RNTupleUtil.hxx:58
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:79
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:75
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:80
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
NTupleSize_t fFirstElementIndex
A 64bit element index.
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
We do not need to store the element size / uncompressed page size because we know to which column the...
RLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.