Logo ROOT   master
Reference Guide
RPageStorageFile.cxx
Go to the documentation of this file.
1 /// \file RPageStorageFile.cxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2019-11-25
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #include <ROOT/RField.hxx>
17 #include <ROOT/RLogger.hxx>
19 #include <ROOT/RNTupleModel.hxx>
20 #include <ROOT/RNTupleZip.hxx>
21 #include <ROOT/RPage.hxx>
22 #include <ROOT/RPageAllocator.hxx>
23 #include <ROOT/RPagePool.hxx>
25 #include <ROOT/RRawFile.hxx>
26 
27 #include <RVersion.h>
28 #include <TError.h>
29 
30 #include <algorithm>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <iostream>
34 #include <utility>
35 
36 
37 ROOT::Experimental::Detail::RPageSinkFile::RPageSinkFile(std::string_view ntupleName, std::string_view path,
38  const RNTupleWriteOptions &options)
39  : RPageSink(ntupleName, options)
40  , fMetrics("RPageSinkRoot")
41  , fPageAllocator(std::make_unique<RPageAllocatorHeap>())
42 {
43  R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
44  "Do not store real data with this version of RNTuple!";
45 
46  fWriter = std::unique_ptr<Internal::RNTupleFileWriter>(Internal::RNTupleFileWriter::Recreate(
47  ntupleName, path, options.GetCompression(), options.GetContainerFormat()));
48 }
49 
50 
52  const RNTupleWriteOptions &options)
53  : RPageSink(ntupleName, options)
54  , fMetrics("RPageSinkRoot")
55  , fPageAllocator(std::make_unique<RPageAllocatorHeap>())
56 {
57  R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
58  "Do not store real data with this version of RNTuple!";
59 
60  fWriter = std::unique_ptr<Internal::RNTupleFileWriter>(Internal::RNTupleFileWriter::Append(ntupleName, file));
61 }
62 
63 
64 ROOT::Experimental::Detail::RPageSinkFile::RPageSinkFile(std::string_view ntupleName, std::string_view path,
65  const RNTupleWriteOptions &options, std::unique_ptr<TFile> &file)
66  : RPageSink(ntupleName, options)
67  , fMetrics("RPageSinkRoot")
68  , fPageAllocator(std::make_unique<RPageAllocatorHeap>())
69 {
70  R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
71  "Do not store real data with this version of RNTuple!";
72  fWriter = std::unique_ptr<Internal::RNTupleFileWriter>(
73  Internal::RNTupleFileWriter::Recreate(ntupleName, path, file));
74 }
75 
76 
78 {
79 }
80 
81 
83 {
84  const auto &descriptor = fDescriptorBuilder.GetDescriptor();
85  auto szHeader = descriptor.SerializeHeader(nullptr);
86  auto buffer = std::unique_ptr<unsigned char[]>(new unsigned char[szHeader]);
87  descriptor.SerializeHeader(buffer.get());
88 
89  auto zipBuffer = std::unique_ptr<unsigned char[]>(new unsigned char[szHeader]);
90  auto szZipHeader = fCompressor(buffer.get(), szHeader, fOptions.GetCompression(),
91  [&zipBuffer](const void *b, size_t n, size_t o){ memcpy(zipBuffer.get() + o, b, n); } );
92  fWriter->WriteNTupleHeader(zipBuffer.get(), szZipHeader, szHeader);
93 }
94 
95 
98 {
99  unsigned char *buffer = reinterpret_cast<unsigned char *>(page.GetBuffer());
100  bool isAdoptedBuffer = true;
101  auto packedBytes = page.GetSize();
102  auto element = columnHandle.fColumn->GetElement();
103  const auto isMappable = element->IsMappable();
104 
105  if (!isMappable) {
106  packedBytes = (page.GetNElements() * element->GetBitsOnStorage() + 7) / 8;
107  buffer = new unsigned char[packedBytes];
108  isAdoptedBuffer = false;
109  element->Pack(buffer, page.GetBuffer(), page.GetNElements());
110  }
111  auto zippedBytes = packedBytes;
112 
113  if (fOptions.GetCompression() != 0) {
114  zippedBytes = fCompressor(buffer, packedBytes, fOptions.GetCompression());
115  if (!isAdoptedBuffer)
116  delete[] buffer;
117  buffer = const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(fCompressor.GetZipBuffer()));
118  isAdoptedBuffer = true;
119  }
120 
121  auto offsetData = fWriter->WriteBlob(buffer, zippedBytes, packedBytes);
122  fClusterMinOffset = std::min(offsetData, fClusterMinOffset);
123  fClusterMaxOffset = std::max(offsetData, fClusterMaxOffset);
124 
125  if (!isAdoptedBuffer)
126  delete[] buffer;
127 
129  result.fPosition = offsetData;
130  result.fBytesOnStorage = zippedBytes;
131  return result;
132 }
133 
134 
137 {
139  result.fPosition = fClusterMinOffset;
140  result.fBytesOnStorage = fClusterMaxOffset - fClusterMinOffset;
141  fClusterMinOffset = std::uint64_t(-1);
142  fClusterMaxOffset = 0;
143  return result;
144 }
145 
146 
148 {
149  const auto &descriptor = fDescriptorBuilder.GetDescriptor();
150  auto szFooter = descriptor.SerializeFooter(nullptr);
151  auto buffer = std::unique_ptr<unsigned char []>(new unsigned char[szFooter]);
152  descriptor.SerializeFooter(buffer.get());
153 
154  auto zipBuffer = std::unique_ptr<unsigned char[]>(new unsigned char[szFooter]);
155  auto szZipFooter = fCompressor(buffer.get(), szFooter, fOptions.GetCompression(),
156  [&zipBuffer](const void *b, size_t n, size_t o){ memcpy(zipBuffer.get() + o, b, n); } );
157  fWriter->WriteNTupleFooter(zipBuffer.get(), szZipFooter, szFooter);
158  fWriter->Commit();
159 }
160 
161 
164 {
165  if (nElements == 0)
166  nElements = kDefaultElementsPerPage;
167  auto elementSize = columnHandle.fColumn->GetElement()->GetSize();
168  return fPageAllocator->NewPage(columnHandle.fId, elementSize, nElements);
169 }
170 
172 {
173  fPageAllocator->DeletePage(page);
174 }
175 
176 
177 ////////////////////////////////////////////////////////////////////////////////
178 
179 
181  ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
182 {
183  RPage newPage(columnId, mem, elementSize * nElements, elementSize);
184  newPage.TryGrow(nElements);
185  return newPage;
186 }
187 
189 {
190  if (page.IsNull())
191  return;
192  delete[] reinterpret_cast<unsigned char *>(page.GetBuffer());
193 }
194 
195 
196 ////////////////////////////////////////////////////////////////////////////////
197 
198 
200  const RNTupleReadOptions &options)
201  : RPageSource(ntupleName, options)
202  , fMetrics("RPageSourceFile")
203  , fPageAllocator(std::make_unique<RPageAllocatorFile>())
204  , fPagePool(std::make_shared<RPagePool>())
205 {
206 }
207 
208 
209 ROOT::Experimental::Detail::RPageSourceFile::RPageSourceFile(std::string_view ntupleName, std::string_view path,
210  const RNTupleReadOptions &options)
211  : RPageSourceFile(ntupleName, options)
212 {
214  R__ASSERT(fFile);
216 }
217 
218 
220 {
221 }
222 
223 
225 {
226  RNTupleDescriptorBuilder descBuilder;
227  const auto fNTuple = fReader.GetNTuple(fNTupleName);
228 
229  auto buffer = std::unique_ptr<unsigned char[]>(new unsigned char[fNTuple.fLenHeader]);
230  auto zipBuffer = std::unique_ptr<unsigned char[]>(new unsigned char[fNTuple.fNBytesHeader]);
231  fReader.ReadBuffer(zipBuffer.get(), fNTuple.fNBytesHeader, fNTuple.fSeekHeader);
232  fDecompressor(zipBuffer.get(), fNTuple.fNBytesHeader, fNTuple.fLenHeader, buffer.get());
233  descBuilder.SetFromHeader(buffer.get());
234 
235  buffer = std::unique_ptr<unsigned char[]>(new unsigned char[fNTuple.fLenFooter]);
236  zipBuffer = std::unique_ptr<unsigned char[]>(new unsigned char[fNTuple.fNBytesFooter]);
237  fReader.ReadBuffer(zipBuffer.get(), fNTuple.fNBytesFooter, fNTuple.fSeekFooter);
238  fDecompressor(zipBuffer.get(), fNTuple.fNBytesFooter, fNTuple.fLenFooter, buffer.get());
239  descBuilder.AddClustersFromFooter(buffer.get());
240 
241  return descBuilder.MoveDescriptor();
242 }
243 
244 
246  ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
247 {
248  const auto columnId = columnHandle.fId;
249  const auto clusterId = clusterDescriptor.GetId();
250  const auto &pageRange = clusterDescriptor.GetPageRange(columnId);
251 
252  // TODO(jblomer): binary search
254  decltype(clusterIndex) firstInPage = 0;
255  for (const auto &pi : pageRange.fPageInfos) {
256  if (firstInPage + pi.fNElements > clusterIndex) {
257  pageInfo = pi;
258  break;
259  }
260  firstInPage += pi.fNElements;
261  }
262  R__ASSERT(firstInPage <= clusterIndex);
263  R__ASSERT((firstInPage + pageInfo.fNElements) > clusterIndex);
264 
265  const auto element = columnHandle.fColumn->GetElement();
266  const auto elementSize = element->GetSize();
267 
268  auto pageSize = pageInfo.fLocator.fBytesOnStorage;
269  auto pageBuffer = new unsigned char[
270  std::max(pageSize, static_cast<std::uint32_t>(elementSize * pageInfo.fNElements))];
271  fReader.ReadBuffer(pageBuffer, pageInfo.fLocator.fBytesOnStorage, pageInfo.fLocator.fPosition);
272 
273  const auto bytesOnStorage = (element->GetBitsOnStorage() * pageInfo.fNElements + 7) / 8;
274  if (pageSize != bytesOnStorage) {
275  fDecompressor(pageBuffer, pageSize, bytesOnStorage);
276  pageSize = bytesOnStorage;
277  }
278 
279  if (!element->IsMappable()) {
280  pageSize = elementSize * pageInfo.fNElements;
281  auto unpackedBuffer = new unsigned char[pageSize];
282  element->Unpack(unpackedBuffer, pageBuffer, pageInfo.fNElements);
283  delete[] pageBuffer;
284  pageBuffer = unpackedBuffer;
285  }
286 
287  const auto indexOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex;
288  auto newPage = fPageAllocator->NewPage(columnId, pageBuffer, elementSize, pageInfo.fNElements);
289  newPage.SetWindow(indexOffset + firstInPage, RPage::RClusterInfo(clusterId, indexOffset));
290  fPagePool->RegisterPage(newPage,
291  RPageDeleter([](const RPage &page, void * /*userData*/)
292  {
294  }, nullptr));
295  return newPage;
296 }
297 
298 
300  ColumnHandle_t columnHandle, NTupleSize_t globalIndex)
301 {
302  const auto columnId = columnHandle.fId;
303  auto cachedPage = fPagePool->GetPage(columnId, globalIndex);
304  if (!cachedPage.IsNull())
305  return cachedPage;
306 
307  const auto clusterId = fDescriptor.FindClusterId(columnId, globalIndex);
308  R__ASSERT(clusterId != kInvalidDescriptorId);
309  const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
310  const auto selfOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex;
311  R__ASSERT(selfOffset <= globalIndex);
312  return PopulatePageFromCluster(columnHandle, clusterDescriptor, globalIndex - selfOffset);
313 }
314 
315 
317  ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)
318 {
319  const auto clusterId = clusterIndex.GetClusterId();
320  const auto index = clusterIndex.GetIndex();
321  const auto columnId = columnHandle.fId;
322  auto cachedPage = fPagePool->GetPage(columnId, clusterIndex);
323  if (!cachedPage.IsNull())
324  return cachedPage;
325 
326  R__ASSERT(clusterId != kInvalidDescriptorId);
327  const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
328  return PopulatePageFromCluster(columnHandle, clusterDescriptor, index);
329 }
330 
332 {
333  fPagePool->ReturnPage(page);
334 }
335 
336 std::unique_ptr<ROOT::Experimental::Detail::RPageSource> ROOT::Experimental::Detail::RPageSourceFile::Clone() const
337 {
338  auto clone = new RPageSourceFile(fNTupleName, fOptions);
339  clone->fFile = fFile->Clone();
340  clone->fReader = Internal::RMiniFileReader(clone->fFile.get());
341  return std::unique_ptr<RPageSourceFile>(clone);
342 }
virtual bool IsMappable() const
Derived, typed classes tell whether the on-storage layout is bitwise identical to the memory layout...
RLocator fLocator
The meaning of fLocator depends on the storage backend.
Manages pages read from the file
RColumnElementBase * GetElement() const
Definition: RColumn.hxx:230
ENTupleContainerFormat GetContainerFormat() const
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
void CreateImpl(const RNTupleModel &model) final
The RNTupleModel encapsulates the schema of an ntuple.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:48
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages...
Definition: RNTupleUtil.hxx:74
A closure that can free the memory associated with a mapped page
static RNTupleFileWriter * Recreate(std::string_view ntupleName, std::string_view path, int defaultCompression, ENTupleContainerFormat containerFormat)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName...
Definition: RMiniFile.cxx:1094
Abstract interface to write data into an ntuple
#define R__ASSERT(e)
Definition: TError.h:96
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:42
ClusterSize_t::ValueType GetNElements() const
Definition: RPage.hxx:83
STL namespace.
A helper class for piece-wise construction of an RNTupleDescriptor
Abstract interface to read data from an ntuple
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options)
ClusterSize_t::ValueType GetSize() const
The space taken by column elements in the buffer.
Definition: RPage.hxx:81
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
NTupleSize_t fFirstElementIndex
A 64bit element index.
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
#define R__WARNING_HERE(GROUP)
Definition: RLogger.hxx:184
Generic information about the physical location of data.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
Common user-tunable settings for reading ntuples
Meta-data for a set of ntuple clusters
RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:79
static RNTupleFileWriter * Append(std::string_view ntupleName, TFile &file)
Add a new RNTuple identified by ntupleName to the existing TFile.
Definition: RMiniFile.cxx:1137
Uses standard C++ memory allocation for the column data pages
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
Storage provider that reads ntuple pages from a file
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0) final
Get a new, empty page for the given column that can be filled with up to nElements.
RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options)
RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries) final
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url...
Definition: RRawFile.cxx:73
A thread-safe cache of column pages.
Definition: RPagePool.hxx:46
DescriptorId_t GetClusterId() const
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
Definition: file.py:1
static constexpr double pi
static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
The on-storage meta-data of an ntuple
void * TryGrow(ClusterSize_t::ValueType nElements)
Return a pointer after the last element that has space for nElements new elements.
Definition: RPage.hxx:107
We do not need to store the element size / uncompressed page size because we know to which column the...
Common user-tunable settings for storing ntuples
ClusterSize_t::ValueType GetIndex() const
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file...
A page is a slice of a column that is mapped into memory
Definition: RPage.hxx:41
Stores information about the cluster in which this page resides.
Definition: RPage.hxx:46
Read RNTuple data blocks from a TFile container, provided by a RRawFile
Definition: RMiniFile.hxx:101
const RPageRange & GetPageRange(DescriptorId_t columnId) const
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
const Int_t n
Definition: legend1.C:16
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:82