Logo ROOT   master
Reference Guide
RPageStorageFile.hxx
Go to the documentation of this file.
1 /// \file ROOT/RPageStorageFile.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2019-11-21
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RPageStorageFile
17 #define ROOT7_RPageStorageFile
18 
19 #include <ROOT/RPageStorage.hxx>
20 #include <ROOT/RMiniFile.hxx>
21 #include <ROOT/RNTupleMetrics.hxx>
22 #include <ROOT/RNTupleZip.hxx>
23 #include <ROOT/RStringView.hxx>
24 
25 #include <array>
26 #include <cstdio>
27 #include <memory>
28 #include <string>
29 
30 class TFile;
31 
32 namespace ROOT {
33 
34 namespace Internal {
35 class RRawFile;
36 }
37 
38 namespace Experimental {
39 namespace Detail {
40 
41 class RPageAllocatorHeap;
42 class RPagePool;
43 
44 
45 // clang-format off
46 /**
47 \class ROOT::Experimental::Detail::RPageSinkFile
48 \ingroup NTuple
49 \brief Storage provider that writes ntuple pages into a file
50 
51 The written file can be either in ROOT format or in RNTuple bare format.
52 */
53 // clang-format on
54 class RPageSinkFile : public RPageSink {
55 public:
56  static constexpr std::size_t kDefaultElementsPerPage = 10000;
57 
58 private:
60  std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
61 
62  std::unique_ptr<Internal::RNTupleFileWriter> fWriter;
63  /// Byte offset of the first page of the current cluster
64  std::uint64_t fClusterMinOffset = std::uint64_t(-1);
65  /// Byte offset of the end of the last page of the current cluster
66  std::uint64_t fClusterMaxOffset = 0;
67  /// Helper for zipping keys and header / footer; comprises a 16MB zip buffer
69 
70 protected:
71  void CreateImpl(const RNTupleModel &model) final;
72  RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
74  void CommitDatasetImpl() final;
75 
76 public:
77  RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
78  RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options,
79  std::unique_ptr<TFile> &file);
80  RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
81  virtual ~RPageSinkFile();
82 
83  RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements = 0) final;
84  void ReleasePage(RPage &page) final;
85 
86  RNTupleMetrics &GetMetrics() final { return fMetrics; }
87 };
88 
89 
90 // clang-format off
91 /**
92 \class ROOT::Experimental::Detail::RPageAllocatorFile
93 \ingroup NTuple
94 \brief Manages pages read from the file
95 */
96 // clang-format on
98 public:
99  static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements);
100  static void DeletePage(const RPage& page);
101 };
102 
103 
104 // clang-format off
105 /**
106 \class ROOT::Experimental::Detail::RPageSourceFile
107 \ingroup NTuple
108 \brief Storage provider that reads ntuple pages from a file
109 */
110 // clang-format on
111 class RPageSourceFile : public RPageSource {
112 public:
113  /// Cannot process pages larger than 1MB
114  static constexpr std::size_t kMaxPageSize = 1024 * 1024;
115 
116 private:
118  /// Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile
119  std::unique_ptr<RPageAllocatorFile> fPageAllocator;
120  /// The page pool might, at some point, be used by multiple page sources
121  std::shared_ptr<RPagePool> fPagePool;
122  /// Helper to unzip pages and header/footer; comprises a 16MB unzip buffer
124  /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
125  std::unique_ptr<ROOT::Internal::RRawFile> fFile;
126  /// Takes the fFile to read ntuple blobs from it
128 
129  RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
130  RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor,
131  ClusterSize_t::ValueType clusterIndex);
132 
133 protected:
135 
136 public:
137  RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
138  /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
139  /// The meta-data (header and footer) is reread and parsed by the clone.
140  std::unique_ptr<RPageSource> Clone() const final;
141  virtual ~RPageSourceFile();
142 
143  RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
144  RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) final;
145  void ReleasePage(RPage &page) final;
146 
147  RNTupleMetrics &GetMetrics() final { return fMetrics; }
148 };
149 
150 
151 } // namespace Detail
152 
153 } // namespace Experimental
154 } // namespace ROOT
155 
156 #endif
std::unique_ptr< RPageAllocatorFile > fPageAllocator
Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile.
Manages pages read from the file
Returns the available number of logical cores.
Definition: StringConv.hxx:21
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
void CreateImpl(const RNTupleModel &model) final
The RNTupleModel encapsulates the schema of an ntuple.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:48
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages...
Definition: RNTupleUtil.hxx:74
RNTupleDecompressor fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB unzip buffer.
Abstract interface to write data into an ntuple
RNTupleCompressor fCompressor
Helper for zipping keys and header / footer; comprises a 16MB zip buffer.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:42
STL namespace.
RNTupleMetrics & GetMetrics() final
Page storage implementations usually have their own metrics.
Abstract interface to read data from an ntuple
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
Helper class to uncompress data blocks in the ROOT compression frame format
Definition: RNTupleZip.hxx:132
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
std::shared_ptr< RPagePool > fPagePool
The page pool might, at some point, be used by multiple page sources.
RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options)
A collection of Counter objects with a name, a unit, and a description.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
Generic information about the physical location of data.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
Common user-tunable settings for reading ntuples
Storage provider that writes ntuple pages into a file
Meta-data for a set of ntuple clusters
Helper class to compress data blocks in the ROOT compression frame format
Definition: RNTupleZip.hxx:40
RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RNTupleMetrics & GetMetrics() final
Page storage implementations usually have their own metrics.
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
static constexpr std::size_t kDefaultElementsPerPage
Storage provider that reads ntuple pages from a file
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0) final
Get a new, empty page for the given column that can be filled with up to nElements.
std::uint64_t fClusterMaxOffset
Byte offset of the end of the last page of the current cluster.
RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries) final
std::uint64_t fClusterMinOffset
Byte offset of the first page of the current cluster.
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
Definition: file.py:1
static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
The on-storage meta-data of an ntuple
Common user-tunable settings for storing ntuples
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file...
A page is a slice of a column that is mapped into memory
Definition: RPage.hxx:41
Read RNTuple data blocks from a TFile container, provided by a RRawFile
Definition: RMiniFile.hxx:101
static constexpr std::size_t kMaxPageSize
Cannot process pages larger than 1MB.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:82