Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageSinkBuf.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageSinkBuf.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Max Orok <maxwellorok@gmail.com>
5/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
6/// \date 2021-03-17
7/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
8/// is welcome!
9
10/*************************************************************************
11 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
12 * All rights reserved. *
13 * *
14 * For the licensing terms see $ROOTSYS/LICENSE. *
15 * For the list of contributors see $ROOTSYS/README/CREDITS. *
16 *************************************************************************/
17
18#ifndef ROOT7_RPageSinkBuf
19#define ROOT7_RPageSinkBuf
20
22#include <ROOT/RPageStorage.hxx>
23
24#include <deque>
25#include <iterator>
26#include <memory>
27#include <tuple>
28
29namespace ROOT {
30namespace Experimental {
31namespace Internal {
32
33// clang-format off
34/**
35\class ROOT::Experimental::Internal::RPageSinkBuf
36\ingroup NTuple
37\brief Wrapper sink that coalesces cluster column page writes
38*/
39// clang-format on
40class RPageSinkBuf : public RPageSink {
41private:
42 /// A buffered column. The column is not responsible for RPage memory management (i.e.
43 /// ReservePage/ReleasePage), which is handled by the enclosing RPageSinkBuf.
44 class RColumnBuf {
45 public:
46 struct RPageZipItem {
48 // Compression scratch buffer for fSealedPage.
49 std::unique_ptr<unsigned char[]> fBuf;
51 bool IsSealed() const { return fSealedPage != nullptr; }
52 void AllocateSealedPageBuf(std::size_t nBytes)
53 {
54 fBuf = std::unique_ptr<unsigned char[]>(new unsigned char[nBytes]);
55 }
56 };
57 public:
58 RColumnBuf() = default;
59 RColumnBuf(const RColumnBuf&) = delete;
60 RColumnBuf& operator=(const RColumnBuf&) = delete;
61 RColumnBuf(RColumnBuf&&) = default;
64
65 /// Returns a reference to the newly buffered page. The reference remains
66 /// valid until the return value of DrainBufferedPages() is destroyed.
68 {
69 if (!fCol) {
70 fCol = columnHandle;
71 }
72 // Safety: Insertion at the end of a deque never invalidates references
73 // to existing elements.
74 return fBufferedPages.emplace_back();
75 }
76 const RPageStorage::ColumnHandle_t &GetHandle() const { return fCol; }
77 bool IsEmpty() const { return fBufferedPages.empty(); }
78 bool HasSealedPagesOnly() const { return fBufferedPages.size() == fSealedPages.size(); }
80
81 using BufferedPages_t = std::tuple<std::deque<RPageZipItem>, RPageStorage::SealedPageSequence_t>;
82 /// When the return value of DrainBufferedPages() is destroyed, all references
83 /// returned by BufferPage() are invalidated.
84 /// This function gives up ownership of the buffered pages. Thus, `ReleasePage()` must be called
85 /// accordingly.
87 {
88 BufferedPages_t drained;
89 std::swap(fBufferedPages, std::get<decltype(fBufferedPages)>(drained));
90 std::swap(fSealedPages, std::get<decltype(fSealedPages)>(drained));
91 return drained;
92 }
93 void DropBufferedPages();
94
95 // The returned reference points to a default-constructed RSealedPage. It can be used
96 // to fill in data after sealing.
98 {
99 return fSealedPages.emplace_back();
100 }
101
102 private:
104 /// Using a deque guarantees that references to elements are never invalidated
105 /// by appends to the end of the deque by BufferPage.
106 std::deque<RPageZipItem> fBufferedPages;
107 /// Pages that have already been sealed by a concurrent task. A vector commit can be issued if all
108 /// buffered pages have been sealed.
109 /// Note that each RSealedPage refers to the same buffer as `fBufferedPages[i].fBuf` for some value of `i`, and
110 /// is thus owned by the corresponding RPageZipItem.
112 };
113
114private:
115 /// I/O performance counters that get registered in fMetrics
116 struct RCounters {
120 };
121 std::unique_ptr<RCounters> fCounters;
122 /// The inner sink, responsible for actually performing I/O.
123 std::unique_ptr<RPageSink> fInnerSink;
124 /// The buffered page sink maintains a copy of the RNTupleModel for the inner sink.
125 /// For the unbuffered case, the RNTupleModel is instead managed by an RNTupleWriter.
126 std::unique_ptr<RNTupleModel> fInnerModel;
127 /// Vector of buffered column pages. Indexed by column id.
128 std::vector<RColumnBuf> fBufferedColumns;
131
132 void ConnectFields(const std::vector<RFieldBase *> &fields, NTupleSize_t firstEntry);
133
134public:
135 explicit RPageSinkBuf(std::unique_ptr<RPageSink> inner);
136 RPageSinkBuf(const RPageSinkBuf&) = delete;
140 ~RPageSinkBuf() override;
141
142 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
143
144 const RNTupleDescriptor &GetDescriptor() const final;
145
146 void InitImpl(RNTupleModel &model) final;
147 void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final;
148 void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &extraTypeInfo) final;
149
150 void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final;
151 void CommitSealedPage(DescriptorId_t physicalColumnId, const RSealedPage &sealedPage) final;
152 void CommitSealedPageV(std::span<RPageStorage::RSealedPageGroup> ranges) final;
153 std::uint64_t CommitCluster(NTupleSize_t nNewEntries) final;
154 void CommitClusterGroup() final;
155 void CommitDatasetImpl() final;
156
157 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
158 void ReleasePage(RPage &page) final;
159}; // RPageSinkBuf
160
161} // namespace Internal
162} // namespace Experimental
163} // namespace ROOT
164
165#endif
A non-thread-safe integral performance counter.
A counter for CPU ticks that is either thread-safe or non-thread-safe.
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:43
std::tuple< std::deque< RPageZipItem >, RPageStorage::SealedPageSequence_t > BufferedPages_t
RPageZipItem & BufferPage(RPageStorage::ColumnHandle_t columnHandle)
Returns a reference to the newly buffered page.
RColumnBuf & operator=(const RColumnBuf &)=delete
const RPageStorage::ColumnHandle_t & GetHandle() const
RPageStorage::SealedPageSequence_t fSealedPages
Pages that have been already sealed by a concurrent task.
BufferedPages_t DrainBufferedPages()
When the return value of DrainBufferedPages() is destroyed, all references returned by BufferPage() are ...
std::deque< RPageZipItem > fBufferedPages
Using a deque guarantees that references to elements are never invalidated by appends to the end of the deque by BufferPage.
RColumnBuf & operator=(RColumnBuf &&)=default
const RPageStorage::SealedPageSequence_t & GetSealedPages() const
Wrapper sink that coalesces cluster column page writes.
RPageSinkBuf & operator=(RPageSinkBuf &&)=default
std::uint64_t CommitCluster(NTupleSize_t nNewEntries) final
Finalize the current cluster and create a new one for the following data.
RPageSinkBuf(const RPageSinkBuf &)=delete
std::vector< RColumnBuf > fBufferedColumns
Vector of buffered column pages. Indexed by column id.
std::unique_ptr< RNTupleModel > fInnerModel
The buffered page sink maintains a copy of the RNTupleModel for the inner sink.
std::unique_ptr< RPageSink > fInnerSink
The inner sink, responsible for actually performing I/O.
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements.
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges) final
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
const RNTupleDescriptor & GetDescriptor() const final
Return the RNTupleDescriptor being constructed.
std::unique_ptr< RCounters > fCounters
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
void InitImpl(RNTupleModel &model) final
void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final
Write a page to the storage. The column must have been added before.
void ConnectFields(const std::vector< RFieldBase * > &fields, NTupleSize_t firstEntry)
void CommitSealedPage(DescriptorId_t physicalColumnId, const RSealedPage &sealedPage) final
Write a preprocessed page to storage. The column must have been added before.
void CommitClusterGroup() final
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &extraTypeInfo) final
Adds an extra type information record to schema.
RPageSinkBuf & operator=(const RPageSinkBuf &)=delete
Abstract interface to write data into an ntuple.
Common functionality of an ntuple storage for both reading and writing.
std::deque< RSealedPage > SealedPageSequence_t
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
Field-specific extra type information from the header / extension header.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which does not allow forward-declaring tb...
The incremental changes to a RNTupleModel
I/O performance counters that get registered in fMetrics.
Detail::RNTupleTickCounter< Detail::RNTuplePlainCounter > & fTimeCpuCriticalSection
A range of sealed pages referring to the same column that can be used for vector commit.
A sealed page contains the bytes of a page as written to storage (packed & compressed).