Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageSinkBuf.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageSinkBuf.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Max Orok <maxwellorok@gmail.com>
5/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
6/// \date 2021-03-17
7/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
8/// is welcome!
9
10/*************************************************************************
11 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
12 * All rights reserved. *
13 * *
14 * For the licensing terms see $ROOTSYS/LICENSE. *
15 * For the list of contributors see $ROOTSYS/README/CREDITS. *
16 *************************************************************************/
17
18#ifndef ROOT7_RPageSinkBuf
19#define ROOT7_RPageSinkBuf
20
22#include <ROOT/RPageStorage.hxx>
23
24#include <deque>
25#include <functional>
26#include <iterator>
27#include <memory>
28#include <tuple>
29
30namespace ROOT {
31namespace Experimental {
32namespace Internal {
33
34// clang-format off
35/**
36\class ROOT::Experimental::Internal::RPageSinkBuf
37\ingroup NTuple
38\brief Wrapper sink that coalesces cluster column page writes

Pages are buffered per column (see fBufferedColumns); the actual I/O is delegated to the wrapped
inner sink (see fInnerSink).
39*/
40// clang-format on
41class RPageSinkBuf : public RPageSink {
42private:
43 /// A buffered column. The column is not responsible for RPage memory management (i.e. ReservePage),
44 /// which is handled by the enclosing RPageSinkBuf.
45 class RColumnBuf {
46 public:
 /// Element type of fBufferedPages; owns the compression scratch buffer (fBuf) of one buffered page.
47 struct RPageZipItem {
49 // Compression scratch buffer for fSealedPage.
50 std::unique_ptr<unsigned char[]> fBuf;
 /// True once fSealedPage has been set, i.e. it is non-null.
52 bool IsSealed() const { return fSealedPage != nullptr; }
53 };
54 public:
 // A column buffer cannot be copied; move construction is defaulted.
55 RColumnBuf() = default;
56 RColumnBuf(const RColumnBuf&) = delete;
57 RColumnBuf& operator=(const RColumnBuf&) = delete;
58 RColumnBuf(RColumnBuf&&) = default;
61
62 /// Appends a new, empty item to fBufferedPages and returns a reference to it. The reference remains
63 /// valid until DropBufferedPages().
65 {
 // Remember the column handle the first time a page is buffered (while fCol is still unset).
66 if (!fCol) {
67 fCol = columnHandle;
68 }
69 // Safety: Insertion at the end of a deque never invalidates references
70 // to existing elements.
71 return fBufferedPages.emplace_back();
72 }
 /// Returns the column handle recorded by BufferPage().
73 const RPageStorage::ColumnHandle_t &GetHandle() const { return fCol; }
 /// True if no page is currently buffered for this column.
74 bool IsEmpty() const { return fBufferedPages.empty(); }
 /// True if there are exactly as many sealed pages as buffered pages.
75 bool HasSealedPagesOnly() const { return fBufferedPages.size() == fSealedPages.size(); }
77
78 void DropBufferedPages();
79
80 // Appends a default-constructed RSealedPage to fSealedPages and returns a reference to it.
81 // The reference can be used to fill in data after sealing.
83 {
84 return fSealedPages.emplace_back();
85 }
86
87 private:
89 /// Using a deque guarantees that references to existing elements are never invalidated
90 /// by appends to the end of the deque in BufferPage (iterators, by contrast, may be invalidated).
91 std::deque<RPageZipItem> fBufferedPages;
92 /// Pages that have already been sealed by a concurrent task. A vector commit can be issued if all
93 /// buffered pages have been sealed.
94 /// Note that each RSealedPage refers to the same buffer as `fBufferedPages[i].fBuf` for some value of `i`;
95 /// that buffer is thus owned by the corresponding RPageZipItem.
97 };
98
99private:
100 /// I/O performance counters that get registered in fMetrics
101 struct RCounters {
105 };
106 std::unique_ptr<RCounters> fCounters;
107 /// The inner sink, responsible for actually performing I/O.
108 std::unique_ptr<RPageSink> fInnerSink;
109 /// The buffered page sink maintains a copy of the RNTupleModel for the inner sink.
110 /// For the unbuffered case, the RNTupleModel is instead managed by a RNTupleWriter.
111 std::unique_ptr<RNTupleModel> fInnerModel;
112 /// Vector of buffered column pages. Indexed by column id.
113 std::vector<RColumnBuf> fBufferedColumns;
114 /// Columns committed as suppressed are stored and passed to the inner sink at cluster commit
115 std::vector<ColumnHandle_t> fSuppressedColumns;
118
119 void ConnectFields(const std::vector<RFieldBase *> &fields, NTupleSize_t firstEntry);
120 void FlushClusterImpl(std::function<void(void)> FlushClusterFn);
121
122public:
 /// Takes ownership of the sink passed as `inner`.
123 explicit RPageSinkBuf(std::unique_ptr<RPageSink> inner);
124 RPageSinkBuf(const RPageSinkBuf&) = delete;
128 ~RPageSinkBuf() override;
129
 /// Register a new column.
130 ColumnHandle_t AddColumn(DescriptorId_t fieldId, RColumn &column) final;
131
 /// Return the RNTupleDescriptor being constructed.
132 const RNTupleDescriptor &GetDescriptor() const final;
133
134 void InitImpl(RNTupleModel &model) final;
 /// Incorporate incremental changes to the model into the ntuple descriptor.
135 void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final;
 /// Adds an extra type information record to schema.
136 void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &extraTypeInfo) final;
137
 /// Commits a suppressed column for the current cluster.
138 void CommitSuppressedColumn(ColumnHandle_t columnHandle) final;
 /// Write a page to the storage. The column must have been added before.
139 void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final;
 /// Write a preprocessed page to storage. The column must have been added before.
140 void CommitSealedPage(DescriptorId_t physicalColumnId, const RSealedPage &sealedPage) final;
 /// Write a vector of preprocessed pages to storage.
141 void CommitSealedPageV(std::span<RPageStorage::RSealedPageGroup> ranges) final;
 /// Finalize the current cluster and create a new one for the following data.
142 std::uint64_t CommitCluster(NTupleSize_t nNewEntries) final;
 /// Stage the current cluster and create a new one for the following data.
143 RStagedCluster StageCluster(NTupleSize_t nNewEntries) final;
 /// Commit staged clusters, logically appending them to the ntuple descriptor.
144 void CommitStagedClusters(std::span<RStagedCluster> clusters) final;
145 void CommitClusterGroup() final;
146 void CommitDatasetImpl() final;
147
 /// Get a new, empty page for the given column that can be filled with up to nElements.
148 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
149}; // RPageSinkBuf
150
151} // namespace Internal
152} // namespace Experimental
153} // namespace ROOT
154
155#endif
A non thread-safe integral performance counter.
An either thread-safe or non thread safe counter for CPU ticks.
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:40
RPageZipItem & BufferPage(RPageStorage::ColumnHandle_t columnHandle)
Returns a reference to the newly buffered page.
RColumnBuf & operator=(const RColumnBuf &)=delete
const RPageStorage::ColumnHandle_t & GetHandle() const
RPageStorage::SealedPageSequence_t fSealedPages
Pages that have been already sealed by a concurrent task.
std::deque< RPageZipItem > fBufferedPages
Using a deque guarantees that element iterators are never invalidated by appends to the end of the it...
RColumnBuf & operator=(RColumnBuf &&)=default
const RPageStorage::SealedPageSequence_t & GetSealedPages() const
Wrapper sink that coalesces cluster column page writes.
RPageSinkBuf & operator=(RPageSinkBuf &&)=default
std::uint64_t CommitCluster(NTupleSize_t nNewEntries) final
Finalize the current cluster and create a new one for the following data.
void CommitStagedClusters(std::span< RStagedCluster > clusters) final
Commit staged clusters, logically appending them to the ntuple descriptor.
RPageSinkBuf(const RPageSinkBuf &)=delete
std::vector< RColumnBuf > fBufferedColumns
Vector of buffered column pages. Indexed by column id.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, RColumn &column) final
Register a new column.
RStagedCluster StageCluster(NTupleSize_t nNewEntries) final
Stage the current cluster and create a new one for the following data.
std::unique_ptr< RNTupleModel > fInnerModel
The buffered page sink maintains a copy of the RNTupleModel for the inner sink.
std::unique_ptr< RPageSink > fInnerSink
The inner sink, responsible for actually performing I/O.
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements; nElements must be...
void FlushClusterImpl(std::function< void(void)> FlushClusterFn)
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges) final
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
const RNTupleDescriptor & GetDescriptor() const final
Return the RNTupleDescriptor being constructed.
std::unique_ptr< RCounters > fCounters
void InitImpl(RNTupleModel &model) final
void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final
Write a page to the storage. The column must have been added before.
void ConnectFields(const std::vector< RFieldBase * > &fields, NTupleSize_t firstEntry)
std::vector< ColumnHandle_t > fSuppressedColumns
Columns committed as suppressed are stored and passed to the inner sink at cluster commit.
void CommitSealedPage(DescriptorId_t physicalColumnId, const RSealedPage &sealedPage) final
Write a preprocessed page to storage. The column must have been added before.
void CommitClusterGroup() final
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &extraTypeInfo) final
Adds an extra type information record to schema.
void CommitSuppressedColumn(ColumnHandle_t columnHandle) final
Commits a suppressed column for the current cluster.
RPageSinkBuf & operator=(const RPageSinkBuf &)=delete
Abstract interface to write data into an ntuple.
Common functionality of an ntuple storage for both reading and writing.
std::deque< RSealedPage > SealedPageSequence_t
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:46
Field specific extra type information from the header / extension header.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The incremental changes to a RNTupleModel
I/O performance counters that get registered in fMetrics.
Detail::RNTupleTickCounter< Detail::RNTuplePlainCounter > & fTimeCpuCriticalSection
Cluster that was staged, but not yet logically appended to the RNTuple.
A range of sealed pages referring to the same column that can be used for vector commit.
A sealed page contains the bytes of a page as written to storage (packed & compressed).