Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleFillContext.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleFillContext.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-22
5
6/*************************************************************************
7 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef ROOT_RNTupleFillContext
15#define ROOT_RNTupleFillContext
16
17#include <ROOT/RConfig.hxx> // for R__unlikely
18#include <ROOT/REntry.hxx>
19#include <ROOT/RError.hxx>
20#include <ROOT/RPageStorage.hxx>
24#include <ROOT/RNTupleModel.hxx>
25#include <ROOT/RNTupleTypes.hxx>
26
27#include <cstddef>
28#include <cstdint>
29#include <memory>
30#include <vector>
31
32namespace ROOT {
33
34namespace Experimental {
35class RNTupleAttrSetWriter;
36}
37
38// clang-format off
39/**
40\class ROOT::RNTupleFillContext
41\ingroup NTuple
42\brief A context for filling entries (data) into clusters of an RNTuple
43
44An output cluster can be filled with entries. The caller has to make sure that the data that gets filled into a cluster
45is not modified for the time of the Fill() call. The fill call serializes the C++ object into the column format and
46writes data into the corresponding column page buffers. Writing of the buffers to storage is deferred and can be
47triggered by FlushCluster() or by destructing the context. On I/O errors, an exception is thrown.
48
49Instances of this class are not meant to be used in isolation and can be created from an RNTupleParallelWriter. For
50sequential writing, please refer to RNTupleWriter.
51*/
52// clang-format on
54 friend class ROOT::RNTupleWriter;
57
58private:
59 /// The page sink's parallel page compression scheduler if IMT is on.
60 /// Needs to be destructed after the page sink is destructed and so declared before.
61 std::unique_ptr<ROOT::Internal::RPageStorage::RTaskScheduler> fZipTasks;
62 std::unique_ptr<ROOT::Internal::RPageSink> fSink;
63 /// Needs to be destructed before fSink
64 std::unique_ptr<ROOT::RNTupleModel> fModel;
65
67
70 /// Keeps track of the number of bytes written into the current cluster
71 std::size_t fUnzippedClusterSize = 0;
72 /// The total number of bytes written to storage (i.e., after compression)
73 std::uint64_t fNBytesFlushed = 0;
74 /// The total number of bytes filled into all the so far committed clusters,
75 /// i.e. the uncompressed size of the written clusters
76 std::uint64_t fNBytesFilled = 0;
77 /// Limit for committing cluster no matter the other tunables
79 /// Estimator of uncompressed cluster size, taking into account the estimated compression ratio
81
82 /// Whether to enable staged cluster committing, where only an explicit call to CommitStagedClusters() will logically
83 /// append the clusters to the RNTuple.
85 /// Vector of currently staged clusters.
86 std::vector<ROOT::Internal::RPageSink::RStagedCluster> fStagedClusters;
87
88 template <typename Entry>
90 {
91 if (R__unlikely(entry.GetModelId() != fModel->GetModelId()))
92 throw RException(R__FAIL("mismatch between entry and model"));
93
94 const std::size_t bytesWritten = entry.Append();
96 fNEntries++;
97
101 status.fShouldFlushCluster =
103 }
104 template <typename Entry>
105 std::size_t FillImpl(Entry &entry)
106 {
108 FillNoFlushImpl(entry, status);
109 if (status.ShouldFlushCluster())
110 FlushCluster();
111 return status.GetLastEntrySize();
112 }
113
114 RNTupleFillContext(std::unique_ptr<ROOT::RNTupleModel> model, std::unique_ptr<ROOT::Internal::RPageSink> sink);
119
120public:
122
123 /// Fill an entry into this context, but don't commit the cluster. The calling code must pass an RNTupleFillStatus
124 /// and check RNTupleFillStatus::ShouldFlushCluster.
125 ///
126 /// This method will check the entry's model ID to ensure it comes from the context's own model or throw an exception
127 /// otherwise.
129 /// Fill an entry into this context. This method will check the entry's model ID to ensure it comes from the
130 /// context's own model or throw an RException otherwise. FlushCluster() is called when the fill status asks for it.
131 /// \return The number of uncompressed bytes written for this entry.
132 std::size_t Fill(ROOT::REntry &entry) { return FillImpl(entry); }
133
134 /// Fill an RRawPtrWriteEntry into this context, but don't commit the cluster. The calling code must pass an
135 /// RNTupleFillStatus and check RNTupleFillStatus::ShouldFlushCluster.
136 ///
137 /// This method will check the entry's model ID to ensure it comes from the context's own model or throw an exception
138 /// otherwise.
143 /// Fill an RRawPtrWriteEntry into this context. This method will check the entry's model ID to ensure it comes
144 /// from the context's own model or throw an exception otherwise.
145 /// \return The number of uncompressed bytes written.
147
148 /// Flush column data, preparing for CommitCluster or to reduce memory usage. This will trigger compression of pages,
149 /// but not actually write to storage.
150 void FlushColumns();
151 /// Flush so far filled entries to storage
152 void FlushCluster();
153 /// Logically append staged clusters to the RNTuple.
155
/// Return a read-only reference to the model held by this context (the object behind fModel).
156 const ROOT::RNTupleModel &GetModel() const { return *fModel; }
/// Create a new REntry by forwarding to CreateEntry() of this context's model; the caller owns the result.
157 std::unique_ptr<ROOT::REntry> CreateEntry() const { return fModel->CreateEntry(); }
/// Create a new RRawPtrWriteEntry by forwarding to CreateRawPtrWriteEntry() of this context's model; the caller
/// owns the result.
158 std::unique_ptr<ROOT::Detail::RRawPtrWriteEntry> CreateRawPtrWriteEntry() const
159 {
160 return fModel->CreateRawPtrWriteEntry();
161 }
162
163 /// Return the entry number that was last flushed in a cluster.
165 /// Return the number of entries filled so far.
167
168 void EnableStagedClusterCommitting(bool val = true)
169 {
170 if (!val && !fStagedClusters.empty()) {
171 throw RException(R__FAIL("cannot disable staged committing with pending clusters"));
172 }
174 }
176
179};
180
181} // namespace ROOT
182
183#endif // ROOT_RNTupleFillContext
#define R__unlikely(expr)
Definition RConfig.hxx:592
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult<T>.
Definition RError.hxx:300
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
A container of const raw pointers, corresponding to a row in the data set.
A collection of Counter objects with a name, a unit, and a description.
Class used to write an RNTupleAttrSet in the context of an RNTupleWriter.
The REntry is a collection of values in an RNTuple corresponding to a complete row in the data set.
Definition REntry.hxx:51
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
A context for filling entries (data) into clusters of an RNTuple.
void FlushCluster()
Flush so far filled entries to storage.
ROOT::NTupleSize_t fLastFlushed
void FillNoFlushImpl(Entry &entry, ROOT::RNTupleFillStatus &status)
std::size_t fUnzippedClusterSizeEst
Estimator of uncompressed cluster size, taking into account the estimated compression ratio.
std::size_t Fill(ROOT::Detail::RRawPtrWriteEntry &entry)
Fill an RRawPtrWriteEntry into this context.
std::uint64_t fNBytesFilled
The total number of bytes filled into all the so far committed clusters, i.e. the uncompressed size of the written clusters.
void EnableStagedClusterCommitting(bool val=true)
ROOT::NTupleSize_t GetLastFlushed() const
Return the entry number that was last flushed in a cluster.
Experimental::Detail::RNTupleMetrics fMetrics
bool IsStagedClusterCommittingEnabled() const
ROOT::NTupleSize_t GetNEntries() const
Return the number of entries filled so far.
std::uint64_t fNBytesFlushed
The total number of bytes written to storage (i.e., after compression)
std::size_t FillImpl(Entry &entry)
std::vector< ROOT::Internal::RPageSink::RStagedCluster > fStagedClusters
Vector of currently staged clusters.
void FlushColumns()
Flush column data, preparing for CommitCluster or to reduce memory usage.
std::unique_ptr< ROOT::REntry > CreateEntry() const
RNTupleFillContext(std::unique_ptr< ROOT::RNTupleModel > model, std::unique_ptr< ROOT::Internal::RPageSink > sink)
std::size_t fUnzippedClusterSize
Keeps track of the number of bytes written into the current cluster.
std::unique_ptr< ROOT::Detail::RRawPtrWriteEntry > CreateRawPtrWriteEntry() const
const ROOT::RNTupleModel & GetModel() const
RNTupleFillContext & operator=(const RNTupleFillContext &)=delete
const Experimental::Detail::RNTupleMetrics & GetMetrics() const
void FillNoFlush(ROOT::Detail::RRawPtrWriteEntry &entry, ROOT::RNTupleFillStatus &status)
Fill an RRawPtrWriteEntry into this context, but don't commit the cluster.
RNTupleFillContext(RNTupleFillContext &&)=delete
void CommitStagedClusters()
Logically append staged clusters to the RNTuple.
RNTupleFillContext(const RNTupleFillContext &)=delete
RNTupleFillContext & operator=(RNTupleFillContext &&)=delete
bool fStagedClusterCommitting
Whether to enable staged cluster committing, where only an explicit call to CommitStagedClusters() will logically append the clusters to the RNTuple.
std::unique_ptr< ROOT::Internal::RPageStorage::RTaskScheduler > fZipTasks
The page sink's parallel page compression scheduler if IMT is on.
std::unique_ptr< ROOT::RNTupleModel > fModel
Needs to be destructed before fSink.
std::unique_ptr< ROOT::Internal::RPageSink > fSink
std::size_t fMaxUnzippedClusterSize
Limit for committing cluster no matter the other tunables.
void FillNoFlush(ROOT::REntry &entry, ROOT::RNTupleFillStatus &status)
Fill an entry into this context, but don't commit the cluster.
std::size_t Fill(ROOT::REntry &entry)
Fill an entry into this context.
A status object after filling an entry.
ROOT::NTupleSize_t fNEntriesSinceLastFlush
Number of entries written into the current cluster.
std::size_t fUnzippedClusterSize
Number of bytes written into the current cluster.
std::size_t fLastEntrySize
Number of bytes written for the last entry.
bool ShouldFlushCluster() const
Return true if the caller should call FlushCluster.
std::size_t GetLastEntrySize() const
Return the number of bytes for the last entry.
The RNTupleModel encapsulates the schema of an RNTuple.
A writer to fill an RNTuple from multiple contexts.
An RNTuple that gets filled with entries (data) and writes them to storage.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.