Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleWriteOptions.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleWriteOptions.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-22
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleWriteOptions
17#define ROOT7_RNTupleWriteOptions
18
19#include <Compression.h>
20
21#include <cstdint>
22#include <cstddef>
23#include <memory>
24
25namespace ROOT {
26namespace Experimental {
27
28class RNTupleWriteOptions;
29
30namespace Internal {
31
33public:
34 static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize);
35};
36
37} // namespace Internal
38
39// clang-format off
40/**
41\class ROOT::Experimental::RNTupleWriteOptions
42\ingroup NTuple
43\brief Common user-tunable settings for storing ntuples
44
45All page sink classes need to support the common options.
46*/
47// clang-format on
49public:
50 enum class EImplicitMT {
51 kOff,
53 };
54
55 // clang-format off
56 static constexpr std::uint64_t kDefaultMaxKeySize = 0x4000'0000; // 1 GiB
57
59 // clang-format on
60
61protected:
63 /// Approximation of the target compressed cluster size
64 std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024;
65 /// Memory limit for committing a cluster: with very high compression ratio, we need a limit
66 /// on how large the I/O buffer can grow during writing.
68 /// Initially, columns start with a page of this size. The default value is chosen to accommodate at least 32 elements
69 /// of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
70 /// given by fMaxUnzippedPageSize or until the total page buffer limit is reached (as a sum of all page buffers).
71 /// The total write buffer limit needs to be large enough to hold the initial pages of all columns.
72 std::size_t fInitialUnzippedPageSize = 256;
73 /// Pages can grow only to the given limit in bytes.
74 std::size_t fMaxUnzippedPageSize = 1024 * 1024;
75 /// The maximum size that the sum of all page buffers used for writing into a persistent sink is allowed to use.
76 /// If set to zero, RNTuple will auto-adjust the budget based on the value of fApproxZippedClusterSize.
77 /// If set manually, the size needs to be large enough to hold all initial page buffers.
78 /// The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
79 /// Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
80 /// The default values are tuned for a total write memory of around 300 MB per fill context.
81 std::size_t fPageBufferBudget = 0;
82 /// Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
83 /// to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
84 bool fUseBufferedWrite = true;
85 /// Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may vary between
86 /// filesystems and platforms.
87 bool fUseDirectIO = false;
88 /// Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4MiB gives best
89 /// performance (with Direct I/O) at a reasonable memory consumption.
90 std::size_t fWriteBufferSize = 4 * 1024 * 1024;
91 /// Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
93 /// If set, checksums will be calculated and written for every page.
95 /// Specifies the max size of a payload storeable into a single TKey. When writing an RNTuple to a ROOT file,
96 /// any payload whose size exceeds this will be split into multiple keys.
98
99public:
100
101 virtual ~RNTupleWriteOptions() = default;
102 virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
103
104 int GetCompression() const { return fCompression; }
105 void SetCompression(int val) { fCompression = val; }
106 void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
107 {
108 fCompression = CompressionSettings(algorithm, compressionLevel);
109 }
110
112 void SetApproxZippedClusterSize(std::size_t val);
113
115 void SetMaxUnzippedClusterSize(std::size_t val);
116
118 void SetInitialUnzippedPageSize(std::size_t val);
119
120 std::size_t GetMaxUnzippedPageSize() const { return fMaxUnzippedPageSize; }
121 void SetMaxUnzippedPageSize(std::size_t val);
122
123 std::size_t GetPageBufferBudget() const;
124 void SetPageBufferBudget(std::size_t val) { fPageBufferBudget = val; }
125
126 bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
127 void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
128
129 bool GetUseDirectIO() const { return fUseDirectIO; }
130 void SetUseDirectIO(bool val) { fUseDirectIO = val; }
131
132 std::size_t GetWriteBufferSize() const { return fWriteBufferSize; }
133 void SetWriteBufferSize(std::size_t val) { fWriteBufferSize = val; }
134
137
139 /// Note that turning off page checksums will also turn off the same page merging optimization (see tuning.md)
141
142 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
143};
144
145namespace Internal {
/// Out-of-class inline definition of the static helper declared in Internal::RNTupleWriteOptionsManip.
/// Overwrites the maximum key size stored in the given write options.
/// \param options The write options object whose fMaxKeySize member is assigned.
/// \param maxKeySize The new value; it is stored as-is, no validation is performed here.
146inline void RNTupleWriteOptionsManip::SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
147{
// Direct member assignment; the public class interface visible here only offers GetMaxKeySize().
148 options.fMaxKeySize = maxKeySize;
149}
150} // namespace Internal
151
152} // namespace Experimental
153} // namespace ROOT
154
155#endif // ROOT7_RNTupleWriteOptions
static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
Common user-tunable settings for storing ntuples.
std::size_t fPageBufferBudget
The maximum size that the sum of all page buffers used for writing into a persistent sink are allowed...
std::uint64_t fMaxKeySize
Specifies the max size of a payload storeable into a single TKey.
std::size_t fWriteBufferSize
Buffer size to use for writing to files, must be a multiple of 4096 bytes.
bool fEnablePageChecksums
If set, checksums will be calculated and written for every page.
bool fUseDirectIO
Whether to use Direct I/O for writing.
virtual std::unique_ptr< RNTupleWriteOptions > Clone() const
std::size_t fMaxUnzippedPageSize
Pages can grow only to the given limit in bytes.
std::size_t fInitialUnzippedPageSize
Initially, columns start with a page of this size.
std::size_t fApproxZippedClusterSize
Approximation of the target compressed cluster size.
std::size_t fMaxUnzippedClusterSize
Memory limit for committing a cluster: with very high compression ratio, we need a limit on how large...
bool fUseBufferedWrite
Whether to use buffered writing (with RPageSinkBuf).
void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EImplicitMT fUseImplicitMT
Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is ...
void SetEnablePageChecksums(bool val)
Note that turning off page checksums will also turn off the same page merging optimization (see tunin...
static constexpr std::uint64_t kDefaultMaxKeySize
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EValues
Note: this is only temporarily a struct and will become an enum class, hence the name convention used.
Definition Compression.h:88
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:58