Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleWriteOptions.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleWriteOptions.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-22
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleWriteOptions
17#define ROOT7_RNTupleWriteOptions
18
19#include <Compression.h>
20
21#include <cstdint>
22#include <cstddef>
23#include <memory>
24
25namespace ROOT {
26namespace Experimental {
27
28// clang-format off
29/**
30\class ROOT::Experimental::RNTupleWriteOptions
31\ingroup NTuple
32\brief Common user-tunable settings for storing ntuples
33
34All page sink classes need to support the common options.
35*/
36// clang-format on
38public:
39 enum class EImplicitMT {
40 kOff,
42 };
43
44protected:
46 /// Approximation of the target compressed cluster size
47 std::size_t fApproxZippedClusterSize = 50 * 1000 * 1000;
48 /// Memory limit for committing a cluster: with very high compression ratio, we need a limit
49 /// on how large the I/O buffer can grow during writing.
50 std::size_t fMaxUnzippedClusterSize = 512 * 1024 * 1024;
51 /// Should be just large enough so that the compression ratio does not benefit much more from larger pages.
52 /// Unless the cluster is too small to contain a sufficiently large page, pages are
53 /// fApproxUnzippedPageSize in size. If tail page optimization is enabled, the last page in a cluster is
54 /// between fApproxUnzippedPageSize/2 and fApproxUnzippedPageSize * 1.5 in size.
55 std::size_t fApproxUnzippedPageSize = 64 * 1024;
56 /// Whether to optimize tail pages to avoid an undersized last page per cluster (see above). Increases the
57 /// required memory by a factor of 3.
59 /// Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
60 /// to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
61 bool fUseBufferedWrite = true;
62 /// Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
64 /// If set, 64bit index columns are replaced by 32bit index columns. This limits the cluster size to 512MB
65 /// but it can result in smaller file sizes for data sets with many collections and lz4 or no compression.
66 bool fHasSmallClusters = false;
67
68public:
69 /// A maximum size of 512MB still allows for a vector of bool to be stored in a small cluster. This is the
70 /// worst case wrt. the maximum required size of the index column. A 32bit index column can address 512MB
71 /// of 1-bit (on disk size) bools.
72 static constexpr std::uint64_t kMaxSmallClusterSize = 512 * 1024 * 1024;
73
74 virtual ~RNTupleWriteOptions() = default;
75 virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
76
/// Returns the compression setting currently stored in fCompression.
77 int GetCompression() const { return fCompression; }
/// Stores val in fCompression as the compression setting; this inline setter does not validate val.
78 void SetCompression(int val) { fCompression = val; }
/// Sets fCompression to the value that CompressionSettings() returns for the given algorithm and
/// compression level.
79 void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
80 {
81 fCompression = CompressionSettings(algorithm, compressionLevel);
82 }
83
85 void SetApproxZippedClusterSize(std::size_t val);
86
/// Returns fMaxUnzippedClusterSize, the memory limit for committing a cluster.
87 std::size_t GetMaxUnzippedClusterSize() const { return fMaxUnzippedClusterSize; }
88 void SetMaxUnzippedClusterSize(std::size_t val);
89
/// Returns fApproxUnzippedPageSize, the approximate size of an uncompressed page.
90 std::size_t GetApproxUnzippedPageSize() const { return fApproxUnzippedPageSize; }
91 void SetApproxUnzippedPageSize(std::size_t val);
92
95
/// Returns fUseBufferedWrite, i.e. whether buffered writing (with RPageSinkBuf) is used.
96 bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
/// Turns buffered writing on or off by storing val in fUseBufferedWrite.
97 void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
98
101
/// Returns fHasSmallClusters, i.e. whether 64bit index columns are replaced by 32bit index columns.
102 bool GetHasSmallClusters() const { return fHasSmallClusters; }
/// Stores val in fHasSmallClusters; this inline setter performs no further checks.
103 void SetHasSmallClusters(bool val) { fHasSmallClusters = val; }
104};
105
106} // namespace Experimental
107} // namespace ROOT
108
109#endif // ROOT7_RNTupleWriteOptions
Common user-tunable settings for storing ntuples.
bool fUseTailPageOptimization
Whether to optimize tail pages to avoid an undersized last page per cluster (see above).
bool fHasSmallClusters
If set, 64bit index columns are replaced by 32bit index columns.
static constexpr std::uint64_t kMaxSmallClusterSize
A maximum size of 512MB still allows for a vector of bool to be stored in a small cluster.
virtual std::unique_ptr< RNTupleWriteOptions > Clone() const
std::size_t fApproxZippedClusterSize
Approximation of the target compressed cluster size.
std::size_t fMaxUnzippedClusterSize
Memory limit for committing a cluster: with very high compression ratio, we need a limit on how large...
std::size_t fApproxUnzippedPageSize
Should be just large enough so that the compression ratio does not benefit much more from larger page...
bool fUseBufferedWrite
Whether to use buffered writing (with RPageSinkBuf).
void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EImplicitMT fUseImplicitMT
Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is ...
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EValues
Note: this is only temporarily a struct and will become an enum class hence the name.
Definition Compression.h:85
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:56