Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleMerger.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleMerger.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>, Max Orok <maxwellorok@gmail.com>, Alaettin Serhan Mete <amete@anl.gov>
4/// \date 2020-07-08
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT_RNTupleMerger
17#define ROOT_RNTupleMerger
18
19#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPageStorage.hxx>
23#include <ROOT/TTaskGroup.hxx>
24#include <Compression.h>
25
26#include <memory>
27#include <optional>
28
29namespace ROOT {
30
31class RNTuple;
32
33namespace Internal {
34class RPageAllocator;
35class RClusterPool;
36}
37
38namespace Experimental::Internal {
39
41 /// The merger will discard all columns that aren't present in the prototype model (i.e. the model of the first
42 /// source); also all subsequent RNTuples must contain at least all the columns that are present in the prototype
43 /// model
44 kFilter,
45 /// The merger will refuse to merge any 2 RNTuples whose schema doesn't match exactly
46 kStrict,
47 /// The merger will update the output model to include all columns from all sources. Entries corresponding to columns
48 /// that are not present in a source will be set to the default value of the type.
49 kUnion
50};
51
53 /// The merger will abort merging as soon as an error is encountered
54 kAbort,
55 /// Upon errors, the merger will skip the current source and continue
56 kSkip
57};
58
59struct RColumnMergeInfo;
60struct RNTupleMergeData;
61struct RSealedPageMergeData;
62
63/// Set of merging options to pass to RNTupleMerger.
64/// If you're using the merger through TFileMerger you need to give it string-based options instead.
65/// Here is the mapping for the TFileMerger options:
66/// - "rntuple.MergingMode=(Filter|Union|...)" -> sets fMergingMode
67/// - "rntuple.ErrBehavior=(Abort|Skip|...)" -> sets fErrBehavior
68/// - "rntuple.ExtraVerbose" -> sets fExtraVerbose to true
69/// Rules about the string-based options:
70/// 1. there must be no whitespace around the separators (i.e. `.` and `=`)
71/// 2. all string matching is case insensitive
73 /// If fCompressionSettings is empty (the default), the merger will not change the
74 /// compression of any of its sources (fast merging). Otherwise, all sources will be converted to the specified
75 /// compression algorithm and level.
76 std::optional<std::uint32_t> fCompressionSettings;
77 /// Determines how the merging treats sources with different models (\see ENTupleMergingMode).
79 /// Determines how the Merge function behaves upon merging errors
81 /// If true, the merger will emit further diagnostics and information.
82 bool fExtraVerbose = false;
83};
84
85// clang-format off
86/**
87 * \class ROOT::Experimental::Internal::RNTupleMerger
88 * \ingroup NTuple
89 * \brief Given a set of RPageSources, merge them into an RPagePersistentSink, optionally changing their compression.
90 * This can also be used to change the compression of a single RNTuple by just passing a single source.
91 */
92// clang-format on
94 friend class ROOT::RNTuple;
95
96 std::unique_ptr<ROOT::Internal::RPagePersistentSink> fDestination;
97 std::unique_ptr<ROOT::Internal::RPageAllocator> fPageAlloc;
98 std::optional<TTaskGroup> fTaskGroup;
99 std::unique_ptr<ROOT::RNTupleModel> fModel;
100
102 std::span<const RColumnMergeInfo> commonColumns,
106
107 void MergeSourceClusters(ROOT::Internal::RPageSource &source, std::span<const RColumnMergeInfo> commonColumns,
108 std::span<const RColumnMergeInfo> extraDstColumns, RNTupleMergeData &mergeData);
109
110 /// Creates a RNTupleMerger with the given destination.
111 /// The model must be given if and only if `destination` has been initialized with that model
112 /// (i.e. in case of incremental merging).
113 RNTupleMerger(std::unique_ptr<ROOT::Internal::RPagePersistentSink> destination,
114 std::unique_ptr<ROOT::RNTupleModel> model);
115
116public:
117 /// Creates a RNTupleMerger with the given destination.
118 explicit RNTupleMerger(std::unique_ptr<ROOT::Internal::RPagePersistentSink> destination);
119
120 /// Merge a given set of sources into the destination.
121 /// Note that sources with an empty schema (i.e. created from a Model that had no fields added to it) are in
122 /// general valid (depending on the merging mode) but add no entries to the destination.
123 RResult<void> Merge(std::span<ROOT::Internal::RPageSource *> sources,
125
126}; // end of class RNTupleMerger
127
128} // namespace Experimental::Internal
129} // namespace ROOT
130
131#endif
Given a set of RPageSources, merge them into an RPagePersistentSink, optionally changing their compression.
void MergeCommonColumns(ROOT::Internal::RClusterPool &clusterPool, const ROOT::RClusterDescriptor &clusterDesc, std::span< const RColumnMergeInfo > commonColumns, const ROOT::Internal::RCluster::ColumnSet_t &commonColumnSet, std::size_t nCommonColumnsInCluster, RSealedPageMergeData &sealedPageData, const RNTupleMergeData &mergeData, ROOT::Internal::RPageAllocator &pageAlloc)
std::unique_ptr< ROOT::RNTupleModel > fModel
RNTupleMerger(std::unique_ptr< ROOT::Internal::RPagePersistentSink > destination, std::unique_ptr< ROOT::RNTupleModel > model)
Creates a RNTupleMerger with the given destination.
void MergeSourceClusters(ROOT::Internal::RPageSource &source, std::span< const RColumnMergeInfo > commonColumns, std::span< const RColumnMergeInfo > extraDstColumns, RNTupleMergeData &mergeData)
std::unique_ptr< ROOT::Internal::RPagePersistentSink > fDestination
std::unique_ptr< ROOT::Internal::RPageAllocator > fPageAlloc
RResult< void > Merge(std::span< ROOT::Internal::RPageSource * > sources, const RNTupleMergeOptions &mergeOpts=RNTupleMergeOptions())
Merge a given set of sources into the destination.
Manages a set of clusters containing compressed and packed pages.
std::unordered_set< ROOT::DescriptorId_t > ColumnSet_t
Definition RCluster.hxx:150
Abstract interface to allocate and release pages.
Abstract interface to read data from an ntuple.
Metadata for RNTuple clusters.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:65
@ kStrict
The merger will refuse to merge any 2 RNTuples whose schema doesn't match exactly.
@ kUnion
The merger will update the output model to include all columns from all sources.
@ kFilter
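The merger will discard all columns that aren't present in the prototype model (i.e. the model of the first source); also, all subsequent RNTuples must contain at least all the columns that are present in the prototype model.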
@ kAbort
The merger will abort merging as soon as an error is encountered.
@ kSkip
Upon errors, the merger will skip the current source and continue.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Set of merging options to pass to RNTupleMerger.
ENTupleMergingMode fMergingMode
Determines how the merging treats sources with different models (see ENTupleMergingMode).
ENTupleMergeErrBehavior fErrBehavior
Determines how the Merge function behaves upon merging errors.
std::optional< std::uint32_t > fCompressionSettings
If fCompressionSettings is empty (the default), the merger will not change the compression of any of its sources (fast merging).
bool fExtraVerbose
If true, the merger will emit further diagnostics and information.