Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleMerger.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleMerger.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>, Max Orok <maxwellorok@gmail.com>, Alaettin Serhan Mete <amete@anl.gov>
4/// \date 2020-07-08
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleMerger
17#define ROOT7_RNTupleMerger
18
19#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPageStorage.hxx>
23#include <ROOT/TTaskGroup.hxx>
24#include <Compression.h>
25
26#include <memory>
27#include <optional>
28
29namespace ROOT {
30
31class RNTuple;
32
33namespace Experimental::Internal {
34
36 /// The merger will discard all columns that aren't present in the prototype model (i.e. the model of the first
37 /// source)
38 kFilter,
39 /// The merger will refuse to merge any 2 RNTuples whose schema doesn't match exactly
40 kStrict,
41 /// The merger will update the output model to include all columns from all sources. Entries corresponding to columns
42 /// that are not present in a source will be set to the default value of the type.
43 kUnion
44};
45
47 /// The merger will abort merging as soon as an error is encountered
48 kAbort,
49 /// Upon errors, the merger will skip the current source and continue
50 kSkip
51};
52
53struct RColumnMergeInfo;
54struct RNTupleMergeData;
55struct RSealedPageMergeData;
56
57class RClusterPool;
58
59/// Set of merging options to pass to RNTupleMerger.
60/// If you're using the merger through TFileMerger you need to give it string-based options instead.
61/// Here is the mapping for the TFileMerger options:
62/// - "rntuple.MergingMode=(Filter|Union|...)" -> sets fMergingMode
63/// - "rntuple.ErrBehavior=(Abort|Skip|...)" -> sets fErrBehavior
64/// - "rntuple.ExtraVerbose" -> sets fExtraVerbose to true
65/// Rules about the string-based options:
66/// 1. there must be no space around the separators (i.e. `.` and `=`)
67/// 2. all string matching is case insensitive
69 /// If fCompressionSettings is empty (the default), the merger will not change the
70 /// compression of any of its sources (fast merging). Otherwise, all sources will be converted to the specified
71 /// compression algorithm and level.
72 std::optional<std::uint32_t> fCompressionSettings;
73 /// Determines how the merging treats sources with different models (see ENTupleMergingMode).
75 /// Determines how the Merge function behaves upon merging errors
77 /// If true, the merger will emit further diagnostics and information.
78 bool fExtraVerbose = false;
79};
80
81// clang-format off
82/**
83 * \class ROOT::Experimental::Internal::RNTupleMerger
84 * \ingroup NTuple
85 * \brief Given a set of RPageSources merge them into an RPageSink, optionally changing their compression.
86 * This can also be used to change the compression of a single RNTuple by just passing a single source.
87 */
88// clang-format on
90 friend class ROOT::RNTuple;
91
92 std::unique_ptr<RPageSink> fDestination;
93 std::unique_ptr<RPageAllocator> fPageAlloc;
94 std::optional<TTaskGroup> fTaskGroup;
95 std::unique_ptr<RNTupleModel> fModel;
96
98 std::span<RColumnMergeInfo> commonColumns, const RCluster::ColumnSet_t &commonColumnSet,
100
101 void MergeSourceClusters(RPageSource &source, std::span<RColumnMergeInfo> commonColumns,
102 std::span<RColumnMergeInfo> extraDstColumns, RNTupleMergeData &mergeData);
103
104 /// Creates a RNTupleMerger with the given destination.
105 /// The model must be given if and only if `destination` has been initialized with that model
106 /// (i.e. in case of incremental merging).
107 RNTupleMerger(std::unique_ptr<RPageSink> destination, std::unique_ptr<RNTupleModel> model);
108
109public:
110 /// Creates a RNTupleMerger with the given destination.
111 explicit RNTupleMerger(std::unique_ptr<RPageSink> destination);
112
113 /// Merge a given set of sources into the destination.
114 RResult<void> Merge(std::span<RPageSource *> sources, const RNTupleMergeOptions &mergeOpts = RNTupleMergeOptions());
115
116}; // end of class RNTupleMerger
117
118} // namespace Experimental::Internal
119} // namespace ROOT
120
121#endif
Manages a set of clusters containing compressed and packed pages.
std::unordered_set< ROOT::DescriptorId_t > ColumnSet_t
Definition RCluster.hxx:154
Given a set of RPageSources merge them into an RPageSink, optionally changing their compression.
RResult< void > Merge(std::span< RPageSource * > sources, const RNTupleMergeOptions &mergeOpts=RNTupleMergeOptions())
Merge a given set of sources into the destination.
void MergeSourceClusters(RPageSource &source, std::span< RColumnMergeInfo > commonColumns, std::span< RColumnMergeInfo > extraDstColumns, RNTupleMergeData &mergeData)
void MergeCommonColumns(RClusterPool &clusterPool, ROOT::DescriptorId_t clusterId, std::span< RColumnMergeInfo > commonColumns, const RCluster::ColumnSet_t &commonColumnSet, RSealedPageMergeData &sealedPageData, const RNTupleMergeData &mergeData)
std::unique_ptr< RNTupleModel > fModel
RNTupleMerger(std::unique_ptr< RPageSink > destination, std::unique_ptr< RNTupleModel > model)
Creates a RNTupleMerger with the given destination.
std::unique_ptr< RPageAllocator > fPageAlloc
std::unique_ptr< RPageSink > fDestination
Abstract interface to read data from an ntuple.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69
@ kStrict
The merger will refuse to merge any 2 RNTuples whose schema doesn't match exactly.
@ kUnion
The merger will update the output model to include all columns from all sources.
@ kFilter
The merger will discard all columns that aren't present in the prototype model (i.e. the model of the first source).
@ kAbort
The merger will abort merging as soon as an error is encountered.
@ kSkip
Upon errors, the merger will skip the current source and continue.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Set of merging options to pass to RNTupleMerger.
ENTupleMergingMode fMergingMode
Determines how the merging treats sources with different models (see ENTupleMergingMode).
ENTupleMergeErrBehavior fErrBehavior
Determines how the Merge function behaves upon merging errors.
std::optional< std::uint32_t > fCompressionSettings
If fCompressionSettings is empty (the default), the merger will not change the compression of any of its sources (fast merging).
bool fExtraVerbose
If true, the merger will emit further diagnostics and information.