Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
SnapshotHelpers.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RDF/SnapshotHelpers.hxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \author Stephan Hageboeck
9 \date 2025-06
10*/
11
12/*************************************************************************
13 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
14 * All rights reserved. *
15 * *
16 * For the licensing terms see $ROOTSYS/LICENSE. *
17 * For the list of contributors see $ROOTSYS/README/CREDITS. *
18 *************************************************************************/
19
20#ifndef RDF_SNAPSHOTHELPERS
21#define RDF_SNAPSHOTHELPERS
22
24
27#include <ROOT/RDF/Utils.hxx>
28
29#include <array>
30#include <memory>
31#include <variant>
32
33class TBranch;
34class TFile;
35
36namespace ROOT {
37class REntry;
38class RFieldToken;
39class RNTupleFillContext;
40class RNTupleParallelWriter;
41class TBufferMerger;
42class TBufferMergerFile;
43} // namespace ROOT
44
45namespace ROOT::Internal::RDF {
46
/// Snapshot action helper whose members reference the RNTuple writing classes (RNTupleParallelWriter,
/// RNTupleFillContext, REntry, RFieldToken). It derives from RActionImpl instantiated with its own type.
/// Only declarations are shown here; the member functions are defined out of line.
/// NOTE(review): the numbering of this listing jumps in places (53 -> 57, 57 -> 59, 70 -> 73, 74 -> 80,
/// 92 -> 94, 97 -> 99), so some declarations of the original header are not visible in this view.
47class R__CLING_PTRCHECK(off) UntypedSnapshotRNTupleHelper final : public RActionImpl<UntypedSnapshotRNTupleHelper> {
 // Names identifying the output: file, directory and ntuple.
48 std::string fFileName;
49 std::string fDirName;
50 std::string fNTupleName;
51
 // Owning handle to the output file.
52 std::unique_ptr<TFile> fOutputFile;
53
57 ColumnNames_t fInputFieldNames; // This contains the resolved aliases
59 std::unique_ptr<ROOT::RNTupleParallelWriter> fWriter;
60 std::vector<ROOT::RFieldToken> fFieldTokens;
61
 // presumably fFillContexts and fEntries hold one element per slot (fNSlots) -- TODO confirm in the implementation
62 unsigned int fNSlots;
63 std::vector<std::shared_ptr<ROOT::RNTupleFillContext>> fFillContexts;
64 std::vector<std::unique_ptr<ROOT::REntry>> fEntries;
65
66 std::vector<const std::type_info *> fInputColumnTypeIDs; // Types for the input columns
67
68public:
 // Constructor: takes the slot count, the output file/directory/ntuple names, two column-name lists and the
 // type_info pointers of the columns. Parameters on the skipped lines 71-72 are not visible here.
69 UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname,
70 std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames,
73 const std::vector<const std::type_info *> &colTypeIDs);
74
80
81 void Initialize();
82
 // Receives the slot number and type-erased pointers to the column values.
83 void Exec(unsigned int slot, const std::vector<void *> &values);
84
 // The TTreeReader argument is unnamed in this declaration.
85 void InitTask(TTreeReader *, unsigned int slot);
86
87 void FinalizeTask(unsigned int slot);
88
89 void Finalize();
90
 // Returns the fixed action name "Snapshot".
91 std::string GetActionName() { return "Snapshot"; }
92
 // Body of a member whose signature (line 93) is missing from this listing.
 // It returns a callback taking (unsigned int, const RSampleInfo &) with an empty body, i.e. a no-op.
94 {
95 return [](unsigned int, const RSampleInfo &) mutable {};
96 }
97
99};
100
// NOTE(review): the lines opening this struct and its two nested types (102, 106, 114) and the signatures at
// lines 136 and 146 are missing from this listing; only their bodies and doc comments are visible.
101/// Stores properties of each output branch in a Snapshot.
103 /// Stores variations of a fundamental type.
104 /// The bytes hold anything up to double or 64-bit numbers, and are cleared for every event.
105 /// This allows for binding the branches directly to these bytes.
 // Buffer capacity in bytes; the constructor below asserts that the requested size fits.
107 static constexpr std::size_t fNBytes = 8;
108 alignas(8) std::array<std::byte, fNBytes> fBytes{std::byte{0}}; // 8 bytes to store any fundamental type
 // Number of bytes of fBytes in use; this is the byte count copied by the memcpy further down.
109 unsigned short fSize = 0;
 // NOTE(review): assert needs <cassert>, which is not among the includes visible in this listing --
 // presumably it arrives transitively; confirm.
110 FundamentalType(unsigned short size) : fSize(size) { assert(size <= fNBytes); }
111 };
112 /// Stores empty instances of classes, so a dummy object can be written when a systematic variation
113 /// doesn't pass a selection cut.
115 const TClass *fTClass = nullptr;
116 std::shared_ptr<void> fEmptyInstance = nullptr;
117 void *fRawPtrToEmptyInstance = nullptr; // Needed because TTree expects pointer to pointer
118 };
119
120 std::string fInputBranchName; // This contains resolved aliases
121 std::string fOutputBranchName;
122 const std::type_info *fInputTypeID = nullptr;
 // Non-owning-looking raw pointer to the output branch; reset to nullptr by the body at lines 137-140.
123 TBranch *fOutputBranch = nullptr;
124 void *fBranchAddressForCArrays = nullptr; // Used to detect if branch addresses need to be updated
125
126 // A negative index indicates no variations, 0 is for nominal, >0 marks columns that are only valid if a specific
127 // filter passed
128 int fVariationIndex = -1;
 // Holds either the inline byte buffer (fundamental types) or the empty-instance record (class types).
 // The default is a FundamentalType of size 0.
129 std::variant<FundamentalType, EmptyDynamicType> fTypeData = FundamentalType{0};
130 bool fIsCArray = false;
131 bool fIsDefine = false;
132
133 RBranchData() = default;
134 RBranchData(std::string inputBranchName, std::string outputBranchName, bool isDefine, const std::type_info *typeID);
135
 // Body of a member whose signature (line 136) is missing from this listing.
 // It sets the output branch pointer and the recorded C-array branch address back to nullptr.
137 {
138 fOutputBranch = nullptr;
139 fBranchAddressForCArrays = nullptr;
140 }
141 void *EmptyInstance(bool pointerToPointer);
142 void ClearBranchContents();
143 /// For fundamental types represented by TDataType, fetch a value from the pointer into the local branch buffer.
144 /// If the branch holds a class type, nothing happens.
145 /// \return true if the branch holds a fundamental type, false if it holds a class type.
 // The signature (line 146) is missing from this listing; the body reads a pointer named valuePtr.
 // valuePtr is handed to memcpy without a null check, and fSize bytes are read from it.
 // NOTE(review): std::memcpy needs <cstring>, which is not among the includes visible in this listing -- confirm.
147 {
148 if (auto fundamentalType = std::get_if<FundamentalType>(&fTypeData); fundamentalType) {
149 std::memcpy(fundamentalType->fBytes.data(), valuePtr, fundamentalType->fSize);
150 return true;
151 }
152 return false;
153 }
154};
155
/// Snapshot action helper holding a single output TFile and a single output TTree.
/// It derives from RActionImpl instantiated with its own type. Only declarations are shown here.
/// NOTE(review): the numbering of this listing jumps in places (159 -> 161, 165 -> 168, 170 -> 172, 172 -> 174,
/// 175 -> 181, 197 -> 199), so some declarations of the original header are not visible in this view.
156class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelper final : public RActionImpl<UntypedSnapshotTTreeHelper> {
 // Names identifying the output: file, directory and tree.
157 std::string fFileName;
158 std::string fDirName;
159 std::string fTreeName;
161 std::unique_ptr<TFile> fOutputFile;
162 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
 // Starts out true and is set to true again by the sample callback defined at lines 199-201.
163 bool fBranchAddressesNeedReset{true};
164 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
165 std::vector<RBranchData> fBranchData; // Information for all output branches
168
169public:
 // Constructor: parameters on the skipped lines 171 and 173 are not visible here.
 // isDefine is taken by rvalue reference.
170 UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
172 const RSnapshotOptions &options, std::vector<bool> &&isDefine,
174 const std::vector<const std::type_info *> &colTypeIDs);
175
181
 // Both parameters are unnamed in this declaration.
182 void InitTask(TTreeReader *, unsigned int);
183
 // The slot parameter is unnamed; values holds type-erased pointers to the column values.
184 void Exec(unsigned int, const std::vector<void *> &values);
185
186 void UpdateCArraysPtrs(const std::vector<void *> &values);
187
188 void SetBranches(const std::vector<void *> &values);
189
190 void SetEmptyBranches(TTree *inputTree, TTree &outputTree);
191
192 void Initialize();
193
194 void Finalize();
195
 // Returns the fixed action name "Snapshot".
196 std::string GetActionName() { return "Snapshot"; }
197
 // Body of a member whose signature (line 198) is missing from this listing.
 // It returns a callback that captures `this` and sets fBranchAddressesNeedReset to true when invoked;
 // both callback arguments are ignored.
199 {
200 return [this](unsigned int, const RSampleInfo &) mutable { fBranchAddressesNeedReset = true; };
201 }
202
 // newName is passed type-erased as void *; the second parameter is unnamed and defaults to "nominal".
203 UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view /*variation*/ = "nominal");
204};
205
/// Snapshot action helper that keeps vectors of output files, output trees, input trees and branch data,
/// together with a TBufferMerger. It derives from RActionImpl instantiated with its own type.
/// Only declarations are shown here.
/// NOTE(review): the numbering of this listing jumps in places (223 -> 225, 227 -> 230, 234 -> 236, 237 -> 243,
/// 261 -> 263), so some declarations of the original header are not visible in this view.
206class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelperMT final : public RActionImpl<UntypedSnapshotTTreeHelperMT> {
207
208 // IMT-specific data members
209
210 unsigned int fNSlots;
211 std::unique_ptr<ROOT::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
 // presumably one output file and one output tree per slot -- TODO confirm in the implementation
212 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>> fOutputFiles;
213 std::vector<std::unique_ptr<TTree>> fOutputTrees;
 // The sample callback at lines 263-265 writes 1 into the element of the slot it is called for.
214 std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
215 std::vector<TTree *> fInputTrees; // Current input trees, one per slot. Set at initialization time (`InitTask`)
216 std::vector<std::vector<RBranchData>> fBranchData; // Information for all output branches of each slot
217
218 // Attributes of the output TTree
219
220 std::string fFileName;
221 std::string fDirName;
222 std::string fTreeName;
 // NOTE(review): unlike the other raw pointers in this listing, this one has no default initializer;
 // presumably the constructor sets it -- confirm.
223 TFile *fOutputFile; // Non-owning view on the output file
225
226 // Attributes related to the computation graph
227
230
231public:
 // Constructor: the parameter on the skipped line 235 is not visible here.
 // isDefine is taken by rvalue reference.
232 UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname,
233 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
234 const RSnapshotOptions &options, std::vector<bool> &&isDefine,
236 const std::vector<const std::type_info *> &colTypeIDs);
237
243
244 void InitTask(TTreeReader *r, unsigned int slot);
245
246 void FinalizeTask(unsigned int slot);
247
 // values holds type-erased pointers to the column values.
248 void Exec(unsigned int slot, const std::vector<void *> &values);
249
250 void UpdateCArraysPtrs(unsigned int slot, const std::vector<void *> &values);
251
252 void SetBranches(unsigned int slot, const std::vector<void *> &values);
253
254 void SetEmptyBranches(TTree *inputTree, TTree &outputTree);
255
256 void Initialize();
257
258 void Finalize();
259
 // Returns the fixed action name "Snapshot".
260 std::string GetActionName() { return "Snapshot"; }
261
 // Body of a member whose signature (line 262) is missing from this listing.
 // It returns a callback that captures `this` and sets fBranchAddressesNeedReset[slot] to 1.
 // The index is not bounds-checked here, so slot must be a valid index into that vector.
263 {
264 return [this](unsigned int slot, const RSampleInfo &) mutable { fBranchAddressesNeedReset[slot] = 1; };
265 }
266
 // newName is passed type-erased as void *; the second parameter is unnamed and defaults to "nominal".
267 UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view /*variation*/ = "nominal");
268};
269
270struct SnapshotOutputWriter;
271
// NOTE(review): the line opening this class (273) is missing from this listing, as are lines 275, 282,
// 288-289 and 292-296; only the base-class clause and the members shown below are visible.
272/// TTree snapshot helper with systematic variations.
274 : public ROOT::Detail::RDF::RActionImpl<SnapshotHelperWithVariations> {
 // Shared handle to SnapshotOutputWriter, which is only forward-declared in this header.
276 std::shared_ptr<SnapshotOutputWriter> fOutputHandle;
277 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
278 std::vector<RBranchData> fBranchData;
 // Raw pointers to the input and output loop managers; both default to nullptr.
279 ROOT::Detail::RDF::RLoopManager *fInputLoopManager = nullptr;
280 ROOT::Detail::RDF::RLoopManager *fOutputLoopManager = nullptr;
281
283
284public:
 // Constructor: the first column-name list and the isDefine vector are accepted but left unnamed.
285 SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename,
286 const ColumnNames_t & /*vbnames*/, const ColumnNames_t &bnames,
287 const RSnapshotOptions &options, std::vector<bool> && /*isDefine*/,
290 const std::vector<const std::type_info *> &colTypeIDs);
291
297
 // NOTE(review): "varationIndex" looks like a typo for "variationIndex".
298 void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex,
299 unsigned int varationIndex, std::string const &variationName);
300
301 void InitTask(TTreeReader *, unsigned int slot);
302
 // Unlike the Exec of the other helpers in this listing, this one also receives a vector of filter results.
 // The slot parameter is unnamed.
303 void Exec(unsigned int /*slot*/, const std::vector<void *> &values, std::vector<bool> const &filterPassed);
304
305 /// Nothing to do. All initialisations run in the constructor or InitTask().
306 void Initialize() {}
307
308 void Finalize();
309
 // Returns the fixed action name "SnapshotWithVariations".
310 std::string GetActionName() { return "SnapshotWithVariations"; }
311};
312
313} // namespace ROOT::Internal::RDF
314
315#endif
dim_t fSize
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Base class for action helpers, see RInterface::Book() for more information.
The head node of a RDF computation graph.
TTree snapshot helper with systematic variations.
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
SnapshotHelperWithVariations(SnapshotHelperWithVariations const &) noexcept=delete
SnapshotHelperWithVariations(SnapshotHelperWithVariations &&) noexcept=default
ROOT::RDF::SampleCallback_t GetSampleCallback() final
Override this method to register a callback that is executed before processing a new data sample ...
std::vector< std::shared_ptr< ROOT::RNTupleFillContext > > fFillContexts
UntypedSnapshotRNTupleHelper(const UntypedSnapshotRNTupleHelper &)=delete
std::unique_ptr< ROOT::RNTupleParallelWriter > fWriter
UntypedSnapshotRNTupleHelper & operator=(const UntypedSnapshotRNTupleHelper &)=delete
std::vector< std::unique_ptr< ROOT::REntry > > fEntries
std::vector< const std::type_info * > fInputColumnTypeIDs
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
UntypedSnapshotRNTupleHelper(UntypedSnapshotRNTupleHelper &&) noexcept
ROOT::Detail::RDF::RLoopManager * fInputLoopManager
ROOT::Detail::RDF::RLoopManager * fInputLoopManager
UntypedSnapshotTTreeHelperMT(UntypedSnapshotTTreeHelperMT &&) noexcept
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
std::vector< std::shared_ptr< ROOT::TBufferMergerFile > > fOutputFiles
std::vector< std::vector< RBranchData > > fBranchData
ROOT::RDF::SampleCallback_t GetSampleCallback() final
Override this method to register a callback that is executed before processing a new data sample ...
UntypedSnapshotTTreeHelperMT & operator=(const UntypedSnapshotTTreeHelperMT &)=delete
std::vector< std::unique_ptr< TTree > > fOutputTrees
std::unique_ptr< ROOT::TBufferMerger > fMerger
UntypedSnapshotTTreeHelperMT(const UntypedSnapshotTTreeHelperMT &)=delete
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
ROOT::Detail::RDF::RLoopManager * fInputLoopManager
UntypedSnapshotTTreeHelper(UntypedSnapshotTTreeHelper &&) noexcept
UntypedSnapshotTTreeHelper(const UntypedSnapshotTTreeHelper &)=delete
UntypedSnapshotTTreeHelper & operator=(const UntypedSnapshotTTreeHelper &)=delete
ROOT::RDF::SampleCallback_t GetSampleCallback() final
Override this method to register a callback that is executed before processing a new data sample ...
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
A TTree is a list of TBranches.
Definition TBranch.h:93
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with an RDataFrame computation graph via e....
std::vector< std::string > ColumnNames_t
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn...
Stores variations of a fundamental type.
Stores properties of each output branch in a Snapshot.
bool WriteValueIfFundamental(void *valuePtr)
For fundamental types represented by TDataType, fetch a value from the pointer into the local branch ...
A collection of options to steer the creation of the dataset on disk through Snapshot().