Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFSnapshotHelpers.cxx
Go to the documentation of this file.
1/**
2 \file RDFSnapshotHelpers.cxx
3 \author Enrico Guiraud, CERN
4 \author Danilo Piparo, CERN
5 \date 2016-12
6 \author Vincenzo Eduardo Padulano
7 \author Stephan Hageboeck
8 \date 2025-06
9*/
10
11/*************************************************************************
12 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
13 * All rights reserved. *
14 * *
15 * For the licensing terms see $ROOTSYS/LICENSE. *
16 * For the list of contributors see $ROOTSYS/README/CREDITS. *
17 *************************************************************************/
18
20
21#include <ROOT/REntry.hxx>
22#include <ROOT/RFieldToken.hxx>
23#include <ROOT/RNTuple.hxx>
24#include <ROOT/RNTupleDS.hxx>
27#include <ROOT/RTTreeDS.hxx>
29
30#include <TBranchObject.h>
31#include <TClassEdit.h>
32#include <TDictionary.h>
33#include <TDataType.h>
34#include <TFile.h>
35#include <TLeaf.h>
36#include <TTreeReader.h>
37
38#include <algorithm>
39#include <type_traits>
40#include <utility>
41
43// Maintaining the following allows for faster vector resize:
44static_assert(std::is_nothrow_move_assignable_v<RBranchData>);
45static_assert(std::is_nothrow_move_constructible_v<RBranchData>);
46
47namespace {
48
49void AssertNoNullBranchAddresses(const std::vector<RBranchData> &branches)
50{
51 std::vector<TBranch *> branchesWithNullAddress;
52 for (const auto &branchData : branches) {
53 if (branchData.fOutputBranch->GetAddress() == nullptr)
54 branchesWithNullAddress.push_back(branchData.fOutputBranch);
55 }
56
57 if (branchesWithNullAddress.empty())
58 return;
59
60 // otherwise build error message and throw
61 std::vector<std::string> missingBranchNames;
63 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
64 std::string msg = "RDataFrame::Snapshot:";
65 if (missingBranchNames.size() == 1) {
66 msg += " branch " + missingBranchNames[0] +
67 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
68 "it is";
69 } else {
70 msg += " branches ";
71 for (const auto &bName : missingBranchNames)
72 msg += bName + ", ";
73 msg.resize(msg.size() - 2); // remove last ", "
74 msg += " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
75 }
76 msg += " not part of the set of branches that are being written out.";
77 throw std::runtime_error(msg);
78}
79
80TBranch *SearchForBranch(TTree *inputTree, const std::string &branchName)
81{
82 if (inputTree) {
83 if (auto *getBranchRes = inputTree->GetBranch(branchName.c_str()))
84 return getBranchRes;
85
86 // try harder
87 if (auto *findBranchRes = inputTree->FindBranch(branchName.c_str()))
88 return findBranchRes;
89 }
90 return nullptr;
91}
92
/// Create the output branch for a column that is stored as a C-style array (fixed or variable size) in the input.
///
/// The function returns without creating anything when there is no input branch, when the input branch holds a
/// std::vector or an RVec, when the input branch has no leaf, or when the first leaf is neither counted by another
/// leaf nor has a static length larger than one.
///
/// For a variable-size array, the branch that stores the array size is searched for by name in `outputBranches`;
/// if it is missing, an entry is appended for it and a branch with a null placeholder address is created.
///
/// \param outputTree Tree in which the branches are created.
/// \param outputBranches Bookkeeping of all output branches; may grow by one entry (the size branch).
/// \param thisBranch Entry of `outputBranches` describing the array column.
/// \param inputBranch Branch of the input tree the column is read from; may be null.
/// \param basketSize If positive, basket size for the created branches; otherwise the basket size of the
///        corresponding input branch is used.
/// \param address If non-null, treated as a pointer to a ROOT::RVec<std::byte> whose data buffer becomes the
///        address of the array branch.
/// \return Iterator designating the entry of the array column.
93std::vector<RBranchData>::iterator CreateCStyleArrayBranch(TTree &outputTree, std::vector<RBranchData> &outputBranches,
94 std::vector<RBranchData>::iterator thisBranch,
95 TBranch *inputBranch, int basketSize, void *address)
96{
97 if (!inputBranch)
98 return thisBranch;
99 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
100 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
101 return thisBranch;
102 // must construct the leaflist for the output branch and create the branch in the output tree
   // Only the first leaf of the input branch is inspected.
103 const auto *leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
104 if (!leaf)
105 return thisBranch;
106 const auto bname = leaf->GetName();
107 auto *sizeLeaf = leaf->GetLeafCount();
   // Text that goes between the brackets of the leaflist: the name of the size leaf for variable-size arrays,
   // the static length for fixed-size ones.
108 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
109
110 // We proceed only if branch is a fixed-or-variable-sized array
111 if (sizeLeaf || leaf->GetLenStatic() > 1) {
112 if (sizeLeaf) {
113 // The array branch `bname` has dynamic size stored in leaf `sizeLeafName`, so we need to ensure that it's
114 // in the output tree.
115 auto sizeLeafIt =
116 std::find_if(outputBranches.begin(), outputBranches.end(),
117 [&sizeLeafName](const RBranchData &bd) { return bd.fOutputBranchName == sizeLeafName; });
118 if (sizeLeafIt == outputBranches.end()) {
119 // The size leaf is not part of the output branches yet, so emplace an empty slot for it.
120 // This means that iterators need to be updated in case the container reallocates.
121 const auto indexBeforeEmplace = std::distance(outputBranches.begin(), thisBranch);
         // The appended entry has an empty input branch name and no type information.
122 outputBranches.emplace_back("", sizeLeafName, /*isDefine=*/false, /*typeID=*/nullptr);
125 }
126 if (!sizeLeafIt->fOutputBranch) {
127 // The size leaf was emplaced, but not initialised yet
129 // Use original basket size for existing branches otherwise use custom basket size.
130 const auto bufSize = (basketSize > 0) ? basketSize : sizeLeaf->GetBranch()->GetBasketSize();
131 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for
132 // `sizeLeafName`
133 auto *outputBranch = outputTree.Branch(sizeLeafName.c_str(), static_cast<void *>(nullptr),
134 (sizeLeafName + '/' + sizeTypeStr).c_str(), bufSize);
135 sizeLeafIt->fOutputBranch = outputBranch;
136 }
137 }
138
139 const auto btype = leaf->GetTypeName();
   // A blank type character means no leaflist can be built: warn and leave the column without an output branch.
141 if (rootbtype == ' ') {
142 Warning("Snapshot",
143 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
144 "leaf is of type '%s'. This column will not be written out.",
145 bname, btype);
146 return thisBranch;
147 }
148
   // Leaflist of the form "<leaf name>[<size leaf name or static length>]/<type character>".
149 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
150 // Use original basket size for existing branches and new basket size for new branches
151 const auto bufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
152 void *addressForBranch = [address]() -> void * {
153 if (address) {
154 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we need
155 // its buffer, so we cast it and extract the address of the buffer
156 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(address);
157 return rawRVec->data();
158 }
159 return nullptr;
160 }();
161 thisBranch->fOutputBranch =
162 outputTree.Branch(thisBranch->fOutputBranchName.c_str(), addressForBranch, leaflist.c_str(), bufSize);
163 thisBranch->fOutputBranch->SetTitle(inputBranch->GetTitle());
   // Mark the entry so that later address updates extract the RVec buffer instead of using the RVec address.
164 thisBranch->fIsCArray = true;
165 }
166
167 return thisBranch;
168}
169
170void SetBranchAddress(TBranch *inputBranch, RBranchData &branchData, void *valueAddress)
171{
172 const static TClassRef TBOClRef("TBranchObject");
173 if (inputBranch && inputBranch->IsA() == TBOClRef) {
174 branchData.fOutputBranch->SetAddress(reinterpret_cast<void **>(inputBranch->GetAddress()));
175 } else if (branchData.fOutputBranch->IsA() != TBranch::Class()) {
176 // This is a relatively rare case of a fixed-size array getting redefined
177 branchData.fBranchAddressForCArrays = valueAddress;
178 branchData.fOutputBranch->SetAddress(&branchData.fBranchAddressForCArrays);
179 } else {
180 void *correctAddress = [valueAddress, isCArray = branchData.fIsCArray]() -> void * {
181 if (isCArray) {
182 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
183 // need its buffer, so we cast it and extract the address of the buffer
184 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
185 return rawRVec->data();
186 }
187 return valueAddress;
188 }();
189 branchData.fOutputBranch->SetAddress(correctAddress);
190 branchData.fBranchAddressForCArrays = valueAddress;
191 }
192}
193
195{
   // Create the output branch of a fundamental-type column from a leaflist "<output branch name>/<type character>",
   // bound to `valueAddress` and using `bufSize` as basket size.
196 // Logic taken from
197 // TTree::BranchImpRef(
198 // const char* branchname, TClass* ptrClass, EDataType datatype, void* addobj, Int_t bufsize, Int_t splitlevel)
   // A blank type character means no leaflist can be built: warn and return, leaving `bd.fOutputBranch` untouched.
200 if (rootTypeChar == ' ') {
201 Warning("Snapshot",
202 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
203 "column will not be written out.",
204 bd.fOutputBranchName.c_str());
205 return;
206 }
207 std::string leafList{bd.fOutputBranchName + '/' + rootTypeChar};
208 bd.fOutputBranch = outputTree.Branch(bd.fOutputBranchName.c_str(), valueAddress, leafList.c_str(), bufSize);
209}
210
211/// Ensure that an object with the input name can be written to the target file. This means checking that the
212/// TFile can be opened in the mode specified in `opts`, deleting any pre-existing objects with the same name in case
213/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
///
/// The check is only carried out when `opts.fMode`, compared case-insensitively, is "update"; for any other mode
/// the function returns immediately without touching the file.
///
/// \throws std::invalid_argument if the file cannot be opened in update mode, or if an object called `objName`
///         already exists in the file and `opts.fOverwriteIfExists` is false.
215 const std::string &fileName)
216{
217 TString fileMode = opts.fMode;
218 fileMode.ToLower();
219 if (fileMode != "update")
220 return;
221
222 // output file opened in "update" mode: must check whether target object name is already present in file
   // The file handle is local to this function and is released when the function returns.
223 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "UPDATE_WITHOUT_GLOBALREGISTRATION")};
224 if (!outFile || outFile->IsZombie())
225 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
226
227 // Object is not present in the file, we are good
228 if (!outFile->GetKey(objName.c_str()))
229 return;
230
231 // object called objName is already present in the file
232 if (opts.fOverwriteIfExists) {
233 if (auto existingTree = outFile->Get<TTree>(objName.c_str()); existingTree) {
234 // Special case for TTree. TTree::Delete invalidates the 'this' pointer, so we don't wrap it in a
235 // std::unique_ptr.
236 existingTree->Delete("all");
237 } else {
238 // Ensure deletion of object and all its cycles.
239 outFile->Delete((objName + ";*").c_str());
240 }
241 } else {
242 const std::string msg = "Snapshot: object \"" + objName + "\" already present in file \"" + fileName +
243 "\". If you want to delete the original object and write another, please set the "
244 "'fOverwriteIfExists' option to true in RSnapshotOptions.";
245 throw std::invalid_argument(msg);
246 }
247}
248
250 std::vector<ROOT::Internal::RDF::RBranchData> &allBranchData, std::size_t currentIndex,
251 int basketSize, void *valueAddress)
252{
   // Create, or re-bind, the output branch of one column. Overview of the paths below:
   //  1. the output branch exists and a value address is given: only the branch address is (re)set;
   //  2. the column type has a TDataType dictionary entry: handled as a fundamental type;
   //  3. the column does not come from a Define: handled through the leaflist (C-style array) path; if that
   //     produced a branch, the buffer of the RVec behind `valueAddress` is remembered and the function returns;
   //  4. the column type has a TClass dictionary entry: a class-type branch is created;
   //  5. anything else is reported as a logic error.
   // The input branch is only searched for columns that do not come from a Define.
254 auto *inputBranch = branchData->fIsDefine ? nullptr : SearchForBranch(inputTree, branchData->fInputBranchName);
255
256 if (branchData->fOutputBranch && valueAddress) {
257 // The output branch was already created, we just need to (re)set its address
258 SetBranchAddress(inputBranch, *branchData, valueAddress);
259 return;
260 }
261
262 // Respect the original bufsize and splitlevel arguments
263 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
264 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
265 // TObject branch, see https://bit.ly/2EjLMId ).
266 // A user-provided basket size value takes precedence.
   // Without an input branch, the fallbacks are a basket size of 32000 and a split level of 99.
267 const auto bufSize = (basketSize > 0) ? basketSize : (inputBranch ? inputBranch->GetBasketSize() : 32000);
268 const auto splitLevel = inputBranch ? inputBranch->GetSplitLevel() : 99;
269
270 auto *dictionary = TDictionary::GetDictionary(*branchData->fInputTypeID);
271 if (dynamic_cast<TDataType *>(dictionary)) {
272 // Branch of fundamental type
274 return;
275 }
276
277 if (!branchData->fIsDefine) {
278 // Cases where we need a leaflist (e.g. C-style arrays)
279 // We only enter this code path if the input value does not come from a Define/Redefine. In those cases, it is
280 // not allowed to create a column of C-style array type, so that can't happen when writing the TTree. This is
281 // currently what prevents writing the wrong branch output type in a scenario where the input branch of the TTree
282 // is a C-style array and then the user is Redefining it with some other type (e.g. a ROOT::RVec).
284 }
285 if (branchData->fOutputBranch) {
286 // A branch was created in the previous function call
287 if (valueAddress) {
288 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
289 // need its buffer, so we cast it and extract the address of the buffer
290 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
291 branchData->fBranchAddressForCArrays = rawRVec->data();
292 }
293 return;
294 }
295
296 if (auto *classPtr = dynamic_cast<TClass *>(dictionary)) {
297 // Case of unsplit object with polymorphic type
298 if (inputBranch && dynamic_cast<TBranchObject *>(inputBranch) && valueAddress)
299 branchData->fOutputBranch =
301 inputBranch->GetAddress(), bufSize, splitLevel);
302 // General case, with valid address
303 else if (valueAddress)
305 outputTree, branchData->fOutputBranchName.c_str(), classPtr, TDataType::GetType(*branchData->fInputTypeID),
307 // No value was passed, we're just creating a hollow branch to populate the dataset schema
308 else
309 branchData->fOutputBranch =
310 outputTree.Branch(branchData->fOutputBranchName.c_str(), classPtr->GetName(), nullptr, bufSize);
311 return;
312 }
313
314 // We are not aware of other cases
315 throw std::logic_error(
316 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
317}
318
320{
   // Translate the snapshot options into ROOT compression settings. When the user did not choose a compression
   // algorithm, the default depends on the output format: ZLIB level 1 for TTree (also used for the default
   // format), ZSTD level 5 for RNTuple. An output format that is none of these is rejected with
   // std::invalid_argument.
323
324 if (options.fOutputFormat == OutputFormat::kTTree || options.fOutputFormat == OutputFormat::kDefault) {
325 // The default compression settings for TTree is 101
326 if (options.fCompressionAlgorithm == CompAlgo::kUndefined) {
327 return ROOT::CompressionSettings(CompAlgo::kZLIB, 1);
328 }
330 } else if (options.fOutputFormat == OutputFormat::kRNTuple) {
331 // The default compression settings for RNTuple is 505
332 if (options.fCompressionAlgorithm == CompAlgo::kUndefined) {
333 return ROOT::CompressionSettings(CompAlgo::kZSTD, 5);
334 }
336 } else {
337 throw std::invalid_argument("RDataFrame::Snapshot: unrecognized output format");
338 }
339}
340
/// Return `mode` extended with the "_WITHOUT_GLOBALREGISTRATION" marker.
/// If the marker already occurs anywhere in `mode`, the string is returned unchanged.
std::string ModeWithoutGlobalRegistration(const std::string &mode)
{
   constexpr auto marker = "_WITHOUT_GLOBALREGISTRATION";
   const bool alreadyMarked = mode.find(marker) != std::string::npos;
   return alreadyMarked ? mode : mode + marker;
}
348
349} // namespace
350
352 const std::type_info *typeID)
353 : fInputBranchName{std::move(inputBranchName)},
354 fOutputBranchName{std::move(outputBranchName)},
355 fInputTypeID{typeID},
356 fIsDefine{isDefine}
357{
   // The content of fTypeData depends on the kind of dictionary entry found for the column type:
   // a TDataType (fundamental type) or a TClass.
359 if (auto datatype = dynamic_cast<TDataType *>(dictionary); datatype) {
361 } else if (auto tclass = dynamic_cast<TClass *>(dictionary); tclass) {
   // Class type: only the TClass is recorded here.
362 fTypeData = EmptyDynamicType{tclass};
363 }
364}
365
366/// @brief Return a pointer to an empty instance of the type represented by this branch.
367/// For fundamental types, this is simply an 8-byte region of zeroes. For classes, it is an instance created with
368/// TClass::New.
369/// @param pointerToPointer Return a pointer to a pointer, so it can be used directly in TTree::SetBranchAddress().
/// The class instance is created on the first call that needs it and kept in `fTypeData`, so later calls hand
/// out the same object.
/// @return For fundamental types, the address of the internal byte buffer. For class types, the instance itself
/// or, when `pointerToPointer` is true, the address of a member pointer that designates the instance.
371{
372 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
373 assert(!pointerToPointer); // Not used for fundamental types
374 return fundamental->fBytes.data();
375 }
376
377 auto &dynamic = std::get<EmptyDynamicType>(fTypeData);
378 if (!dynamic.fEmptyInstance) {
379 auto *dictionary = TDictionary::GetDictionary(*fInputTypeID);
380 assert(dynamic_cast<TDataType *>(dictionary) ==
381 nullptr); // TDataType should be handled by writing into the local buffer
382
383 auto tclass = dynamic_cast<TClass *>(dictionary);
384 assert(tclass);
   // The shared_ptr releases the instance through the destructor function provided by the TClass.
385 dynamic.fEmptyInstance = std::shared_ptr<void>{tclass->New(), tclass->GetDestructor()};
386 }
387
388 if (pointerToPointer) {
389 // Make TTree happy (needs a pointer to pointer):
390 dynamic.fRawPtrToEmptyInstance = dynamic.fEmptyInstance.get();
391 return &dynamic.fRawPtrToEmptyInstance;
392 } else {
393 return dynamic.fEmptyInstance.get();
394 }
395}
396
397/// Point the branch address to an empty instance of the type represented by this branch
398/// or write null bytes into the space used by the fundamental type.
399/// This is used in case of variations, when certain defines/actions don't execute. We
400/// nevertheless need to write something, so we point the branch to an empty instance.
/// Nothing is done if no output branch has been created.
402{
403 if (!fOutputBranch)
404 return;
405
406 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
   // Fundamental type: zero the local byte buffer in place; the branch address is not modified here.
407 fundamental->fBytes.fill(std::byte{0});
408 } else {
409 // TTree expects pointer to pointer, to figure out who allocates the object:
410 fOutputBranch->SetAddress(EmptyInstance(/*pointerToPointer=*/true));
411 }
412}
413
415 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
416 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
418 const std::vector<const std::type_info *> &colTypeIDs)
419 : fFileName(filename),
420 fDirName(dirname),
421 fTreeName(treename),
422 fOptions(options),
423 fOutputLoopManager(loopManager),
424 fInputLoopManager(inputLM)
425{
427
   // One bookkeeping entry per requested column: input name, output name, whether the column comes from a Define,
   // and the type information of the column. All four sequences are indexed in parallel.
429 fBranchData.reserve(vbnames.size());
430 for (unsigned int i = 0; i < vbnames.size(); ++i) {
431 fBranchData.emplace_back(vbnames[i], std::move(outputBranchNames[i]), isDefine[i], colTypeIDs[i]);
432 }
433}
434
435// Define special member methods here where the definition of all the data member types is available
439 ROOT::Internal::RDF::UntypedSnapshotTTreeHelper &&) noexcept = default;
440
442{
   // Emit a warning when this helper belongs to a lazy Snapshot that was never executed: the helper still has a
   // tree name, no output file was ever opened, and the lazy option is set.
443 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy) {
   // Word the message according to the file mode: "updated" for update mode (case-insensitive), "created" otherwise.
444 const auto fileOpenMode = [&]() {
445 TString checkupdate = fOptions.fMode;
446 checkupdate.ToLower();
447 return checkupdate == "update" ? "updated" : "created";
448 }();
449 Warning("Snapshot",
450 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
451 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
452 "its result in a variable and for example calling the GetValue() method on it.",
453 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
454 }
455}
456
458{
459 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
460 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
   // If the data source is not an RTTreeDS, fInputTree is left as it was.
461 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
462 fInputTree = treeDS->GetTree();
   // Make the next Exec call go through SetBranches instead of only refreshing C-array pointers.
463 fBranchAddressesNeedReset = true;
464}
465
466void ROOT::Internal::RDF::UntypedSnapshotTTreeHelper::Exec(unsigned int, const std::vector<void *> &values)
467{
468 if (!fBranchAddressesNeedReset) {
469 UpdateCArraysPtrs(values);
470 } else {
471 SetBranches(values);
472 fBranchAddressesNeedReset = false;
473 }
474
475 fOutputTree->Fill();
476}
477
479{
480 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
481 // associated to those is re-allocated. As a result the value of the pointer can change therewith
482 // leaving associated to the branch of the output tree an invalid pointer.
483 // With this code, we set the value of the pointer in the output branch anew when needed.
   // `values` must hold exactly one address per bookkeeping entry, in the same order.
484 assert(values.size() == fBranchData.size());
485 auto nValues = values.size();
486 for (decltype(nValues) i{}; i < nValues; i++) {
487 if (fBranchData[i].fIsCArray) {
488 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
489 // need its buffer, so we cast it and extract the address of the buffer
490 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
   // Touch the branch only if the buffer address differs from the one remembered from the last update.
491 if (auto *data = rawRVec->data(); fBranchData[i].fBranchAddressForCArrays != data) {
492 fBranchData[i].fOutputBranch->SetAddress(data);
493 fBranchData[i].fBranchAddressForCArrays = data;
494 }
495 }
496 }
497}
498
500{
501 // create branches in output tree
502 assert(fBranchData.size() == values.size());
   // Index-based loop on purpose: SetBranchesHelper may append entries to fBranchData, which would invalidate
   // iterators or references into it.
   // NOTE(review): if an entry is appended, `i` reaches values.size() and `values[i]` would be read out of
   // bounds -- confirm whether appended entries can really be visited here.
503 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
504 SetBranchesHelper(fInputTree, *fOutputTree, fBranchData, i, fOptions.fBasketSize, values[i]);
505 }
   // Every branch must have a real address by now; a branch still at null makes this call throw.
506 AssertNoNullBranchAddresses(fBranchData);
507}
508
510{
   // Create the output branches without binding them to any value: a null value address is passed for every
   // column, so that the branches only describe the schema of the output tree.
511 void *dummyValueAddress{};
512 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
513 SetBranchesHelper(inputTree, outputTree, fBranchData, i, fOptions.fBasketSize, dummyValueAddress);
514 }
515}
516
518{
   // Open the output file (mode taken from the options, extended by ModeWithoutGlobalRegistration, and compression
   // settings derived from the options), create the requested directory if any, then create the output tree in it.
   // Throws std::runtime_error if the file cannot be opened.
519 fOutputFile.reset(TFile::Open(fFileName.c_str(), ModeWithoutGlobalRegistration(fOptions.fMode).c_str(),
520 /*ftitle=*/"", GetSnapshotCompressionSettings(fOptions)));
521 if (!fOutputFile)
522 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
523
   // Without a directory name the tree is attached to the file itself.
524 TDirectory *outputDir = fOutputFile.get();
525 if (!fDirName.empty()) {
526 TString checkupdate = fOptions.fMode;
527 checkupdate.ToLower();
528 if (checkupdate == "update")
529 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
530 else
531 outputDir = fOutputFile->mkdir(fDirName.c_str());
532 }
533
534 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
535
   // An auto-flush value of zero leaves the setting of the tree untouched.
536 if (fOptions.fAutoFlush)
537 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
538}
539
541{
   // Flush the output tree, close the output file and hand the written dataset to the output loop manager.
   // Both the tree and the file must have been created beforehand.
542 assert(fOutputTree != nullptr);
543 assert(fOutputFile != nullptr);
544
545 // There were no entries to fill the TTree with (either the input TTree was empty or no event passed after
546 // filtering). We have already created an empty TTree, now also create the branches to preserve the schema
547 if (fOutputTree->GetEntries() == 0) {
548 SetEmptyBranches(fInputTree, *fOutputTree);
549 }
550 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
551 fOutputTree->AutoSave("flushbaskets");
552 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
553 fOutputTree.reset();
554 fOutputFile->Close();
555
556 // Now connect the data source to the loop manager so it can be used for further processing
   // The tree is addressed as "<directory>/<tree>" when a directory name was given.
557 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
558 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
559}
560
561/**
562 * \brief Create a new UntypedSnapshotTTreeHelper with a different output file name
563 *
564 * \param newName A type-erased string with the output file name
565 * \return UntypedSnapshotTTreeHelper
566 *
567 * This MakeNew implementation is tied to the cloning feature of actions
568 * of the computation graph. In particular, cloning a Snapshot node usually
569 * also involves changing the name of the output file, otherwise the cloned
570 * Snapshot would overwrite the same file.
571 */
574{
   // `newName` must point to a std::string; it is copied before use.
575 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
   // Rebuild the constructor arguments from the current bookkeeping.
576 std::vector<std::string> inputBranchNames;
577 std::vector<std::string> outputBranchNames;
578 std::vector<bool> isDefine;
579 std::vector<const std::type_info *> inputColumnTypeIDs;
580 for (const auto &bd : fBranchData) {
   // Entries with an empty input branch name are the ones appended for array-size branches (they are added at
   // the end of the list); presumably the new helper re-creates them on demand, so copying stops here.
581 if (bd.fInputBranchName.empty())
582 break;
583 inputBranchNames.push_back(bd.fInputBranchName);
584 outputBranchNames.push_back(bd.fOutputBranchName);
585 isDefine.push_back(bd.fIsDefine);
586 inputColumnTypeIDs.push_back(bd.fInputTypeID);
587 }
588
590 fDirName,
591 fTreeName,
592 std::move(inputBranchNames),
593 std::move(outputBranchNames),
594 fOptions,
595 std::move(isDefine),
596 fOutputLoopManager,
597 fInputLoopManager,
599}
600
602 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename,
603 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
605 const std::vector<const std::type_info *> &colTypeIDs)
606 : fNSlots(nSlots),
607 fOutputFiles(fNSlots),
608 fOutputTrees(fNSlots),
609 fBranchAddressesNeedReset(fNSlots, 1),
610 fInputTrees(fNSlots),
611 fFileName(filename),
612 fDirName(dirname),
613 fTreeName(treename),
614 fOptions(options),
615 fOutputLoopManager(loopManager),
616 fInputLoopManager(inputLM)
617{
619
   // Every slot gets its own, identically initialised list of branch bookkeeping entries (one entry per
   // requested column), so that slots never share branch state. The per-slot reset flags start raised.
621 fBranchData.reserve(fNSlots);
622 for (unsigned int slot = 0; slot < fNSlots; ++slot) {
623 fBranchData.emplace_back();
624 auto &thisSlot = fBranchData.back();
625 thisSlot.reserve(vbnames.size());
626 for (unsigned int i = 0; i < vbnames.size(); ++i) {
627 thisSlot.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
628 }
629 }
630}
631
632// Define special member methods here where the definition of all the data member types is available
637
639{
   // Emit a warning when this helper belongs to a lazy Snapshot that was never executed: the helper still has a
   // tree name, the lazy option is set, and none of the per-slot output files was ever obtained.
640 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy && !fOutputFiles.empty() &&
641 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */) {
   // Word the message according to the file mode: "updated" for update mode (case-insensitive), "created" otherwise.
642 const auto fileOpenMode = [&]() {
643 TString checkupdate = fOptions.fMode;
644 checkupdate.ToLower();
645 return checkupdate == "update" ? "updated" : "created";
646 }();
647 Warning("Snapshot",
648 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
649 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
650 "its result in a variable and for example calling the GetValue() method on it.",
651 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
652 }
653}
654
656{
   // Per-task setup for `slot`: obtain the slot's output file from the merger on first use, create a fresh output
   // tree, record which input tree the task reads from, and raise the slot's reset flag so that the next Exec
   // call creates or re-binds the branches.
657 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
658 if (!fOutputFiles[slot]) {
659 // first time this thread executes something, let's create a TBufferMerger output directory
660 fOutputFiles[slot] = fMerger->GetFile();
661 }
662 TDirectory *treeDirectory = fOutputFiles[slot].get();
663 if (!fDirName.empty()) {
664 // call returnExistingDirectory=true since MT can end up making this call multiple times
665 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
666 }
667 // re-create output tree as we need to create its branches again, with new input variables
668 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
669 fOutputTrees[slot] =
670 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
671 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
672 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
673 fOutputTrees[slot]->SetImplicitMT(false);
674 if (fOptions.fAutoFlush)
675 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
   // Prefer the tree of the task-local reader; otherwise fall back to the tree of the input data source, if the
   // data source is an RTTreeDS. In any other case the slot's input tree is left as it was.
676 if (r) {
677 // We could be getting a task-local TTreeReader from the TTreeProcessorMT.
678 fInputTrees[slot] = r->GetTree();
679 } else if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource())) {
680 fInputTrees[slot] = treeDS->GetTree();
681 }
682 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
683}
684
686{
   // End-of-task cleanup for `slot`. The slot's file is written only if the task filled at least one entry.
687 if (fOutputTrees[slot]->GetEntries() > 0)
688 fOutputFiles[slot]->Write();
689 for (auto &branchData : fBranchData[slot])
690 branchData.ClearBranchPointers(); // The branch pointers will go stale below
691 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
692 fOutputTrees[slot].reset(nullptr);
693}
694
695void ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::Exec(unsigned int slot, const std::vector<void *> &values)
696{
697 if (fBranchAddressesNeedReset[slot] == 0) {
698 UpdateCArraysPtrs(slot, values);
699 } else {
700 SetBranches(slot, values);
701 fBranchAddressesNeedReset[slot] = 0;
702 }
703 fOutputTrees[slot]->Fill();
704 auto entries = fOutputTrees[slot]->GetEntries();
705 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
706 if ((autoFlush > 0) && (entries % autoFlush == 0))
707 fOutputFiles[slot]->Write();
708}
709
711 const std::vector<void *> &values)
712{
713 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
714 // associated to those is re-allocated. As a result the value of the pointer can change therewith
715 // leaving associated to the branch of the output tree an invalid pointer.
716 // With this code, we set the value of the pointer in the output branch anew when needed.
   // `values` must hold exactly one address per bookkeeping entry of this slot, in the same order.
717 assert(values.size() == fBranchData[slot].size());
718 auto nValues = values.size();
719 for (decltype(nValues) i{}; i < nValues; i++) {
720 auto &branchData = fBranchData[slot][i];
721 if (branchData.fIsCArray) {
722 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
723 // need its buffer, so we cast it and extract the address of the buffer
724 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
   // Touch the branch only if the buffer address differs from the one remembered from the last update.
725 if (auto *data = rawRVec->data(); branchData.fBranchAddressForCArrays != data) {
726 // reset the branch address
727 branchData.fOutputBranch->SetAddress(data);
728 branchData.fBranchAddressForCArrays = data;
729 }
730 }
731 }
732}
733
735 const std::vector<void *> &values)
736{
737 // create branches in output tree
738 auto &branchData = fBranchData[slot];
739 assert(branchData.size() == values.size());
   // Index-based loop on purpose: SetBranchesHelper may append entries to `branchData`, which would invalidate
   // iterators into it.
   // NOTE(review): if an entry is appended, `i` reaches values.size() and `values[i]` would be read out of
   // bounds -- confirm whether appended entries can really be visited here.
740 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
741 SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], branchData, i, fOptions.fBasketSize, values[i]);
742 }
743
745}
746
748{
   // Create the output branches without binding them to any value (the value address stays null), so that the
   // branches only describe the schema. The bookkeeping of the first slot is used for this.
749 void *dummyValueAddress{};
750 auto &branchData = fBranchData.front();
751 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
753 }
754}
755
757{
   // Open the output file (mode taken from the options, extended by ModeWithoutGlobalRegistration, and compression
   // settings derived from the options) and hand it over to a TBufferMerger.
   // Throws std::runtime_error if the file cannot be opened.
758 auto outFile =
759 std::unique_ptr<TFile>{TFile::Open(fFileName.c_str(), ModeWithoutGlobalRegistration(fOptions.fMode).c_str(),
760 /*ftitle=*/fFileName.c_str(), GetSnapshotCompressionSettings(fOptions))};
761 if (!outFile)
762 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
   // Keep a non-owning pointer to the file: ownership is transferred to the merger on the next line.
763 fOutputFile = outFile.get();
764 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(outFile));
765}
766
768{
769
   // Write and close every per-slot file that was obtained from the merger; slots that never ran have no file.
770 for (auto &file : fOutputFiles) {
771 if (file) {
772 file->Write();
773 file->Close();
774 }
775 }
776
777 // If there were no entries to fill the TTree with (either the input TTree was empty or no event passed after
778 // filtering), create an empty TTree in the output file and create the branches to preserve the schema
779 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
780 assert(fOutputFile && "Missing output file in Snapshot finalization.");
781 // Use GetKey to avoid having to deal with memory management of the object in the file
782 if (!fOutputFile->GetKey(fullTreeName.c_str())) {
783
784 // First find in which directory we need to write the output TTree
785 TDirectory *treeDirectory = fOutputFile;
786 if (!fDirName.empty()) {
787 treeDirectory = fOutputFile->mkdir(fDirName.c_str(), "", true);
788 }
790
791 // Create the output TTree and create the user-requested branches
792 auto outTree =
793 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
   // The input tree stays null when the input data source is not an RTTreeDS.
794 TTree *inputTree{};
795 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
796 inputTree = treeDS->GetTree();
797 SetEmptyBranches(inputTree, *outTree);
798
799 fOutputFile->Write();
800 }
801
802 // flush all buffers to disk by destroying the TBufferMerger
803 fOutputFiles.clear();
804 fMerger.reset();
805
806 // Now connect the data source to the loop manager so it can be used for further processing
807 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
808}
809
810/**
811 * \brief Create a new UntypedSnapshotTTreeHelperMT with a different output file name
812 *
813 * \param newName A type-erased string with the output file name
814 * \return UntypedSnapshotTTreeHelperMT
815 *
816 * This MakeNew implementation is tied to the cloning feature of actions
817 * of the computation graph. In particular, cloning a Snapshot node usually
818 * also involves changing the name of the output file, otherwise the cloned
819 * Snapshot would overwrite the same file.
820 */
823{
   // `newName` must point to a std::string; it is copied before use.
824 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
   // Rebuild the constructor arguments from the bookkeeping of the first slot.
825 std::vector<std::string> inputBranchNames;
826 std::vector<std::string> outputBranchNames;
827 std::vector<bool> isDefine;
828 std::vector<const std::type_info *> inputColumnTypeIDs;
829 for (const auto &bd : fBranchData.front()) {
   // Entries with an empty input branch name are the ones appended for array-size branches (they are added at
   // the end of the list); presumably the new helper re-creates them on demand, so copying stops here.
830 if (bd.fInputBranchName.empty())
831 break;
832 inputBranchNames.push_back(bd.fInputBranchName);
833 outputBranchNames.push_back(bd.fOutputBranchName);
834 isDefine.push_back(bd.fIsDefine);
835 inputColumnTypeIDs.push_back(bd.fInputTypeID);
836 }
837
839 finalName,
840 fDirName,
841 fTreeName,
842 std::move(inputBranchNames),
843 std::move(outputBranchNames),
844 fOptions,
845 std::move(isDefine),
846 fOutputLoopManager,
847 fInputLoopManager,
848 std::move(inputColumnTypeIDs)};
849}
850
852 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename,
853 const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options,
855 const std::vector<const std::type_info *> &colTypeIDs)
856 : fFileName(filename),
857 fDirName(dirname),
858 fNTupleName(ntuplename),
859 fOptions(options),
860 fInputLoopManager(inputLM),
861 fOutputLoopManager(outputLM),
862 fInputFieldNames(vfnames),
863 fOutputFieldNames(ReplaceDotWithUnderscore(fnames)),
864 fNSlots(nSlots),
865 fFillContexts(nSlots),
866 fEntries(nSlots),
867 fInputColumnTypeIDs(colTypeIDs)
868{
870}
871
872// Define special member methods here where the definition of all the data member types is available
877
879{
880 if (!fNTupleName.empty() /* not moved from */ && !fOutputFile /* did not run */ && fOptions.fLazy)
881 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
882}
883
885{
886 auto model = ROOT::RNTupleModel::CreateBare();
887 auto nFields = fOutputFieldNames.size();
888 fFieldTokens.resize(nFields);
889 for (decltype(nFields) i = 0; i < nFields; i++) {
890 // Need to retrieve the type of every field to create as a string
891 // If the input type for a field does not have RTTI, internally we store it as the tag UseNativeDataType. When
892 // that is detected, we need to ask the data source which is the type name based on the on-disk information.
893 const auto typeName = *fInputColumnTypeIDs[i] == typeid(ROOT::Internal::RDF::UseNativeDataType)
894 ? ROOT::Internal::RDF::GetTypeNameWithOpts(*fInputLoopManager->GetDataSource(),
895 fInputFieldNames[i], fOptions.fVector2RVec)
896 : ROOT::Internal::RDF::TypeID2TypeName(*fInputColumnTypeIDs[i]);
897
898 // Cardinality fields are read-only, so instead we snapshot them as their inner type.
899 if (typeName.substr(0, 25) == "ROOT::RNTupleCardinality<") {
900 // Get "T" from "ROOT::RNTupleCardinality<T>".
901 std::string cardinalityType = typeName.substr(25, typeName.size() - 26);
902 Warning("Snapshot",
903 "Column \"%s\" is a read-only \"%s\" column. It will be snapshot as its inner type \"%s\" instead.",
904 fInputFieldNames[i].c_str(), typeName.c_str(), cardinalityType.c_str());
905 model->AddField(ROOT::RFieldBase::Create(fOutputFieldNames[i], cardinalityType).Unwrap());
906 } else {
907 model->AddField(ROOT::RFieldBase::Create(fOutputFieldNames[i], typeName).Unwrap());
908 }
909 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
910 }
911 model->Freeze();
912
914 writeOptions.SetCompression(GetSnapshotCompressionSettings(fOptions));
915 writeOptions.SetInitialUnzippedPageSize(fOptions.fInitialUnzippedPageSize);
916 writeOptions.SetMaxUnzippedPageSize(fOptions.fMaxUnzippedPageSize);
917 writeOptions.SetApproxZippedClusterSize(fOptions.fApproxZippedClusterSize);
918 writeOptions.SetMaxUnzippedClusterSize(fOptions.fMaxUnzippedClusterSize);
919 writeOptions.SetEnablePageChecksums(fOptions.fEnablePageChecksums);
920 writeOptions.SetEnableSamePageMerging(fOptions.fEnableSamePageMerging);
921
922 fOutputFile.reset(TFile::Open(fFileName.c_str(), ModeWithoutGlobalRegistration(fOptions.fMode).c_str()));
923 if (!fOutputFile)
924 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
925
926 TDirectory *outputDir = fOutputFile.get();
927 if (!fDirName.empty()) {
928 TString checkupdate = fOptions.fMode;
929 checkupdate.ToLower();
930 if (checkupdate == "update")
931 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
932 else
933 outputDir = fOutputFile->mkdir(fDirName.c_str());
934 }
935
936 // The RNTupleParallelWriter has exclusive access to the underlying TFile, no further synchronization is needed for
937 // calls to Fill() (in Exec) and FlushCluster() (in FinalizeTask).
938 fWriter = ROOT::RNTupleParallelWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
939}
940
942{
943 if (!fFillContexts[slot]) {
944 fFillContexts[slot] = fWriter->CreateFillContext();
945 fEntries[slot] = fFillContexts[slot]->GetModel().CreateBareEntry();
946 }
947}
948
949void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::Exec(unsigned int slot, const std::vector<void *> &values)
950{
951 auto &fillContext = fFillContexts[slot];
952 auto &outputEntry = fEntries[slot];
953 assert(values.size() == fFieldTokens.size());
954 for (decltype(values.size()) i = 0; i < values.size(); i++) {
955 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
956 }
957 fillContext->Fill(*outputEntry);
958}
959
961{
962 // In principle we would not need to flush a cluster here, but we want to benefit from parallelism for compression.
963 // NB: RNTupleFillContext::FlushCluster() is a nop if there is no new entry since the last flush.
964 fFillContexts[slot]->FlushCluster();
965}
966
968{
969 // First clear and destroy all entries, which were created from the RNTupleFillContexts.
970 fEntries.clear();
971 fFillContexts.clear();
972 // Then destroy the RNTupleParallelWriter and write the metadata.
973 fWriter.reset();
974 // We can now set the data source of the loop manager for the RDataFrame that is returned by the Snapshot call.
975 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName + "/" + fNTupleName, fFileName));
976}
977
978/**
979 * Create a new UntypedSnapshotRNTupleHelper with a different output file name.
980 *
981 * \param[in] newName A type-erased string with the output file name
982 * \return UntypedSnapshotRNTupleHelper
983 *
984 * This MakeNew implementation is tied to the cloning feature of actions
985 * of the computation graph. In particular, cloning a Snapshot node usually
986 * also involves changing the name of the output file, otherwise the cloned
987 * Snapshot would overwrite the same file.
988 */
991{
992 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
994 fNSlots, finalName, fDirName, fNTupleName, fInputFieldNames,
995 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
996}
997
998/*
999 * ------------------------------------
1000 * Snapshot with systematic variations
1001 * ------------------------------------
1002 */
1003namespace ROOT::Internal::RDF {
1004/// An object to store an output file and a tree in one common place to share them between instances
1005/// of Snapshot with systematic uncertainties.
1007 std::unique_ptr<TFile> fFile;
1008 std::unique_ptr<TTree> fTree;
1009 std::string fDirectoryName;
1011
1012 // Bitmasks to indicate whether syst. uncertainties have been computed. Bound to TBranches, so need to be stable in
1013 // memory.
/// One 64-bit chunk of the variation mask together with the buffer that an output branch reads from.
/// The type stays an aggregate, so it can be brace-initialised with just the branch name.
struct Bitmask {
   /// Name of the mask branch; recorded in the branch-to-bitmask mapping.
   std::string branchName;
   /// Working copy of the mask bits; all bits start cleared.
   std::bitset<64> bitset{};
   /// Heap-allocated value whose address is passed to TTree::Branch. Owning it through a unique_ptr keeps
   /// that address unchanged when the Bitmask object itself is moved, e.g. on a vector reallocation.
   std::unique_ptr<uint64_t> branchBuffer = std::make_unique<uint64_t>();
};
1019 std::vector<Bitmask> fBitMasks;
1020
1021 std::unordered_map<std::string, unsigned int> fBranchToVariationMapping;
1022 // The corresponding ROOT dictionary is declared in core/clingutils/src
1023 std::unordered_map<std::string, std::pair<std::string, unsigned int>> fBranchToBitmaskMapping;
1024 unsigned int fNBits = 0;
1025
1028 {
1029 if (!fBranchToBitmaskMapping.empty()) {
1030 fFile->WriteObject(&fBranchToBitmaskMapping,
1031 (std::string{"R_rdf_column_to_bitmask_mapping_"} + fTree->GetName()).c_str());
1032 }
1033 if (fTree) {
1034 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1035 fTree->AutoSave("flushbaskets");
1036
1037 // Now connect the data source to the loop manager so it can be used for further processing
1038 std::string tree = fTree->GetName();
1039 if (!fDirectoryName.empty())
1040 tree = fDirectoryName + '/' + tree;
1041 std::string file = fFile->GetName();
1042
1043 fTree.reset();
1044 fFile.reset();
1045
1047 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(tree, file));
1048 }
1049 }
1050 SnapshotOutputWriter(SnapshotOutputWriter const &) = delete; // Anyway deleted because of the unique_ptrs
1053 delete; // Can be done, but need to make move-from object safe to destruct
1055
1056 /// Register a branch and corresponding systematic uncertainty.
1057 /// This will create an entry in the mapping from branch names to bitmasks, so the corresponding
1058 /// column can be masked if it doesn't contain valid entries. This mapping is written next to the
1059 /// tree into the output file.
1060 void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
1061 {
1062 if (auto it = fBranchToVariationMapping.find(branchName); it != fBranchToVariationMapping.end()) {
1063 if (variationIndex != it->second) {
1064 throw std::logic_error("Branch " + branchName +
1065 " is being registered with different variation index than the expected one: " +
1066 std::to_string(variationIndex));
1067 }
1068 return;
1069 }
1070
1071 // Neither branch nor systematic are known, so a new entry needs to be created
1072 fNBits = std::max(fNBits, variationIndex);
1073 const auto vectorIndex = variationIndex / 64u;
1074 const auto bitIndex = variationIndex % 64u;
1075
1076 // Create bitmask branches as long as necessary to capture the bit
1077 while (vectorIndex >= fBitMasks.size()) {
1078 std::string bitmaskBranchName =
1079 std::string{"R_rdf_mask_"} + fTree->GetName() + '_' + std::to_string(fBitMasks.size());
1081 fTree->Branch(bitmaskBranchName.c_str(), fBitMasks.back().branchBuffer.get());
1082 }
1083
1085 fBranchToBitmaskMapping[branchName] = std::make_pair(fBitMasks[vectorIndex].branchName, bitIndex);
1086 }
1087
1088 /// Clear all bits, as if none of the variations passed its filter.
1090 {
1091 for (auto &mask : fBitMasks)
1092 mask.bitset.reset();
1093 }
1094
1095 /// Set a bit signalling that the variation at `index` passed its filter.
1096 void SetMaskBit(unsigned int index)
1097 {
1098 const auto vectorIndex = index / 64;
1099 const auto bitIndex = index % 64;
1100 fBitMasks[vectorIndex].bitset.set(bitIndex, true);
1101 }
1102
1103 /// Test whether none of the mask bits is set, i.e. returns true if no variation set its bit.
1104 bool MaskEmpty() const
1105 {
1106 return std::none_of(fBitMasks.begin(), fBitMasks.end(), [](Bitmask const &mask) { return mask.bitset.any(); });
1107 }
1108
1109 /// Write the current event and the bitmask to the output dataset.
1110 void Write() const
1111 {
1112 if (!fTree)
1113 throw std::runtime_error("The TTree associated to the Snapshot action doesn't exist, any more.");
1114
1115 for (auto const &mask : fBitMasks) {
1116 *mask.branchBuffer = mask.bitset.to_ullong();
1117 }
1118
1119 fTree->Fill();
1120 }
1121};
1122
1123} // namespace ROOT::Internal::RDF
1124
1126 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
1127 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
1129 const std::vector<const std::type_info *> &colTypeIDs)
1130 : fOptions(options), fInputLoopManager{inputLoopMgr}, fOutputLoopManager{outputLoopMgr}
1131{
1132 EnsureValidSnapshotOutput(fOptions, std::string(treename), std::string(filename));
1133
1135 fOutputHandle = std::make_shared<SnapshotOutputWriter>(
1136 TFile::Open(filename.data(), ModeWithoutGlobalRegistration(fOptions.fMode).c_str(), /*ftitle=*/"",
1138 if (!fOutputHandle->fFile)
1139 throw std::runtime_error(std::string{"Snapshot: could not create output file "} + std::string{filename});
1140
1141 TDirectory *outputDir = fOutputHandle->fFile.get();
1142 if (!dirname.empty()) {
1143 fOutputHandle->fDirectoryName = dirname;
1145 checkupdate.ToLower();
1146 if (checkupdate == "update")
1147 outputDir =
1148 fOutputHandle->fFile->mkdir(std::string{dirname}.c_str(), "", true); // do not overwrite existing directory
1149 else
1150 outputDir = fOutputHandle->fFile->mkdir(std::string{dirname}.c_str());
1151 }
1152
1153 fOutputHandle->fTree = std::make_unique<TTree>(std::string{treename}.c_str(), std::string{treename}.c_str(),
1154 fOptions.fSplitLevel, /*dir=*/outputDir);
1155 fOutputHandle->fOutputLoopManager = fOutputLoopManager;
1156 if (fOptions.fAutoFlush)
1157 fOutputHandle->fTree->SetAutoFlush(fOptions.fAutoFlush);
1158
1160
1161 fBranchData.reserve(vbnames.size());
1162 for (unsigned int i = 0; i < vbnames.size(); ++i) {
1163 fOutputHandle->RegisterBranch(outputBranchNames[i], 0);
1164 fBranchData.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
1165 }
1166}
1167
1168/// Register a new column as a variation of the column at `originalColumnIndex`, and clone its properties.
1169/// If a nominal column is registered here, it is written without changes, but it means that it will be masked
1170/// in case its selection cuts don't pass.
1171/// \param slot Task ID for MT runs.
1172/// \param columnIndex Index where the data of this column will be passed into the helper.
1173/// \param originalColumnIndex If the column being registered is a variation of a "nominal" column, this designates the
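1174/// original. Properties such as name and output type are cloned from the original.
1175/// \param variationIndex Index of the variation this column belongs to; it is used to register the column's mask bit.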
1176/// \param variationName The variation that this column belongs to. If "nominal" is used, this column is considered as
1177/// the original.
1179 unsigned int columnIndex,
1180 unsigned int originalColumnIndex,
1181 unsigned int variationIndex,
1182 std::string const &variationName)
1183{
1185 // This is a nominal column, but it participates in variations.
1186 // It always needs to be written, but we still need to create a mask bit to mark when nominal is invalid.
1187 assert(variationIndex == 0);
1188 fBranchData[columnIndex].fVariationIndex = 0;
1189 fOutputHandle->RegisterBranch(fBranchData[columnIndex].fOutputBranchName, variationIndex);
1190 } else if (columnIndex >= fBranchData.size()) {
1191 // First task, need to create branches
1192 fBranchData.resize(columnIndex + 1);
1193 auto &bd = fBranchData[columnIndex];
1194 bd = fBranchData[originalColumnIndex];
1195 std::string newOutputName = bd.fOutputBranchName + "__" + variationName;
1196 std::replace(newOutputName.begin(), newOutputName.end(), ':', '_');
1197 bd.fOutputBranchName = std::move(newOutputName);
1198 bd.fVariationIndex = variationIndex;
1199
1200 fOutputHandle->RegisterBranch(bd.fOutputBranchName, variationIndex);
1201 } else {
1202 assert(static_cast<unsigned int>(fBranchData[columnIndex].fVariationIndex) == variationIndex);
1203 }
1204}
1205
1206/// Bind all output branches to RDF columns for the given slots.
1208{
1209 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
1210 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
1211 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
1212 fInputTree = treeDS->GetTree();
1213
1214 // Create all output branches; and bind them to empty values
1215 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
1216 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize,
1217 fBranchData[i].EmptyInstance(/*pointerToPointer=*/false));
1218 }
1219
1220 AssertNoNullBranchAddresses(fBranchData);
1221}
1222
1223/// Connect all output fields to the values pointed to by `values`, fill the output dataset,
1224/// call the Fill of the output tree, and clear the mask bits that show whether a variation was reached.
1225void ROOT::Internal::RDF::SnapshotHelperWithVariations::Exec(unsigned int /*slot*/, const std::vector<void *> &values,
1226 std::vector<bool> const &filterPassed)
1227{
1228 // Rebind branch pointers to RDF values
1229 assert(fBranchData.size() == values.size());
1230 for (std::size_t i = 0; i < values.size(); i++) {
1231 const auto variationIndex = fBranchData[i].fVariationIndex;
1232 if (variationIndex < 0) {
1233 // Branch without variations, it always needs to be written
1234 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1235 } else {
1236 // Nominal will always be written, systematics only if needed
1238 const bool fundamentalType = fBranchData[i].WriteValueIfFundamental(values[i]);
1239 if (!fundamentalType) {
1240 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1241 }
1242 }
1243
1245 fOutputHandle->SetMaskBit(variationIndex);
1246 }
1247 }
1248 }
1249
1250 assert(!fOutputHandle->MaskEmpty()); // Exec should not have been called if nothing passes
1251
1252 fOutputHandle->Write();
1253 fOutputHandle->ClearMaskBits();
1254 for (auto &branchData : fBranchData) {
1255 branchData.ClearBranchContents();
1256 }
1257}
1258
1260{
1261 fOutputHandle.reset();
1262}
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:252
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char mode
static TBranch * SearchForBranch(TTree *tree, const char *name)
Definition TTreePyz.cxx:61
The head node of a RDF computation graph.
void SetDataSource(std::unique_ptr< ROOT::RDF::RDataSource > dataSource)
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&, ROOT::Detail::RDF::RLoopManager *outputLoopMgr, ROOT::Detail::RDF::RLoopManager *inputLoopMgr, const std::vector< const std::type_info * > &colTypeIDs)
void InitTask(TTreeReader *, unsigned int slot)
Bind all output branches to RDF columns for the given slots.
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
void Exec(unsigned int, const std::vector< void * > &values, std::vector< bool > const &filterPassed)
Connect all output fields to the values pointed to by values, fill the output dataset,...
void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex, unsigned int varationIndex, std::string const &variationName)
Register a new column as a variation of the column at originalColumnIndex, and clone its properties.
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
std::vector< std::vector< RBranchData > > fBranchData
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
void InitTask(TTreeReader *r, unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an RNTuple to the existing file.
Common user-tunable settings for storing RNTuples.
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1525
A Branch for the case of an object.
A TTree is a list of TBranches.
Definition TBranch.h:93
static TClass * Class()
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:29
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
Basic data type descriptor (datatype information is obtained from CINT).
Definition TDataType.h:44
Int_t GetType() const
Definition TDataType.h:71
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3787
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
Basic string class.
Definition TString.h:138
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition TTree.h:305
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each of the given column names.
Definition RDFUtils.cxx:424
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "Float_t") to ROOT type code (e.g. 'F').
Definition RDFUtils.cxx:369
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:200
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
Definition RDFUtils.cxx:654
char TypeID2ROOTTypeName(const std::type_info &tid)
Definition RDFUtils.cxx:228
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10133
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10127
std::vector< std::string > ColumnNames_t
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn...
Stores variations of a fundamental type.
Stores properties of each output branch in a Snapshot.
void * EmptyInstance(bool pointerToPointer)
Return a pointer to an empty instance of the type represented by this branch.
void ClearBranchContents()
Point the branch address to an empty instance of the type represented by this branch or write null by...
std::variant< FundamentalType, EmptyDynamicType > fTypeData
const std::type_info * fInputTypeID
An object to store an output file and a tree in one common place to share them between instances of S...
void Write() const
Write the current event and the bitmask to the output dataset.
void ClearMaskBits()
Clear all bits, as if none of the variations passed its filter.
SnapshotOutputWriter(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, std::pair< std::string, unsigned int > > fBranchToBitmaskMapping
void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
Register a branch and corresponding systematic uncertainty.
void SetMaskBit(unsigned int index)
Set a bit signalling that the variation at index passed its filter.
bool MaskEmpty() const
Test if any of the mask bits are set.
SnapshotOutputWriter & operator=(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, unsigned int > fBranchToVariationMapping
SnapshotOutputWriter(SnapshotOutputWriter &&) noexcept=delete
Tag to let data sources use the native data type when creating a column reader.
Definition Utils.hxx:347
EValues
Note: this is only temporarily a struct and will become a enum class hence the name convention used.
Definition Compression.h:88
A collection of options to steer the creation of the dataset on disk through Snapshot().
int fAutoFlush
*(TTree only)* AutoFlush value for output tree
ESnapshotOutputFormat fOutputFormat
Which data format to write to.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
*(TTree only)* Split level of output tree
int fCompressionLevel
Compression level of output file.