Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFSnapshotHelpers.cxx
Go to the documentation of this file.
1/**
2 \file RDFSnapshotHelpers.cxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \author Stephan Hageboeck
9 \date 2025-06
10*/
11
12/*************************************************************************
13 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
14 * All rights reserved. *
15 * *
16 * For the licensing terms see $ROOTSYS/LICENSE. *
17 * For the list of contributors see $ROOTSYS/README/CREDITS. *
18 *************************************************************************/
19
21
22#include <ROOT/REntry.hxx>
23#include <ROOT/RFieldToken.hxx>
24#include <ROOT/RNTuple.hxx>
25#include <ROOT/RNTupleDS.hxx>
28#include <ROOT/RTTreeDS.hxx>
30
31#include <TBranchObject.h>
32#include <TClassEdit.h>
33#include <TDictionary.h>
34#include <TDataType.h>
35#include <TFile.h>
36#include <TLeaf.h>
37#include <TTreeReader.h>
38
39#include <algorithm>
40#include <type_traits>
41#include <utility>
42
44// Maintaining the following allows for faster vector resize:
45static_assert(std::is_nothrow_move_assignable_v<RBranchData>);
46static_assert(std::is_nothrow_move_constructible_v<RBranchData>);
47
48namespace {
49
50void AssertNoNullBranchAddresses(const std::vector<RBranchData> &branches)
51{
52 std::vector<TBranch *> branchesWithNullAddress;
53 for (const auto &branchData : branches) {
54 if (branchData.fOutputBranch->GetAddress() == nullptr)
55 branchesWithNullAddress.push_back(branchData.fOutputBranch);
56 }
57
58 if (branchesWithNullAddress.empty())
59 return;
60
61 // otherwise build error message and throw
62 std::vector<std::string> missingBranchNames;
64 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
65 std::string msg = "RDataFrame::Snapshot:";
66 if (missingBranchNames.size() == 1) {
67 msg += " branch " + missingBranchNames[0] +
68 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
69 "it is";
70 } else {
71 msg += " branches ";
72 for (const auto &bName : missingBranchNames)
73 msg += bName + ", ";
74 msg.resize(msg.size() - 2); // remove last ", "
75 msg += " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
76 }
77 msg += " not part of the set of branches that are being written out.";
78 throw std::runtime_error(msg);
79}
80
81TBranch *SearchForBranch(TTree *inputTree, const std::string &branchName)
82{
83 if (inputTree) {
84 if (auto *getBranchRes = inputTree->GetBranch(branchName.c_str()))
85 return getBranchRes;
86
87 // try harder
88 if (auto *findBranchRes = inputTree->FindBranch(branchName.c_str()))
89 return findBranchRes;
90 }
91 return nullptr;
92}
93
/// Create the output branch for a column that is stored as a C-style array (fixed or variable size) in `inputBranch`.
///
/// Nothing is created, and `thisBranch` is returned as is, when `inputBranch` is null, when its class is an STL
/// vector or an RVec, when it has no first leaf, or when that leaf neither has a count leaf nor a static length
/// greater than one. For variable-sized arrays the branch providing the size is looked up in `outputBranches` and,
/// if needed, an entry and a placeholder branch with a null address are created for it.
///
/// \param outputTree     Tree in which the branches are created.
/// \param outputBranches Bookkeeping of all output branches; may grow by one entry for the size branch.
/// \param thisBranch     Iterator to the bookkeeping entry of the array column.
/// \param inputBranch    Branch of the input tree the column is read from; may be null.
/// \param basketSize     Basket size to use if positive; otherwise the basket size of the input branch is used.
/// \param address        If non-null, treated as a pointer to a ROOT::RVec<std::byte> whose data buffer becomes the
///                       branch address.
/// \return `thisBranch`.
///
/// NOTE(review): this listing appears to have lost a few statements (the definitions of `sizeTypeStr` and
/// `rootbtype`, and whatever refreshes `sizeLeafIt`/`thisBranch` after the emplace_back); confirm against the
/// upstream file before relying on the details below.
94std::vector<RBranchData>::iterator CreateCStyleArrayBranch(TTree &outputTree, std::vector<RBranchData> &outputBranches,
95 std::vector<RBranchData>::iterator thisBranch,
96 TBranch *inputBranch, int basketSize, void *address)
97{
98 if (!inputBranch)
99 return thisBranch;
// Collections of type std::vector / RVec are not handled here
100 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
101 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
102 return thisBranch;
103 // must construct the leaflist for the output branch and create the branch in the output tree
// Only the first leaf of the input branch is inspected
104 const auto *leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
105 if (!leaf)
106 return thisBranch;
107 const auto bname = leaf->GetName();
108 auto *sizeLeaf = leaf->GetLeafCount();
// Despite its name, for fixed-size arrays this holds the static length as a string; it ends up between the
// square brackets of the leaflist in both cases
109 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
110
111 // We proceed only if branch is a fixed-or-variable-sized array
112 if (sizeLeaf || leaf->GetLenStatic() > 1) {
113 if (sizeLeaf) {
114 // The array branch `bname` has dynamic size stored in leaf `sizeLeafName`, so we need to ensure that it's
115 // in the output tree.
116 auto sizeLeafIt =
117 std::find_if(outputBranches.begin(), outputBranches.end(),
118 [&sizeLeafName](const RBranchData &bd) { return bd.fOutputBranchName == sizeLeafName; });
119 if (sizeLeafIt == outputBranches.end()) {
120 // The size leaf is not part of the output branches yet, so emplace an empty slot for it.
121 // This means that iterators need to be updated in case the container reallocates.
122 const auto indexBeforeEmplace = std::distance(outputBranches.begin(), thisBranch);
123 outputBranches.emplace_back("", sizeLeafName, /*isDefine=*/false, /*typeID=*/nullptr);
126 }
127 if (!sizeLeafIt->fOutputBranch) {
128 // The size leaf was emplaced, but not initialised yet
130 // Use original basket size for existing branches otherwise use custom basket size.
131 const auto bufSize = (basketSize > 0) ? basketSize : sizeLeaf->GetBranch()->GetBasketSize();
132 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for
133 // `sizeLeafName`
134 auto *outputBranch = outputTree.Branch(sizeLeafName.c_str(), static_cast<void *>(nullptr),
135 (sizeLeafName + '/' + sizeTypeStr).c_str(), bufSize);
136 sizeLeafIt->fOutputBranch = outputBranch;
137 }
138 }
139
140 const auto btype = leaf->GetTypeName();
// A blank type character means no leaflist type code is available for `btype`: warn and skip the column
142 if (rootbtype == ' ') {
143 Warning("Snapshot",
144 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
145 "leaf is of type '%s'. This column will not be written out.",
146 bname, btype);
147 return thisBranch;
148 }
149
// The leaflist has the form "name[size]/T"
150 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
151 // Use original basket size for existing branches and new basket size for new branches
152 const auto bufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
153 void *addressForBranch = [address]() -> void * {
154 if (address) {
155 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we need
156 // its buffer, so we cast it and extract the address of the buffer
157 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(address);
158 return rawRVec->data();
159 }
160 return nullptr;
161 }();
162 thisBranch->fOutputBranch =
163 outputTree.Branch(thisBranch->fOutputBranchName.c_str(), addressForBranch, leaflist.c_str(), bufSize);
// Carry over the title of the input branch and flag the column as a C-style array
164 thisBranch->fOutputBranch->SetTitle(inputBranch->GetTitle());
165 thisBranch->fIsCArray = true;
166 }
167
168 return thisBranch;
169}
170
171void SetBranchAddress(TBranch *inputBranch, RBranchData &branchData, void *valueAddress)
172{
173 const static TClassRef TBOClRef("TBranchObject");
174 if (inputBranch && inputBranch->IsA() == TBOClRef) {
175 branchData.fOutputBranch->SetAddress(reinterpret_cast<void **>(inputBranch->GetAddress()));
176 } else if (branchData.fOutputBranch->IsA() != TBranch::Class()) {
177 // This is a relatively rare case of a fixed-size array getting redefined
178 branchData.fBranchAddressForCArrays = valueAddress;
179 branchData.fOutputBranch->SetAddress(&branchData.fBranchAddressForCArrays);
180 } else {
181 void *correctAddress = [valueAddress, isCArray = branchData.fIsCArray]() -> void * {
182 if (isCArray) {
183 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
184 // need its buffer, so we cast it and extract the address of the buffer
185 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
186 return rawRVec->data();
187 }
188 return valueAddress;
189 }();
190 branchData.fOutputBranch->SetAddress(correctAddress);
191 branchData.fBranchAddressForCArrays = valueAddress;
192 }
193}
194
196{
197 // Logic taken from
198 // TTree::BranchImpRef(
199 // const char* branchname, TClass* ptrClass, EDataType datatype, void* addobj, Int_t bufsize, Int_t splitlevel)
201 if (rootTypeChar == ' ') {
202 Warning("Snapshot",
203 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
204 "column will not be written out.",
205 bd.fOutputBranchName.c_str());
206 return;
207 }
208 std::string leafList{bd.fOutputBranchName + '/' + rootTypeChar};
209 bd.fOutputBranch = outputTree.Branch(bd.fOutputBranchName.c_str(), valueAddress, leafList.c_str(), bufSize);
210}
211
212/// Ensure that the TTree with the resulting snapshot can be written to the target TFile. This means checking that the
213/// TFile can be opened in the mode specified in `opts`, deleting any existing TTrees in case
214/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
216 const std::string &fileName)
217{
218 TString fileMode = opts.fMode;
219 fileMode.ToLower();
220 if (fileMode != "update")
221 return;
222
223 // output file opened in "update" mode: must check whether output TTree is already present in file
224 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
225 if (!outFile || outFile->IsZombie())
226 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
227
228 TObject *outTree = outFile->Get(treeName.c_str());
229 if (outTree == nullptr)
230 return;
231
232 // object called treeName is already present in the file
233 if (opts.fOverwriteIfExists) {
234 if (outTree->InheritsFrom("TTree")) {
235 static_cast<TTree *>(outTree)->Delete("all");
236 } else {
237 outFile->Delete(treeName.c_str());
238 }
239 } else {
240 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
241 "\". If you want to delete the original tree and write another, please set "
242 "RSnapshotOptions::fOverwriteIfExists to true.";
243 throw std::invalid_argument(msg);
244 }
245}
246
247/// Ensure that the RNTuple with the resulting snapshot can be written to the target TFile. This means checking that the
248/// TFile can be opened in the mode specified in `opts`, deleting any existing RNTuples in case
249/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
251 const std::string &fileName)
252{
253 TString fileMode = opts.fMode;
254 fileMode.ToLower();
255 if (fileMode != "update")
256 return;
257
258 // output file opened in "update" mode: must check whether output RNTuple is already present in file
259 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
260 if (!outFile || outFile->IsZombie())
261 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
262
263 auto *outNTuple = outFile->Get<ROOT::RNTuple>(ntupleName.c_str());
264
265 if (outNTuple) {
266 if (opts.fOverwriteIfExists) {
267 outFile->Delete((ntupleName + ";*").c_str());
268 return;
269 } else {
270 const std::string msg = "Snapshot: RNTuple \"" + ntupleName + "\" already present in file \"" + fileName +
271 "\". If you want to delete the original ntuple and write another, please set "
272 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
273 throw std::invalid_argument(msg);
274 }
275 }
276
277 // Also check if there is any object other than an RNTuple with the provided ntupleName.
278 TObject *outObj = outFile->Get(ntupleName.c_str());
279
280 if (!outObj)
281 return;
282
283 // An object called ntupleName is already present in the file.
284 if (opts.fOverwriteIfExists) {
285 if (auto tree = dynamic_cast<TTree *>(outObj)) {
286 tree->Delete("all");
287 } else {
288 outFile->Delete((ntupleName + ";*").c_str());
289 }
290 } else {
291 const std::string msg = "Snapshot: object \"" + ntupleName + "\" already present in file \"" + fileName +
292 "\". If you want to delete the original object and write a new RNTuple, please set "
293 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
294 throw std::invalid_argument(msg);
295 }
296}
297
299 std::vector<ROOT::Internal::RDF::RBranchData> &allBranchData, std::size_t currentIndex,
300 int basketSize, void *valueAddress)
301{
303 auto *inputBranch = branchData->fIsDefine ? nullptr : SearchForBranch(inputTree, branchData->fInputBranchName);
304
305 if (branchData->fOutputBranch && valueAddress) {
306 // The output branch was already created, we just need to (re)set its address
307 SetBranchAddress(inputBranch, *branchData, valueAddress);
308 return;
309 }
310
311 // Respect the original bufsize and splitlevel arguments
312 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
313 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
314 // TObject branch, see https://bit.ly/2EjLMId ).
315 // A user-provided basket size value takes precedence.
316 const auto bufSize = (basketSize > 0) ? basketSize : (inputBranch ? inputBranch->GetBasketSize() : 32000);
317 const auto splitLevel = inputBranch ? inputBranch->GetSplitLevel() : 99;
318
319 auto *dictionary = TDictionary::GetDictionary(*branchData->fInputTypeID);
320 if (dynamic_cast<TDataType *>(dictionary)) {
321 // Branch of fundamental type
323 return;
324 }
325
326 if (!branchData->fIsDefine) {
327 // Cases where we need a leaflist (e.g. C-style arrays)
328 // We only enter this code path if the input value does not come from a Define/Redefine. In those cases, it is
329 // not allowed to create a column of C-style array type, so that can't happen when writing the TTree. This is
330 // currently what prevents writing the wrong branch output type in a scenario where the input branch of the TTree
331 // is a C-style array and then the user is Redefining it with some other type (e.g. a ROOT::RVec).
333 }
334 if (branchData->fOutputBranch) {
335 // A branch was created in the previous function call
336 if (valueAddress) {
337 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
338 // need its buffer, so we cast it and extract the address of the buffer
339 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
340 branchData->fBranchAddressForCArrays = rawRVec->data();
341 }
342 return;
343 }
344
345 if (auto *classPtr = dynamic_cast<TClass *>(dictionary)) {
346 // Case of unsplit object with polymorphic type
347 if (inputBranch && dynamic_cast<TBranchObject *>(inputBranch) && valueAddress)
348 branchData->fOutputBranch =
350 inputBranch->GetAddress(), bufSize, splitLevel);
351 // General case, with valid address
352 else if (valueAddress)
354 outputTree, branchData->fOutputBranchName.c_str(), classPtr, TDataType::GetType(*branchData->fInputTypeID),
356 // No value was passed, we're just creating a hollow branch to populate the dataset schema
357 else
358 branchData->fOutputBranch =
359 outputTree.Branch(branchData->fOutputBranchName.c_str(), classPtr->GetName(), nullptr, bufSize);
360 return;
361 }
362
363 // We are not aware of other cases
364 throw std::logic_error(
365 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
366}
367
369{
372
373 if (options.fOutputFormat == OutputFormat::kTTree || options.fOutputFormat == OutputFormat::kDefault) {
374 // The default compression settings for TTree is 101
375 if (options.fCompressionAlgorithm == CompAlgo::kUndefined) {
376 return ROOT::CompressionSettings(CompAlgo::kZLIB, 1);
377 }
379 } else if (options.fOutputFormat == OutputFormat::kRNTuple) {
380 // The default compression settings for RNTuple is 505
381 if (options.fCompressionAlgorithm == CompAlgo::kUndefined) {
382 return ROOT::CompressionSettings(CompAlgo::kZSTD, 5);
383 }
385 } else {
386 throw std::invalid_argument("RDataFrame::Snapshot: unrecognized output format");
387 }
388}
389} // namespace
390
392 const std::type_info *typeID)
393 : fInputBranchName{std::move(inputBranchName)},
394 fOutputBranchName{std::move(outputBranchName)},
395 fInputTypeID{typeID},
396 fIsDefine{isDefine}
397{
399 if (auto datatype = dynamic_cast<TDataType *>(dictionary); datatype) {
401 } else if (auto tclass = dynamic_cast<TClass *>(dictionary); tclass) {
402 fTypeData = EmptyDynamicType{tclass};
403 }
404}
405
406/// @brief Return a pointer to an empty instance of the type represented by this branch.
407/// For fundamental types, this is simply an 8-byte region of zeroes. For classes, it is an instance created with
408/// TClass::New.
409/// @param pointerToPointer Return a pointer to a pointer, so it can be used directly in TTree::SetBranchAddress().
411{
// NOTE(review): the function header line is absent from this listing; `pointerToPointer` is presumably the only
// parameter — confirm against the upstream file.
// Fundamental types: hand out the buffer stored inside the variant
412 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
413 assert(!pointerToPointer); // Not used for fundamental types
414 return fundamental->fBytes.data();
415 }
416
// Class types: the empty instance is created on first use and then cached in fTypeData
417 auto &dynamic = std::get<EmptyDynamicType>(fTypeData);
418 if (!dynamic.fEmptyInstance) {
419 auto *dictionary = TDictionary::GetDictionary(*fInputTypeID);
420 assert(dynamic_cast<TDataType *>(dictionary) ==
421 nullptr); // TDataType should be handled by writing into the local buffer
422
423 auto tclass = dynamic_cast<TClass *>(dictionary);
424 assert(tclass);
// The instance comes from TClass::New and is released through the function returned by TClass::GetDestructor
425 dynamic.fEmptyInstance = std::shared_ptr<void>{tclass->New(), tclass->GetDestructor()};
426 }
427
428 if (pointerToPointer) {
429 // Make TTree happy (needs a pointer to pointer):
// The raw pointer is kept in a data member so that its address can be returned
430 dynamic.fRawPtrToEmptyInstance = dynamic.fEmptyInstance.get();
431 return &dynamic.fRawPtrToEmptyInstance;
432 } else {
433 return dynamic.fEmptyInstance.get();
434 }
435}
436
437/// Point the branch address to an empty instance of the type represented by this branch
438/// or write null bytes into the space used by the fundamental type.
439/// This is used in case of variations, when certain defines/actions don't execute. We
440/// nevertheless need to write something, so we point the branch to an empty instance.
442{
443 if (!fOutputBranch)
444 return;
445
446 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
447 fundamental->fBytes.fill(std::byte{0});
448 } else {
449 // TTree expects pointer to pointer, to figure out who allocates the object:
450 fOutputBranch->SetAddress(EmptyInstance(/*pointerToPointer=*/true));
451 }
452}
453
455 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
456 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
458 const std::vector<const std::type_info *> &colTypeIDs)
459 : fFileName(filename),
460 fDirName(dirname),
461 fTreeName(treename),
462 fOptions(options),
463 fOutputLoopManager(loopManager),
464 fInputLoopManager(inputLM)
465{
467
469 fBranchData.reserve(vbnames.size());
470 for (unsigned int i = 0; i < vbnames.size(); ++i) {
471 fBranchData.emplace_back(vbnames[i], std::move(outputBranchNames[i]), isDefine[i], colTypeIDs[i]);
472 }
473}
474
475// Define special member methods here where the definition of all the data member types is available
479 ROOT::Internal::RDF::UntypedSnapshotTTreeHelper &&) noexcept = default;
480
482{
483 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy) {
484 const auto fileOpenMode = [&]() {
485 TString checkupdate = fOptions.fMode;
486 checkupdate.ToLower();
487 return checkupdate == "update" ? "updated" : "created";
488 }();
489 Warning("Snapshot",
490 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
491 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
492 "its result in a variable and for example calling the GetValue() method on it.",
493 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
494 }
495}
496
498{
499 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
500 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
501 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
502 fInputTree = treeDS->GetTree();
503 fBranchAddressesNeedReset = true;
504}
505
506void ROOT::Internal::RDF::UntypedSnapshotTTreeHelper::Exec(unsigned int, const std::vector<void *> &values)
507{
508 if (!fBranchAddressesNeedReset) {
509 UpdateCArraysPtrs(values);
510 } else {
511 SetBranches(values);
512 fBranchAddressesNeedReset = false;
513 }
514
515 fOutputTree->Fill();
516}
517
519{
// NOTE(review): the function header line is absent from this listing; `values` is presumably the vector of column
// value addresses forwarded by Exec — confirm against the upstream file.
520 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
521 // associated to those is re-allocated. As a result the value of the pointer can change therewith
522 // leaving associated to the branch of the output tree an invalid pointer.
523 // With this code, we set the value of the pointer in the output branch anew when needed.
524 assert(values.size() == fBranchData.size());
525 auto nValues = values.size();
526 for (decltype(nValues) i{}; i < nValues; i++) {
// Columns that are not flagged as C-style arrays are left untouched
527 if (fBranchData[i].fIsCArray) {
528 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
529 // need its buffer, so we cast it and extract the address of the buffer
530 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
// Only touch the branch if the buffer differs from the cached address
531 if (auto *data = rawRVec->data(); fBranchData[i].fBranchAddressForCArrays != data) {
532 fBranchData[i].fOutputBranch->SetAddress(data);
533 fBranchData[i].fBranchAddressForCArrays = data;
534 }
535 }
536 }
537}
538
540{
// Calls SetBranchesHelper once per entry of fBranchData, then verifies that no output branch was left with a null
// address (AssertNoNullBranchAddresses throws std::runtime_error otherwise).
// NOTE(review): the function header line is absent from this listing. Also, the loop bound is re-read on every
// iteration because fBranchData can grow; if it grows beyond values.size(), values[i] would be indexed out of
// range — confirm whether that can happen in practice.
541 // create branches in output tree
542 assert(fBranchData.size() == values.size());
543 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
544 SetBranchesHelper(fInputTree, *fOutputTree, fBranchData, i, fOptions.fBasketSize, values[i]);
545 }
546 AssertNoNullBranchAddresses(fBranchData);
547}
548
550{
// Calls SetBranchesHelper once per entry of fBranchData, always passing a null value address, i.e. no column data
// is attached to the branches.
// NOTE(review): the function header line is absent from this listing; `inputTree` and `outputTree` are presumably
// its parameters — confirm against the upstream file.
551 void *dummyValueAddress{};
552 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
553 SetBranchesHelper(inputTree, outputTree, fBranchData, i, fOptions.fBasketSize, dummyValueAddress);
554 }
555}
556
558{
559 fOutputFile.reset(
560 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"", GetSnapshotCompressionSettings(fOptions)));
561 if (!fOutputFile)
562 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
563
564 TDirectory *outputDir = fOutputFile.get();
565 if (!fDirName.empty()) {
566 TString checkupdate = fOptions.fMode;
567 checkupdate.ToLower();
568 if (checkupdate == "update")
569 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
570 else
571 outputDir = fOutputFile->mkdir(fDirName.c_str());
572 }
573
574 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
575
576 if (fOptions.fAutoFlush)
577 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
578}
579
581{
582 assert(fOutputTree != nullptr);
583 assert(fOutputFile != nullptr);
584
585 // There were no entries to fill the TTree with (either the input TTree was empty or no event passed after
586 // filtering). We have already created an empty TTree, now also create the branches to preserve the schema
587 if (fOutputTree->GetEntries() == 0) {
588 SetEmptyBranches(fInputTree, *fOutputTree);
589 }
590 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
591 fOutputTree->AutoSave("flushbaskets");
592 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
593 fOutputTree.reset();
594 fOutputFile->Close();
595
596 // Now connect the data source to the loop manager so it can be used for further processing
597 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
598 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
599}
600
601/**
602 * \brief Create a new UntypedSnapshotTTreeHelper with a different output file name
603 *
604 * \param newName A type-erased string with the output file name
605 * \return UntypedSnapshotTTreeHelper
606 *
607 * This MakeNew implementation is tied to the cloning feature of actions
608 * of the computation graph. In particular, cloning a Snapshot node usually
609 * also involves changing the name of the output file, otherwise the cloned
610 * Snapshot would overwrite the same file.
611 */
614{
615 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
616 std::vector<std::string> inputBranchNames;
617 std::vector<std::string> outputBranchNames;
618 std::vector<bool> isDefine;
619 std::vector<const std::type_info *> inputColumnTypeIDs;
620 for (const auto &bd : fBranchData) {
621 if (bd.fInputBranchName.empty())
622 break;
623 inputBranchNames.push_back(bd.fInputBranchName);
624 outputBranchNames.push_back(bd.fOutputBranchName);
625 isDefine.push_back(bd.fIsDefine);
626 inputColumnTypeIDs.push_back(bd.fInputTypeID);
627 }
628
630 fDirName,
631 fTreeName,
632 std::move(inputBranchNames),
633 std::move(outputBranchNames),
634 fOptions,
635 std::move(isDefine),
636 fOutputLoopManager,
637 fInputLoopManager,
639}
640
642 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename,
643 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
645 const std::vector<const std::type_info *> &colTypeIDs)
646 : fNSlots(nSlots),
647 fOutputFiles(fNSlots),
648 fOutputTrees(fNSlots),
649 fBranchAddressesNeedReset(fNSlots, 1),
650 fInputTrees(fNSlots),
651 fFileName(filename),
652 fDirName(dirname),
653 fTreeName(treename),
654 fOptions(options),
655 fOutputLoopManager(loopManager),
656 fInputLoopManager(inputLM)
657{
659
661 fBranchData.reserve(fNSlots);
662 for (unsigned int slot = 0; slot < fNSlots; ++slot) {
663 fBranchData.emplace_back();
664 auto &thisSlot = fBranchData.back();
665 thisSlot.reserve(vbnames.size());
666 for (unsigned int i = 0; i < vbnames.size(); ++i) {
667 thisSlot.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
668 }
669 }
670}
671
672// Define special member methods here where the definition of all the data member types is available
677
679{
680 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy && !fOutputFiles.empty() &&
681 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */) {
682 const auto fileOpenMode = [&]() {
683 TString checkupdate = fOptions.fMode;
684 checkupdate.ToLower();
685 return checkupdate == "update" ? "updated" : "created";
686 }();
687 Warning("Snapshot",
688 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
689 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
690 "its result in a variable and for example calling the GetValue() method on it.",
691 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
692 }
693}
694
696{
697 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
698 if (!fOutputFiles[slot]) {
699 // first time this thread executes something, let's create a TBufferMerger output directory
700 fOutputFiles[slot] = fMerger->GetFile();
701 }
702 TDirectory *treeDirectory = fOutputFiles[slot].get();
703 if (!fDirName.empty()) {
704 // call returnExistingDirectory=true since MT can end up making this call multiple times
705 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
706 }
707 // re-create output tree as we need to create its branches again, with new input variables
708 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
709 fOutputTrees[slot] =
710 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
711 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
712 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
713 fOutputTrees[slot]->SetImplicitMT(false);
714 if (fOptions.fAutoFlush)
715 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
716 if (r) {
717 // We could be getting a task-local TTreeReader from the TTreeProcessorMT.
718 fInputTrees[slot] = r->GetTree();
719 } else if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource())) {
720 fInputTrees[slot] = treeDS->GetTree();
721 }
722 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
723}
724
726{
727 if (fOutputTrees[slot]->GetEntries() > 0)
728 fOutputFiles[slot]->Write();
729 for (auto &branchData : fBranchData[slot])
730 branchData.ClearBranchPointers(); // The branch pointers will go stale below
731 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
732 fOutputTrees[slot].reset(nullptr);
733}
734
735void ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::Exec(unsigned int slot, const std::vector<void *> &values)
736{
737 if (fBranchAddressesNeedReset[slot] == 0) {
738 UpdateCArraysPtrs(slot, values);
739 } else {
740 SetBranches(slot, values);
741 fBranchAddressesNeedReset[slot] = 0;
742 }
743 fOutputTrees[slot]->Fill();
744 auto entries = fOutputTrees[slot]->GetEntries();
745 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
746 if ((autoFlush > 0) && (entries % autoFlush == 0))
747 fOutputFiles[slot]->Write();
748}
749
751 const std::vector<void *> &values)
752{
// NOTE(review): the first line of the function header is absent from this listing; `slot` is presumably its first
// parameter, as in the call made by Exec — confirm against the upstream file.
753 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
754 // associated to those is re-allocated. As a result the value of the pointer can change therewith
755 // leaving associated to the branch of the output tree an invalid pointer.
756 // With this code, we set the value of the pointer in the output branch anew when needed.
757 assert(values.size() == fBranchData[slot].size());
758 auto nValues = values.size();
759 for (decltype(nValues) i{}; i < nValues; i++) {
// Each slot has its own branch bookkeeping
760 auto &branchData = fBranchData[slot][i];
// Columns that are not flagged as C-style arrays are left untouched
761 if (branchData.fIsCArray) {
762 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
763 // need its buffer, so we cast it and extract the address of the buffer
764 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
// Only touch the branch if the buffer differs from the cached address
765 if (auto *data = rawRVec->data(); branchData.fBranchAddressForCArrays != data) {
766 // reset the branch address
767 branchData.fOutputBranch->SetAddress(data);
768 branchData.fBranchAddressForCArrays = data;
769 }
770 }
771 }
772}
773
775 const std::vector<void *> &values)
776{
777 // create branches in output tree
778 auto &branchData = fBranchData[slot];
779 assert(branchData.size() == values.size());
780 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
781 SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], branchData, i, fOptions.fBasketSize, values[i]);
782 }
783
785}
786
788{
789 void *dummyValueAddress{};
790 auto &branchData = fBranchData.front();
791 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
793 }
794}
795
797{
798 auto outFile =
799 std::unique_ptr<TFile>{TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(),
801 if (!outFile)
802 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
803 fOutputFile = outFile.get();
804 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(outFile));
805}
806
808{
809
810 for (auto &file : fOutputFiles) {
811 if (file) {
812 file->Write();
813 file->Close();
814 }
815 }
816
817 // If there were no entries to fill the TTree with (either the input TTree was empty or no event passed after
818 // filtering), create an empty TTree in the output file and create the branches to preserve the schema
819 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
820 assert(fOutputFile && "Missing output file in Snapshot finalization.");
821 if (!fOutputFile->Get(fullTreeName.c_str())) {
822
823 // First find in which directory we need to write the output TTree
824 TDirectory *treeDirectory = fOutputFile;
825 if (!fDirName.empty()) {
826 treeDirectory = fOutputFile->mkdir(fDirName.c_str(), "", true);
827 }
829
830 // Create the output TTree and create the user-requested branches
831 auto outTree =
832 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
833 TTree *inputTree{};
834 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
835 inputTree = treeDS->GetTree();
836 SetEmptyBranches(inputTree, *outTree);
837
838 fOutputFile->Write();
839 }
840
841 // flush all buffers to disk by destroying the TBufferMerger
842 fOutputFiles.clear();
843 fMerger.reset();
844
845 // Now connect the data source to the loop manager so it can be used for further processing
846 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
847}
848
849/**
850 * \brief Create a new UntypedSnapshotTTreeHelperMT with a different output file name
851 *
852 * \param newName A type-erased string with the output file name
853 * \return UntypedSnapshotTTreeHelperMT
854 *
855 * This MakeNew implementation is tied to the cloning feature of actions
856 * of the computation graph. In particular, cloning a Snapshot node usually
857 * also involves changing the name of the output file, otherwise the cloned
858 * Snapshot would overwrite the same file.
859 */
862{
863 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
864 std::vector<std::string> inputBranchNames;
865 std::vector<std::string> outputBranchNames;
866 std::vector<bool> isDefine;
867 std::vector<const std::type_info *> inputColumnTypeIDs;
868 for (const auto &bd : fBranchData.front()) {
869 if (bd.fInputBranchName.empty())
870 break;
871 inputBranchNames.push_back(bd.fInputBranchName);
872 outputBranchNames.push_back(bd.fOutputBranchName);
873 isDefine.push_back(bd.fIsDefine);
874 inputColumnTypeIDs.push_back(bd.fInputTypeID);
875 }
876
878 finalName,
879 fDirName,
880 fTreeName,
881 std::move(inputBranchNames),
882 std::move(outputBranchNames),
883 fOptions,
884 std::move(isDefine),
885 fOutputLoopManager,
886 fInputLoopManager,
887 std::move(inputColumnTypeIDs)};
888}
889
891 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename,
892 const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options,
894 const std::vector<const std::type_info *> &colTypeIDs)
895 : fFileName(filename),
896 fDirName(dirname),
897 fNTupleName(ntuplename),
898 fOptions(options),
899 fInputLoopManager(inputLM),
900 fOutputLoopManager(outputLM),
901 fInputFieldNames(vfnames),
902 fOutputFieldNames(ReplaceDotWithUnderscore(fnames)),
903 fNSlots(nSlots),
904 fFillContexts(nSlots),
905 fEntries(nSlots),
906 fInputColumnTypeIDs(colTypeIDs)
907{
909}
910
911// Define special member methods here where the definition of all the data member types is available
916
918{
919 if (!fNTupleName.empty() /* not moved from */ && !fOutputFile /* did not run */ && fOptions.fLazy)
920 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
921}
922
924{
925 auto model = ROOT::RNTupleModel::CreateBare();
926 auto nFields = fOutputFieldNames.size();
927 fFieldTokens.resize(nFields);
928 for (decltype(nFields) i = 0; i < nFields; i++) {
929 // Need to retrieve the type of every field to create as a string
930 // If the input type for a field does not have RTTI, internally we store it as the tag UseNativeDataType. When
931 // that is detected, we need to ask the data source which is the type name based on the on-disk information.
932 const auto typeName = *fInputColumnTypeIDs[i] == typeid(ROOT::Internal::RDF::UseNativeDataType)
933 ? ROOT::Internal::RDF::GetTypeNameWithOpts(*fInputLoopManager->GetDataSource(),
934 fInputFieldNames[i], fOptions.fVector2RVec)
935 : ROOT::Internal::RDF::TypeID2TypeName(*fInputColumnTypeIDs[i]);
936 model->AddField(ROOT::RFieldBase::Create(fOutputFieldNames[i], typeName).Unwrap());
937 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
938 }
939 model->Freeze();
940
942 writeOptions.SetCompression(GetSnapshotCompressionSettings(fOptions));
943 writeOptions.SetInitialUnzippedPageSize(fOptions.fInitialUnzippedPageSize);
944 writeOptions.SetMaxUnzippedPageSize(fOptions.fMaxUnzippedPageSize);
945 writeOptions.SetApproxZippedClusterSize(fOptions.fApproxZippedClusterSize);
946 writeOptions.SetMaxUnzippedClusterSize(fOptions.fMaxUnzippedClusterSize);
947 writeOptions.SetEnablePageChecksums(fOptions.fEnablePageChecksums);
948 writeOptions.SetEnableSamePageMerging(fOptions.fEnableSamePageMerging);
949
950 fOutputFile.reset(TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
951 if (!fOutputFile)
952 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
953
954 TDirectory *outputDir = fOutputFile.get();
955 if (!fDirName.empty()) {
956 TString checkupdate = fOptions.fMode;
957 checkupdate.ToLower();
958 if (checkupdate == "update")
959 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
960 else
961 outputDir = fOutputFile->mkdir(fDirName.c_str());
962 }
963
964 // The RNTupleParallelWriter has exclusive access to the underlying TFile, no further synchronization is needed for
965 // calls to Fill() (in Exec) and FlushCluster() (in FinalizeTask).
966 fWriter = ROOT::RNTupleParallelWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
967}
968
970{
971 if (!fFillContexts[slot]) {
972 fFillContexts[slot] = fWriter->CreateFillContext();
973 fEntries[slot] = fFillContexts[slot]->GetModel().CreateBareEntry();
974 }
975}
976
977void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::Exec(unsigned int slot, const std::vector<void *> &values)
978{
979 auto &fillContext = fFillContexts[slot];
980 auto &outputEntry = fEntries[slot];
981 assert(values.size() == fFieldTokens.size());
982 for (decltype(values.size()) i = 0; i < values.size(); i++) {
983 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
984 }
985 fillContext->Fill(*outputEntry);
986}
987
989{
990 // In principle we would not need to flush a cluster here, but we want to benefit from parallelism for compression.
991 // NB: RNTupleFillContext::FlushCluster() is a nop if there is no new entry since the last flush.
992 fFillContexts[slot]->FlushCluster();
993}
994
996{
997 // First clear and destroy all entries, which were created from the RNTupleFillContexts.
998 fEntries.clear();
999 fFillContexts.clear();
1000 // Then destroy the RNTupleParallelWriter and write the metadata.
1001 fWriter.reset();
1002 // We can now set the data source of the loop manager for the RDataFrame that is returned by the Snapshot call.
1003 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName + "/" + fNTupleName, fFileName));
1004}
1005
1006/**
1007 * Create a new UntypedSnapshotRNTupleHelper with a different output file name.
1008 *
1009 * \param[in] newName A type-erased string with the output file name
1010 * \return UntypedSnapshotRNTupleHelper
1011 *
1012 * This MakeNew implementation is tied to the cloning feature of actions
1013 * of the computation graph. In particular, cloning a Snapshot node usually
1014 * also involves changing the name of the output file, otherwise the cloned
1015 * Snapshot would overwrite the same file.
1016 */
1019{
1020 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
1022 fNSlots, finalName, fDirName, fNTupleName, fInputFieldNames,
1023 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
1024}
1025
1026/*
1027 * ------------------------------------
1028 * Snapshot with systematic variations
1029 * ------------------------------------
1030 */
1031namespace ROOT::Internal::RDF {
1032/// An object to store an output file and a tree in one common place to share them between instances
1033/// of Snapshot with systematic uncertainties.
1035 std::unique_ptr<TFile> fFile;
1036 std::unique_ptr<TTree> fTree;
1037 std::string fDirectoryName;
1039
// One 64-bit mask recording which systematic variations have been computed for the current event.
// The value written to the output lives in a separately heap-allocated buffer: its address is
// handed to a TBranch, so it has to stay put even when the Bitmask object itself is moved around.
struct Bitmask {
   std::string branchName;                                             // name of the branch holding this mask
   std::bitset<64> bitset{};                                           // working copy of the mask, all bits cleared
   std::unique_ptr<uint64_t> branchBuffer = std::make_unique<uint64_t>(); // zero-initialised buffer the branch reads from
};
1047 std::vector<Bitmask> fBitMasks;
1048
1049 std::unordered_map<std::string, unsigned int> fBranchToVariationMapping;
1050 // The corresponding ROOT dictionary is declared in core/clingutils/src
1051 std::unordered_map<std::string, std::pair<std::string, unsigned int>> fBranchToBitmaskMapping;
1052 unsigned int fNBits = 0;
1053
1056 {
1057 if (!fBranchToBitmaskMapping.empty()) {
1058 fFile->WriteObject(&fBranchToBitmaskMapping,
1059 (std::string{"R_rdf_column_to_bitmask_mapping_"} + fTree->GetName()).c_str());
1060 }
1061 if (fTree) {
1062 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1063 fTree->AutoSave("flushbaskets");
1064
1065 // Now connect the data source to the loop manager so it can be used for further processing
1066 std::string tree = fTree->GetName();
1067 if (!fDirectoryName.empty())
1068 tree = fDirectoryName + '/' + tree;
1069 std::string file = fFile->GetName();
1070
1071 fTree.reset();
1072 fFile.reset();
1073
1075 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(tree, file));
1076 }
1077 }
1078 SnapshotOutputWriter(SnapshotOutputWriter const &) = delete; // Anyway deleted because of the unique_ptrs
1081 delete; // Can be done, but need to make move-from object safe to destruct
1083
1084 /// Register a branch and corresponding systematic uncertainty.
1085 /// This will create an entry in the mapping from branch names to bitmasks, so the corresponding
1086 /// column can be masked if it doesn't contain valid entries. This mapping is written next to the
1087 /// tree into the output file.
1088 void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
1089 {
1090 if (auto it = fBranchToVariationMapping.find(branchName); it != fBranchToVariationMapping.end()) {
1091 if (variationIndex != it->second) {
1092 throw std::logic_error("Branch " + branchName +
1093 " is being registered with different variation index than the expected one: " +
1094 std::to_string(variationIndex));
1095 }
1096 return;
1097 }
1098
1099 // Neither branch nor systematic are known, so a new entry needs to be created
1100 fNBits = std::max(fNBits, variationIndex);
1101 const auto vectorIndex = variationIndex / 64u;
1102 const auto bitIndex = variationIndex % 64u;
1103
1104 // Create bitmask branches as long as necessary to capture the bit
1105 while (vectorIndex >= fBitMasks.size()) {
1106 std::string bitmaskBranchName =
1107 std::string{"R_rdf_mask_"} + fTree->GetName() + '_' + std::to_string(fBitMasks.size());
1109 fTree->Branch(bitmaskBranchName.c_str(), fBitMasks.back().branchBuffer.get());
1110 }
1111
1113 fBranchToBitmaskMapping[branchName] = std::make_pair(fBitMasks[vectorIndex].branchName, bitIndex);
1114 }
1115
1116 /// Clear all bits, as if none of the variations passed its filter.
1118 {
1119 for (auto &mask : fBitMasks)
1120 mask.bitset.reset();
1121 }
1122
1123 /// Set a bit signalling that the variation at `index` passed its filter.
1124 void SetMaskBit(unsigned int index)
1125 {
1126 const auto vectorIndex = index / 64;
1127 const auto bitIndex = index % 64;
1128 fBitMasks[vectorIndex].bitset.set(bitIndex, true);
1129 }
1130
1131 /// Test if any of the mask bits are set.
1132 bool MaskEmpty() const
1133 {
1134 return std::none_of(fBitMasks.begin(), fBitMasks.end(), [](Bitmask const &mask) { return mask.bitset.any(); });
1135 }
1136
1137 /// Write the current event and the bitmask to the output dataset.
1138 void Write() const
1139 {
1140 if (!fTree)
1141 throw std::runtime_error("The TTree associated to the Snapshot action doesn't exist, any more.");
1142
1143 for (auto const &mask : fBitMasks) {
1144 *mask.branchBuffer = mask.bitset.to_ullong();
1145 }
1146
1147 fTree->Fill();
1148 }
1149};
1150
1151} // namespace ROOT::Internal::RDF
1152
1154 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
1155 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
1157 const std::vector<const std::type_info *> &colTypeIDs)
1158 : fOptions(options), fInputLoopManager{inputLoopMgr}, fOutputLoopManager{outputLoopMgr}
1159{
1160 EnsureValidSnapshotTTreeOutput(fOptions, std::string(treename), std::string(filename));
1161
1163 fOutputHandle = std::make_shared<SnapshotOutputWriter>(
1164 TFile::Open(filename.data(), fOptions.fMode.c_str(), /*ftitle=*/"", GetSnapshotCompressionSettings(fOptions)));
1165 if (!fOutputHandle->fFile)
1166 throw std::runtime_error(std::string{"Snapshot: could not create output file "} + std::string{filename});
1167
1168 TDirectory *outputDir = fOutputHandle->fFile.get();
1169 if (!dirname.empty()) {
1170 fOutputHandle->fDirectoryName = dirname;
1172 checkupdate.ToLower();
1173 if (checkupdate == "update")
1174 outputDir =
1175 fOutputHandle->fFile->mkdir(std::string{dirname}.c_str(), "", true); // do not overwrite existing directory
1176 else
1177 outputDir = fOutputHandle->fFile->mkdir(std::string{dirname}.c_str());
1178 }
1179
1180 fOutputHandle->fTree = std::make_unique<TTree>(std::string{treename}.c_str(), std::string{treename}.c_str(),
1181 fOptions.fSplitLevel, /*dir=*/outputDir);
1182 fOutputHandle->fOutputLoopManager = fOutputLoopManager;
1183 if (fOptions.fAutoFlush)
1184 fOutputHandle->fTree->SetAutoFlush(fOptions.fAutoFlush);
1185
1187
1188 fBranchData.reserve(vbnames.size());
1189 for (unsigned int i = 0; i < vbnames.size(); ++i) {
1190 fOutputHandle->RegisterBranch(outputBranchNames[i], 0);
1191 fBranchData.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
1192 }
1193}
1194
/// Register a new column as a variation of the column at `originalColumnIndex`, and clone its properties.
/// If a nominal column is registered here, it is written without changes, but it will be masked
/// in case its selection cuts don't pass.
/// \param slot Task ID for MT runs.
/// \param columnIndex Index where the data of this column will be passed into the helper.
/// \param originalColumnIndex If the column being registered is a variation of a "nominal" column, this is the
/// index of that original column.
/// Properties such as name and output type are cloned from the original.
/// \param variationIndex Index of the variation this column belongs to. It is stored with the column and
/// registered together with the output branch name.
/// \param variationName The variation that this column belongs to. If "nominal" is used, this column is considered
/// to be the original.
1206 unsigned int columnIndex,
1207 unsigned int originalColumnIndex,
1208 unsigned int variationIndex,
1209 std::string const &variationName)
1210{
1212 fBranchData[columnIndex].fVariationIndex = variationIndex; // The base column has variations
1213 fOutputHandle->RegisterBranch(fBranchData[columnIndex].fOutputBranchName, variationIndex);
1214 } else if (columnIndex >= fBranchData.size()) {
1215 // First task, need to create branches
1216 fBranchData.resize(columnIndex + 1);
1217 auto &bd = fBranchData[columnIndex];
1218 bd = fBranchData[originalColumnIndex];
1219 std::string newOutputName = bd.fOutputBranchName + "__" + variationName;
1220 std::replace(newOutputName.begin(), newOutputName.end(), ':', '_');
1221 bd.fOutputBranchName = std::move(newOutputName);
1222 bd.fVariationIndex = variationIndex;
1223
1224 fOutputHandle->RegisterBranch(bd.fOutputBranchName, variationIndex);
1225 } else {
1226 assert(static_cast<unsigned int>(fBranchData[columnIndex].fVariationIndex) == variationIndex);
1227 }
1228}
1229
1230/// Bind all output branches to RDF columns for the given slots.
1232{
1233 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
1234 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
1235 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
1236 fInputTree = treeDS->GetTree();
1237
1238 // Create all output branches; and bind them to empty values
1239 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
1240 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize,
1241 fBranchData[i].EmptyInstance(/*pointerToPointer=*/false));
1242 }
1243
1244 AssertNoNullBranchAddresses(fBranchData);
1245}
1246
1247/// Connect all output fields to the values pointed to by `values`, fill the output dataset,
1248/// call the Fill of the output tree, and clear the mask bits that show whether a variation was reached.
1249void ROOT::Internal::RDF::SnapshotHelperWithVariations::Exec(unsigned int /*slot*/, const std::vector<void *> &values,
1250 std::vector<bool> const &filterPassed)
1251{
1252 // Rebind branch pointers to RDF values
1253 assert(fBranchData.size() == values.size());
1254 for (std::size_t i = 0; i < values.size(); i++) {
1255 const auto variationIndex = fBranchData[i].fVariationIndex;
1256 if (variationIndex < 0) {
1257 // Branch without variations
1258 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1259 } else if (filterPassed[variationIndex]) {
1260 // Branch with variations
1261 const bool fundamentalType = fBranchData[i].WriteValueIfFundamental(values[i]);
1262 if (!fundamentalType) {
1263 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1264 }
1265 fOutputHandle->SetMaskBit(variationIndex);
1266 }
1267 }
1268
1269 assert(!fOutputHandle->MaskEmpty()); // Exec should not have been called if nothing passes
1270
1271 fOutputHandle->Write();
1272 fOutputHandle->ClearMaskBits();
1273 for (auto &branchData : fBranchData) {
1274 branchData.ClearBranchContents();
1275 }
1276}
1277
1279{
1280 fOutputHandle.reset();
1281}
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:252
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
static TBranch * SearchForBranch(TTree *tree, const char *name)
Definition TTreePyz.cxx:50
The head node of a RDF computation graph.
void SetDataSource(std::unique_ptr< ROOT::RDF::RDataSource > dataSource)
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&, ROOT::Detail::RDF::RLoopManager *outputLoopMgr, ROOT::Detail::RDF::RLoopManager *inputLoopMgr, const std::vector< const std::type_info * > &colTypeIDs)
void InitTask(TTreeReader *, unsigned int slot)
Bind all output branches to RDF columns for the given slots.
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
void Exec(unsigned int, const std::vector< void * > &values, std::vector< bool > const &filterPassed)
Connect all output fields to the values pointed to by values, fill the output dataset,...
void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex, unsigned int varationIndex, std::string const &variationName)
Register a new column as a variation of the column at originalColumnIndex, and clone its properties.
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
std::vector< std::vector< RBranchData > > fBranchData
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
void InitTask(TTreeReader *r, unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an RNTuple to the existing file.
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1524
A Branch for the case of an object.
A TTree is a list of TBranches.
Definition TBranch.h:93
static TClass * Class()
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:29
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
Basic data type descriptor (datatype information is obtained from CINT).
Definition TDataType.h:44
Int_t GetType() const
Definition TDataType.h:68
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3786
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
Mother of all ROOT objects.
Definition TObject.h:41
Basic string class.
Definition TString.h:138
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition TTree.h:297
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition RDFUtils.cxx:402
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:347
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
Definition RDFUtils.cxx:632
char TypeID2ROOTTypeName(const std::type_info &tid)
Definition RDFUtils.cxx:206
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10133
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10127
std::vector< std::string > ColumnNames_t
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn...
Stores variations of a fundamental type.
Stores properties of each output branch in a Snapshot.
void * EmptyInstance(bool pointerToPointer)
Return a pointer to an empty instance of the type represented by this branch.
void ClearBranchContents()
Point the branch address to an empty instance of the type represented by this branch or write null by...
std::variant< FundamentalType, EmptyDynamicType > fTypeData
const std::type_info * fInputTypeID
An object to store an output file and a tree in one common place to share them between instances of S...
void Write() const
Write the current event and the bitmask to the output dataset.
void ClearMaskBits()
Clear all bits, as if none of the variations passed its filter.
SnapshotOutputWriter(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, std::pair< std::string, unsigned int > > fBranchToBitmaskMapping
void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
Register a branch and corresponding systematic uncertainty.
void SetMaskBit(unsigned int index)
Set a bit signalling that the variation at index passed its filter.
bool MaskEmpty() const
Test if any of the mask bits are set.
SnapshotOutputWriter & operator=(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, unsigned int > fBranchToVariationMapping
SnapshotOutputWriter(SnapshotOutputWriter &&) noexcept=delete
Tag to let data sources use the native data type when creating a column reader.
Definition Utils.hxx:344
EValues
Note: this is only temporarily a struct and will become a enum class hence the name convention used.
Definition Compression.h:88
A collection of options to steer the creation of the dataset on disk through Snapshot().
int fAutoFlush
*(TTree only)* AutoFlush value for output tree
ESnapshotOutputFormat fOutputFormat
Which data format to write to.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
*(TTree only)* Split level of output tree
int fCompressionLevel
Compression level of output file.