51 if (
branchData.fOutputBranch->GetAddress() ==
nullptr)
62 std::string
msg =
"RDataFrame::Snapshot:";
65 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
71 msg.resize(
msg.size() - 2);
72 msg +=
" are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
74 msg +=
" not part of the set of branches that are being written out.";
75 throw std::runtime_error(
msg);
104 const auto bname =
leaf->GetName();
138 const auto btype =
leaf->GetTypeName();
142 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
143 "leaf is of type '%s'. This column will not be written out.",
201 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
202 "column will not be written out.",
203 bd.fOutputBranchName.c_str());
214 const std::string &fileName)
224 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
231 if (
opts.fOverwriteIfExists) {
232 if (
outTree->InheritsFrom(
"TTree")) {
238 const std::string
msg =
"Snapshot: tree \"" +
treeName +
"\" already present in file \"" + fileName +
239 "\". If you want to delete the original tree and write another, please set "
240 "RSnapshotOptions::fOverwriteIfExists to true.";
241 throw std::invalid_argument(
msg);
249 const std::string &fileName)
259 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
264 if (
opts.fOverwriteIfExists) {
268 const std::string
msg =
"Snapshot: RNTuple \"" +
ntupleName +
"\" already present in file \"" + fileName +
269 "\". If you want to delete the original ntuple and write another, please set "
270 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
271 throw std::invalid_argument(
msg);
282 if (
opts.fOverwriteIfExists) {
289 const std::string
msg =
"Snapshot: object \"" +
ntupleName +
"\" already present in file \"" + fileName +
290 "\". If you want to delete the original object and write a new RNTuple, please set "
291 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
292 throw std::invalid_argument(
msg);
362 throw std::logic_error(
363 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
371 const std::vector<const std::type_info *> &
colTypeIDs)
383 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
396 if (!fTreeName.empty() && !fOutputFile && fOptions.fLazy) {
400 return checkupdate ==
"update" ?
"updated" :
"created";
403 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
404 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
405 "its result in a variable and for example calling the GetValue() method on it.",
415 fInputTree =
treeDS->GetTree();
416 fBranchAddressesNeedReset =
true;
421 if (!fBranchAddressesNeedReset) {
422 UpdateCArraysPtrs(values);
425 fBranchAddressesNeedReset =
false;
437 assert(values.size() == fBranchData.size());
440 if (fBranchData[i].fIsCArray) {
444 if (
auto *
data =
rawRVec->data(); fBranchData[i].fBranchAddressForCArrays !=
data) {
445 fBranchData[i].fOutputBranch->SetAddress(
data);
446 fBranchData[i].fBranchAddressForCArrays =
data;
455 assert(fBranchData.size() == values.size());
456 for (std::size_t i = 0; i < fBranchData.size(); i++) {
457 SetBranchesHelper(fInputTree, *fOutputTree, fBranchData, i, fOptions.fBasketSize, values[i]);
465 for (std::size_t i = 0; i < fBranchData.size(); i++) {
473 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(),
"",
476 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
479 if (!fDirName.empty()) {
483 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
485 outputDir = fOutputFile->mkdir(fDirName.c_str());
488 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
outputDir);
490 if (fOptions.fAutoFlush)
491 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
496 assert(fOutputTree !=
nullptr);
497 assert(fOutputFile !=
nullptr);
501 if (fOutputTree->GetEntries() == 0) {
502 SetEmptyBranches(fInputTree, *fOutputTree);
505 fOutputTree->AutoSave(
"flushbaskets");
508 fOutputFile->Close();
511 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
512 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
529 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
534 for (
const auto &
bd : fBranchData) {
535 if (
bd.fInputBranchName.empty())
559 const std::vector<const std::type_info *> &
colTypeIDs)
561 fOutputFiles(fNSlots),
562 fOutputTrees(fNSlots),
563 fBranchAddressesNeedReset(fNSlots, 1),
564 fInputTrees(fNSlots),
580 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
594 if (!fTreeName.empty() && fOptions.fLazy && !fOutputFiles.empty() &&
595 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](
const auto &
f) { return !f; }) ) {
599 return checkupdate ==
"update" ?
"updated" :
"created";
602 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
603 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
604 "its result in a variable and for example calling the GetValue() method on it.",
612 if (!fOutputFiles[
slot]) {
614 fOutputFiles[
slot] = fMerger->GetFile();
617 if (!fDirName.empty()) {
624 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
627 fOutputTrees[
slot]->SetImplicitMT(
false);
628 if (fOptions.fAutoFlush)
629 fOutputTrees[
slot]->SetAutoFlush(fOptions.fAutoFlush);
632 fInputTrees[
slot] =
r->GetTree();
636 fBranchAddressesNeedReset[
slot] = 1;
641 if (fOutputTrees[
slot]->GetEntries() > 0)
642 fOutputFiles[
slot]->Write();
646 fOutputTrees[
slot].reset(
nullptr);
651 if (fBranchAddressesNeedReset[
slot] == 0) {
652 UpdateCArraysPtrs(
slot, values);
654 SetBranches(
slot, values);
655 fBranchAddressesNeedReset[
slot] = 0;
657 fOutputTrees[
slot]->Fill();
658 auto entries = fOutputTrees[
slot]->GetEntries();
661 fOutputFiles[
slot]->Write();
665 const std::vector<void *> &values)
671 assert(values.size() == fBranchData[
slot].size());
689 const std::vector<void *> &values)
694 for (std::size_t i = 0; i <
branchData.size(); i++) {
705 for (std::size_t i = 0; i <
branchData.size(); i++) {
714 std::unique_ptr<TFile>{
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), fFileName.c_str(),
cs)};
716 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
718 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(
outFile));
724 for (
auto &file : fOutputFiles) {
733 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
734 assert(fOutputFile &&
"Missing output file in Snapshot finalization.");
739 if (!fDirName.empty()) {
740 treeDirectory = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
746 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
752 fOutputFile->Write();
756 fOutputFiles.clear();
760 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
777 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
782 for (
const auto &
bd : fBranchData.front()) {
783 if (
bd.fInputBranchName.empty())
808 const std::vector<const std::type_info *> &
colTypeIDs)
833 if (!fNTupleName.empty() && !fOutputFile && fOptions.fLazy)
834 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
840 auto nFields = fOutputFieldNames.size();
848 fInputFieldNames[i], fOptions.fVector2RVec)
851 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
856 writeOptions.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
858 fOutputFile.reset(
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
860 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
863 if (!fDirName.empty()) {
867 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
869 outputDir = fOutputFile->mkdir(fDirName.c_str());
879 if (!fFillContexts[
slot]) {
880 fFillContexts[
slot] = fWriter->CreateFillContext();
881 fEntries[
slot] = fFillContexts[
slot]->GetModel().CreateBareEntry();
889 assert(values.size() == fFieldTokens.size());
890 for (
decltype(values.size()) i = 0; i < values.size(); i++) {
891 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
900 fFillContexts[
slot]->FlushCluster();
907 fFillContexts.clear();
911 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName +
"/" + fNTupleName, fFileName));
928 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
930 fNSlots,
finalName, fDirName, fNTupleName, fInputFieldNames,
931 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
static TBranch * SearchForBranch(TTree *tree, const char *name)
The head node of a RDF computation graph.
void FinalizeTask(unsigned int slot)
RSnapshotOptions fOptions
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
std::vector< std::vector< RBranchData > > fBranchData
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
RSnapshotOptions fOptions
void InitTask(TTreeReader *r, unsigned int slot)
void FinalizeTask(unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
RSnapshotOptions fOptions
std::vector< RBranchData > fBranchData
void InitTask(TTreeReader *, unsigned int)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an RNTuple to the existing file.
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
A Branch for the case of an object.
A TTree is a list of TBranches.
TClassRef is used to implement a permanent reference to a TClass object.
TClass instances represent classes, structs and namespaces in the ROOT type system.
Basic data type descriptor (datatype information is obtained from CINT).
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Describe directory structure in memory.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
Mother of all ROOT objects.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each of the given column names.
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "Float_t") to ROOT type code (e.g. 'F').
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
char TypeID2ROOTTypeName(const std::type_info &tid)
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
std::vector< std::string > ColumnNames_t
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type: a type name, e.g. vector<list<classA,allocator>,allocator>. Result: 0 if it is not an STL container; otherwise the code of the cont...
Stores properties of each output branch in a Snapshot.
Tag to let data sources use the native data type when creating a column reader.
A collection of options to steer the creation of the dataset on file.