44static_assert(std::is_nothrow_move_assignable_v<RBranchData>);
45static_assert(std::is_nothrow_move_constructible_v<RBranchData>);
53 if (
branchData.fOutputBranch->GetAddress() ==
nullptr)
64 std::string
msg =
"RDataFrame::Snapshot:";
67 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
73 msg.resize(
msg.size() - 2);
74 msg +=
" are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
76 msg +=
" not part of the set of branches that are being written out.";
77 throw std::runtime_error(
msg);
106 const auto bname =
leaf->GetName();
139 const auto btype =
leaf->GetTypeName();
143 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
144 "leaf is of type '%s'. This column will not be written out.",
202 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
203 "column will not be written out.",
204 bd.fOutputBranchName.c_str());
215 const std::string &fileName)
223 std::unique_ptr<TFile>
outFile{
TFile::Open(fileName.c_str(),
"UPDATE_WITHOUT_GLOBALREGISTRATION")};
225 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
232 if (
opts.fOverwriteIfExists) {
242 const std::string
msg =
"Snapshot: object \"" +
objName +
"\" already present in file \"" + fileName +
243 "\". If you want to delete the original object and write another, please set the "
244 "'fOverwriteIfExists' option to true in RSnapshotOptions.";
245 throw std::invalid_argument(
msg);
315 throw std::logic_error(
316 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
330 }
else if (options.
fOutputFormat == OutputFormat::kRNTuple) {
337 throw std::invalid_argument(
"RDataFrame::Snapshot: unrecognized output format");
343 if (
mode.find(
"_WITHOUT_GLOBALREGISTRATION") != std::string::npos) {
346 return mode +
"_WITHOUT_GLOBALREGISTRATION";
352 const std::type_info *
typeID)
377 auto &
dynamic = std::get<EmptyDynamicType>(fTypeData);
385 dynamic.fEmptyInstance = std::shared_ptr<void>{tclass->New(), tclass->GetDestructor()};
391 return &
dynamic.fRawPtrToEmptyInstance;
393 return dynamic.fEmptyInstance.get();
410 fOutputBranch->SetAddress(EmptyInstance(
true));
418 const std::vector<const std::type_info *> &
colTypeIDs)
430 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
443 if (!fTreeName.empty() && !fOutputFile && fOptions.fLazy) {
447 return checkupdate ==
"update" ?
"updated" :
"created";
450 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
451 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
452 "its result in a variable and for example calling the GetValue() method on it.",
462 fInputTree =
treeDS->GetTree();
463 fBranchAddressesNeedReset =
true;
468 if (!fBranchAddressesNeedReset) {
469 UpdateCArraysPtrs(values);
472 fBranchAddressesNeedReset =
false;
484 assert(values.size() == fBranchData.size());
487 if (fBranchData[i].fIsCArray) {
491 if (
auto *
data =
rawRVec->data(); fBranchData[i].fBranchAddressForCArrays !=
data) {
492 fBranchData[i].fOutputBranch->SetAddress(
data);
493 fBranchData[i].fBranchAddressForCArrays =
data;
502 assert(fBranchData.size() == values.size());
503 for (std::size_t i = 0; i < fBranchData.size(); i++) {
504 SetBranchesHelper(fInputTree, *fOutputTree, fBranchData, i, fOptions.fBasketSize, values[i]);
512 for (std::size_t i = 0; i < fBranchData.size(); i++) {
522 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
525 if (!fDirName.empty()) {
529 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
531 outputDir = fOutputFile->mkdir(fDirName.c_str());
534 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
outputDir);
536 if (fOptions.fAutoFlush)
537 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
542 assert(fOutputTree !=
nullptr);
543 assert(fOutputFile !=
nullptr);
547 if (fOutputTree->GetEntries() == 0) {
548 SetEmptyBranches(fInputTree, *fOutputTree);
551 fOutputTree->AutoSave(
"flushbaskets");
554 fOutputFile->Close();
557 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
558 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
575 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
580 for (
const auto &
bd : fBranchData) {
581 if (
bd.fInputBranchName.empty())
605 const std::vector<const std::type_info *> &
colTypeIDs)
607 fOutputFiles(fNSlots),
608 fOutputTrees(fNSlots),
609 fBranchAddressesNeedReset(fNSlots, 1),
610 fInputTrees(fNSlots),
626 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
640 if (!fTreeName.empty() && fOptions.fLazy && !fOutputFiles.empty() &&
641 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](
const auto &
f) { return !f; }) ) {
645 return checkupdate ==
"update" ?
"updated" :
"created";
648 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
649 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
650 "its result in a variable and for example calling the GetValue() method on it.",
658 if (!fOutputFiles[
slot]) {
660 fOutputFiles[
slot] = fMerger->GetFile();
663 if (!fDirName.empty()) {
670 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
673 fOutputTrees[
slot]->SetImplicitMT(
false);
674 if (fOptions.fAutoFlush)
675 fOutputTrees[
slot]->SetAutoFlush(fOptions.fAutoFlush);
678 fInputTrees[
slot] =
r->GetTree();
682 fBranchAddressesNeedReset[
slot] = 1;
687 if (fOutputTrees[
slot]->GetEntries() > 0)
688 fOutputFiles[
slot]->Write();
692 fOutputTrees[
slot].reset(
nullptr);
697 if (fBranchAddressesNeedReset[
slot] == 0) {
698 UpdateCArraysPtrs(
slot, values);
700 SetBranches(
slot, values);
701 fBranchAddressesNeedReset[
slot] = 0;
703 fOutputTrees[
slot]->Fill();
704 auto entries = fOutputTrees[
slot]->GetEntries();
707 fOutputFiles[
slot]->Write();
711 const std::vector<void *> &values)
717 assert(values.size() == fBranchData[
slot].size());
735 const std::vector<void *> &values)
740 for (std::size_t i = 0; i <
branchData.size(); i++) {
751 for (std::size_t i = 0; i <
branchData.size(); i++) {
762 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
764 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(
outFile));
770 for (
auto &file : fOutputFiles) {
779 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
780 assert(fOutputFile &&
"Missing output file in Snapshot finalization.");
786 if (!fDirName.empty()) {
787 treeDirectory = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
793 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
799 fOutputFile->Write();
803 fOutputFiles.clear();
807 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
824 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
829 for (
const auto &
bd : fBranchData.front()) {
830 if (
bd.fInputBranchName.empty())
855 const std::vector<const std::type_info *> &
colTypeIDs)
880 if (!fNTupleName.empty() && !fOutputFile && fOptions.fLazy)
881 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
887 auto nFields = fOutputFieldNames.size();
895 fInputFieldNames[i], fOptions.fVector2RVec)
899 if (typeName.substr(0, 25) ==
"ROOT::RNTupleCardinality<") {
901 std::string
cardinalityType = typeName.substr(25, typeName.size() - 26);
903 "Column \"%s\" is a read-only \"%s\" column. It will be snapshot as its inner type \"%s\" instead.",
904 fInputFieldNames[i].c_str(), typeName.c_str(),
cardinalityType.c_str());
909 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
915 writeOptions.SetInitialUnzippedPageSize(fOptions.fInitialUnzippedPageSize);
916 writeOptions.SetMaxUnzippedPageSize(fOptions.fMaxUnzippedPageSize);
917 writeOptions.SetApproxZippedClusterSize(fOptions.fApproxZippedClusterSize);
918 writeOptions.SetMaxUnzippedClusterSize(fOptions.fMaxUnzippedClusterSize);
919 writeOptions.SetEnablePageChecksums(fOptions.fEnablePageChecksums);
920 writeOptions.SetEnableSamePageMerging(fOptions.fEnableSamePageMerging);
924 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
927 if (!fDirName.empty()) {
931 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
933 outputDir = fOutputFile->mkdir(fDirName.c_str());
943 if (!fFillContexts[
slot]) {
944 fFillContexts[
slot] = fWriter->CreateFillContext();
945 fEntries[
slot] = fFillContexts[
slot]->GetModel().CreateBareEntry();
953 assert(values.size() == fFieldTokens.size());
954 for (
decltype(values.size()) i = 0; i < values.size(); i++) {
955 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
964 fFillContexts[
slot]->FlushCluster();
971 fFillContexts.clear();
975 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName +
"/" + fNTupleName, fFileName));
992 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
994 fNSlots,
finalName, fDirName, fNTupleName, fInputFieldNames,
995 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
1031 (std::string{
"R_rdf_column_to_bitmask_mapping_"} +
fTree->GetName()).c_str());
1035 fTree->AutoSave(
"flushbaskets");
1038 std::string tree =
fTree->GetName();
1041 std::string file =
fFile->GetName();
1064 throw std::logic_error(
"Branch " + branchName +
1065 " is being registered with different variation index than the expected one: " +
1079 std::string{
"R_rdf_mask_"} +
fTree->GetName() +
'_' + std::to_string(
fBitMasks.size());
1092 mask.bitset.reset();
1113 throw std::runtime_error(
"The TTree associated to the Snapshot action doesn't exist, any more.");
1116 *
mask.branchBuffer =
mask.bitset.to_ullong();
1129 const std::vector<const std::type_info *> &
colTypeIDs)
1139 throw std::runtime_error(std::string{
"Snapshot: could not create output file "} + std::string{
filename});
1162 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
1212 fInputTree =
treeDS->GetTree();
1215 for (std::size_t i = 0; i < fBranchData.size(); i++) {
1216 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize,
1217 fBranchData[i].EmptyInstance(
false));
1229 assert(fBranchData.size() == values.size());
1230 for (std::size_t i = 0; i < values.size(); i++) {
1234 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1238 const bool fundamentalType = fBranchData[i].WriteValueIfFundamental(values[i]);
1240 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1250 assert(!fOutputHandle->MaskEmpty());
1252 fOutputHandle->Write();
1253 fOutputHandle->ClearMaskBits();
1261 fOutputHandle.reset();
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char mode
static TBranch * SearchForBranch(TTree *tree, const char *name)
The head node of a RDF computation graph.
void SetDataSource(std::unique_ptr< ROOT::RDF::RDataSource > dataSource)
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
RSnapshotOptions fOptions
SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&, ROOT::Detail::RDF::RLoopManager *outputLoopMgr, ROOT::Detail::RDF::RLoopManager *inputLoopMgr, const std::vector< const std::type_info * > &colTypeIDs)
void InitTask(TTreeReader *, unsigned int slot)
Bind all output branches to RDF columns for the given slots.
std::vector< RBranchData > fBranchData
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
void Exec(unsigned int, const std::vector< void * > &values, std::vector< bool > const &filterPassed)
Connect all output fields to the values pointed to by values, fill the output dataset,...
void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex, unsigned int varationIndex, std::string const &variationName)
Register a new column as a variation of the column at originalColumnIndex, and clone its properties.
void FinalizeTask(unsigned int slot)
RSnapshotOptions fOptions
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
std::vector< std::vector< RBranchData > > fBranchData
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
RSnapshotOptions fOptions
void InitTask(TTreeReader *r, unsigned int slot)
void FinalizeTask(unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
RSnapshotOptions fOptions
std::vector< RBranchData > fBranchData
void InitTask(TTreeReader *, unsigned int)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an RNTuple to the existing file.
Common user-tunable settings for storing RNTuples.
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
A Branch for the case of an object.
A TTree is a list of TBranches.
TClassRef is used to implement a permanent reference to a TClass object.
TClass instances represent classes, structs and namespaces in the ROOT type system.
Basic data type descriptor (datatype information is obtained from CINT).
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Describe directory structure in memory.
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
char TypeID2ROOTTypeName(const std::type_info &tid)
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
std::vector< std::string > ColumnNames_t
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn...
Stores variations of a fundamental type.
Stores properties of each output branch in a Snapshot.
void * EmptyInstance(bool pointerToPointer)
Return a pointer to an empty instance of the type represented by this branch.
void ClearBranchContents()
Point the branch address to an empty instance of the type represented by this branch or write null by...
std::variant< FundamentalType, EmptyDynamicType > fTypeData
const std::type_info * fInputTypeID
std::unique_ptr< uint64_t > branchBuffer
An object to store an output file and a tree in one common place to share them between instances of S...
void Write() const
Write the current event and the bitmask to the output dataset.
std::unique_ptr< TTree > fTree
void ClearMaskBits()
Clear all bits, as if none of the variations passed its filter.
SnapshotOutputWriter(SnapshotOutputWriter const &)=delete
RLoopManager * fOutputLoopManager
std::string fDirectoryName
std::unordered_map< std::string, std::pair< std::string, unsigned int > > fBranchToBitmaskMapping
std::unique_ptr< TFile > fFile
void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
Register a branch and corresponding systematic uncertainty.
SnapshotOutputWriter(TFile *file)
void SetMaskBit(unsigned int index)
Set a bit signalling that the variation at index passed its filter.
bool MaskEmpty() const
Test if any of the mask bits are set.
SnapshotOutputWriter & operator=(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, unsigned int > fBranchToVariationMapping
SnapshotOutputWriter(SnapshotOutputWriter &&) noexcept=delete
std::vector< Bitmask > fBitMasks
Tag to let data sources use the native data type when creating a column reader.
EValues
Note: this is only temporarily a struct and will become a enum class hence the name convention used.
A collection of options to steer the creation of the dataset on disk through Snapshot().
int fAutoFlush
*(TTree only)* AutoFlush value for output tree
ESnapshotOutputFormat fOutputFormat
Which data format to write to.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
*(TTree only)* Split level of output tree
int fCompressionLevel
Compression level of output file.