Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RTTreeDS.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RTTreeDS.hxx
3 \ingroup dataframe
4 \author Vincenzo Eduardo Padulano
5 \date 2024-12
6*/
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT_INTERNAL_RDF_RTTREEDS
17#define ROOT_INTERNAL_RDF_RTTREEDS
18
19#include "ROOT/RDataSource.hxx"
20
21#include <memory>
22#include <string>
23#include <vector>
24#include <stdexcept>
25#include <string_view>
26
27// Begin forward decls
28
29namespace ROOT {
30class RDataFrame;
31}
32
33namespace ROOT::Detail::RDF {
34class RLoopManager;
35}
36
37namespace ROOT::RDF {
38class RSampleInfo;
39}
40
41namespace ROOT::RDF::Experimental {
42class RSample;
43}
44
45namespace ROOT::TreeUtils {
46struct RFriendInfo;
47}
48
49class TChain;
50class TDirectory;
51class TTree;
52class TTreeReader;
53
54// End forward decls
55
56namespace ROOT::Internal::RDF {
57
58class RTTreeDS final : public ROOT::RDF::RDataSource {
59 std::vector<std::string> fBranchNamesWithDuplicates{};
60 std::vector<std::string> fBranchNamesWithoutDuplicates{};
61 std::vector<std::string> fTopLevelBranchNames{};
62
63 std::shared_ptr<TTree> fTree;
64
65 std::unique_ptr<TTreeReader> fTreeReader;
66
67 std::vector<std::unique_ptr<TChain>> fFriends;
68
69 ROOT::RDF::RSampleInfo
70 CreateSampleInfo(const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &sampleMap) const final;
71
72 void RunFinalChecks(bool nodesLeftNotRun) const final;
73
74 void Setup(std::shared_ptr<TTree> &&tree, const ROOT::TreeUtils::RFriendInfo *friendInfo = nullptr);
75
76 std::vector<std::pair<ULong64_t, ULong64_t>> GetTTreeEntryRange(TTree &tree);
77 std::vector<std::pair<ULong64_t, ULong64_t>> GetTChainEntryRange(TChain &chain);
78
79public:
80 RTTreeDS(std::shared_ptr<TTree> tree);
81 RTTreeDS(std::shared_ptr<TTree> tree, const ROOT::TreeUtils::RFriendInfo &friendInfo);
82 RTTreeDS(std::string_view treeName, TDirectory *dirPtr);
83 RTTreeDS(std::string_view treeName, std::string_view fileNameGlob);
84 RTTreeDS(std::string_view treeName, const std::vector<std::string> &fileNameGlobs);
85
86 // Rule of five
87 RTTreeDS(const RTTreeDS &) = delete;
88 RTTreeDS &operator=(const RTTreeDS &) = delete;
89 RTTreeDS(RTTreeDS &&) = delete;
90 RTTreeDS &operator=(RTTreeDS &&) = delete;
91 ~RTTreeDS() final; // Define destructor where data member types are defined
92
93 void Initialize() final;
94
95 void Finalize() final;
96
97 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
98
99 const std::vector<std::string> &GetColumnNames() const final { return fBranchNamesWithDuplicates; }
100
101 bool HasColumn(std::string_view colName) const final
102 {
103 return std::find(fBranchNamesWithDuplicates.begin(), fBranchNamesWithDuplicates.end(), colName) !=
104 fBranchNamesWithDuplicates.end();
105 }
106
107 std::string GetTypeName(std::string_view colName) const final;
108
109 std::string GetTypeNameWithOpts(std::string_view colName, bool vector2RVec) const final;
110
111 bool SetEntry(unsigned int, ULong64_t entry) final;
112
113 Record_t GetColumnReadersImpl(std::string_view /* name */, const std::type_info & /* ti */) final
114 {
115 // This datasource uses the newer GetColumnReaders() API
116 return {};
117 }
118
119 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
120 GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
121 {
122 // This data source creates column readers via CreateColumnReader
123 throw std::runtime_error("GetColumnReaders should not be called on this data source, something wrong happened!");
124 }
125
126 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase> CreateColumnReader(unsigned int slot, std::string_view col,
127 const std::type_info &tid,
128 TTreeReader *treeReader) final;
129
130 std::string GetLabel() final { return "TTreeDS"; }
131
132 TTree *GetTree();
133
134 const std::vector<std::string> &GetTopLevelFieldNames() const final { return fTopLevelBranchNames; }
135
136 const std::vector<std::string> &GetColumnNamesNoDuplicates() const final { return fBranchNamesWithoutDuplicates; }
137
138 void InitializeWithOpts(const std::set<std::string> &suppressErrorsForMissingBranches) final;
139
140 std::string DescribeDataset() final;
141
142 std::string AsString() final { return "TTree data source"; }
143
144 std::size_t GetNFiles() const final;
145
146 void ProcessMT(ROOT::Detail::RDF::RLoopManager &lm) final;
147};
148
149ROOT::RDataFrame FromTTree(std::string_view treeName, std::string_view fileNameGlob);
150ROOT::RDataFrame FromTTree(std::string_view treeName, const std::vector<std::string> &fileNameGlobs);
151
152} // namespace ROOT::Internal::RDF
153
154#endif
unsigned long long ULong64_t
Definition RtypesCore.h:70
The head node of a RDF computation graph.
std::vector< std::pair< ULong64_t, ULong64_t > > GetTTreeEntryRange(TTree &tree)
Definition RTTreeDS.cxx:338
std::vector< std::string > fBranchNamesWithoutDuplicates
Definition RTTreeDS.hxx:60
RTTreeDS & operator=(RTTreeDS &&)=delete
std::size_t GetNFiles() const final
Returns the number of files from which the dataset is constructed.
Definition RTTreeDS.cxx:205
std::string DescribeDataset() final
Definition RTTreeDS.cxx:214
std::string GetLabel() final
Return a string representation of the datasource type.
Definition RTTreeDS.hxx:130
const std::vector< std::string > & GetColumnNamesNoDuplicates() const final
Definition RTTreeDS.hxx:136
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
Definition RTTreeDS.hxx:99
void Setup(std::shared_ptr< TTree > &&tree, const ROOT::TreeUtils::RFriendInfo *friendInfo=nullptr)
Definition RTTreeDS.cxx:67
std::vector< std::unique_ptr< TChain > > fFriends
Definition RTTreeDS.hxx:67
void Initialize() final
Convenience method called before starting an event-loop.
Definition RTTreeDS.cxx:400
const std::vector< std::string > & GetTopLevelFieldNames() const final
Definition RTTreeDS.hxx:134
RTTreeDS & operator=(const RTTreeDS &)=delete
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
Definition RTTreeDS.hxx:101
std::unique_ptr< TTreeReader > fTreeReader
Definition RTTreeDS.hxx:65
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
Definition RTTreeDS.hxx:120
void Finalize() final
Convenience method called after concluding an event-loop.
Definition RTTreeDS.cxx:392
RTTreeDS(RTTreeDS &&)=delete
RTTreeDS(const RTTreeDS &)=delete
bool SetEntry(unsigned int, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition RTTreeDS.cxx:304
void InitializeWithOpts(const std::set< std::string > &suppressErrorsForMissingBranches) final
Definition RTTreeDS.cxx:414
std::vector< std::string > fTopLevelBranchNames
Definition RTTreeDS.hxx:61
std::vector< std::pair< ULong64_t, ULong64_t > > GetTChainEntryRange(TChain &chain)
Definition RTTreeDS.cxx:348
Record_t GetColumnReadersImpl(std::string_view, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
Definition RTTreeDS.hxx:113
std::vector< std::string > fBranchNamesWithDuplicates
Definition RTTreeDS.hxx:59
RTTreeDS(std::shared_ptr< TTree > tree)
Definition RTTreeDS.cxx:87
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
Definition RTTreeDS.cxx:369
std::string AsString() final
Definition RTTreeDS.hxx:142
std::shared_ptr< TTree > fTree
Definition RTTreeDS.hxx:63
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
Definition RTTreeDS.cxx:325
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
friend std::string ROOT::Internal::RDF::GetTypeNameWithOpts(const RDataSource &, std::string_view, bool)
friend ROOT::RDF::RSampleInfo ROOT::Internal::RDF::CreateSampleInfo(const ROOT::RDF::RDataSource &, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &)
std::vector< void * > Record_t
friend std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > ROOT::Internal::RDF::CreateColumnReader(ROOT::RDF::RDataSource &, unsigned int, std::string_view, const std::type_info &, TTreeReader *)
friend void ROOT::Internal::RDF::ProcessMT(RDataSource &, ROOT::Detail::RDF::RLoopManager &)
friend void ROOT::Internal::RDF::RunFinalChecks(const ROOT::RDF::RDataSource &, bool)
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
Describe directory structure in memory.
Definition TDirectory.h:45
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:79
ROOT::RDataFrame FromTTree(std::string_view treeName, std::string_view fileNameGlob)
Definition RTTreeDS.cxx:133
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Information about friend trees of a certain TTree or TChain object.