Logo ROOT   6.14/05
Reference Guide
RRootDS.cxx
Go to the documentation of this file.
1 #include <ROOT/RDFUtils.hxx>
2 #include <ROOT/RRootDS.hxx>
3 #include <ROOT/TSeq.hxx>
4 #include <TClass.h>
5 #include <TROOT.h> // For the gROOTMutex
6 #include <TVirtualMutex.h> // For the R__LOCKGUARD
7 #include <ROOT/RMakeUnique.hxx>
8 
9 #include <algorithm>
10 #include <vector>
11 
12 namespace ROOT {
13 
14 namespace RDF {
15 
16 std::vector<void *> RRootDS::GetColumnReadersImpl(std::string_view name, const std::type_info &id)
17 {
18  const auto colTypeName = GetTypeName(name);
19  const auto &colTypeId = ROOT::Internal::RDF::TypeName2TypeID(colTypeName);
20  if (id != colTypeId) {
21  std::string err = "The type of column \"";
22  err += name;
23  err += "\" is ";
24  err += colTypeName;
25  err += " but a different one has been selected.";
26  throw std::runtime_error(err);
27  }
28 
29  const auto index =
30  std::distance(fListOfBranches.begin(), std::find(fListOfBranches.begin(), fListOfBranches.end(), name));
31  std::vector<void *> ret(fNSlots);
32  for (auto slot : ROOT::TSeqU(fNSlots)) {
33  ret[slot] = (void *)&fBranchAddresses[index][slot];
34  }
35  return ret;
36 }
37 
39  : fTreeName(treeName), fFileNameGlob(fileNameGlob), fModelChain(std::string(treeName).c_str())
40 {
41  fModelChain.Add(fFileNameGlob.c_str());
42 
43  const TObjArray &lob = *fModelChain.GetListOfBranches();
44  fListOfBranches.resize(lob.GetEntries());
45 
46  TIterCategory<TObjArray> iter(&lob);
47  std::transform(iter.Begin(), iter.End(), fListOfBranches.begin(), [](TObject *o) { return o->GetName(); });
48 }
49 
51 {
52  for (auto addr : fAddressesToFree) {
53  delete addr;
54  }
55 }
56 
57 std::string RRootDS::GetTypeName(std::string_view colName) const
58 {
59  if (!HasColumn(colName)) {
60  std::string e = "The dataset does not have column ";
61  e += colName;
62  throw std::runtime_error(e);
63  }
64  // TODO: we need to factor out the routine for the branch alone...
65  // Maybe a cache for the names?
66  auto typeName =
67  ROOT::Internal::RDF::ColumnName2ColumnTypeName(std::string(colName), /*nsID=*/0, &fModelChain, /*ds=*/nullptr,
68  /*isCustomCol=*/false);
69  // We may not have yet loaded the library where the dictionary of this type is
70  TClass::GetClass(typeName.c_str());
71  return typeName;
72 }
73 
74 const std::vector<std::string> &RRootDS::GetColumnNames() const
75 {
76  return fListOfBranches;
77 }
78 
80 {
81  if (!fListOfBranches.empty())
83  return fListOfBranches.end() != std::find(fListOfBranches.begin(), fListOfBranches.end(), colName);
84 }
85 
86 void RRootDS::InitSlot(unsigned int slot, ULong64_t firstEntry)
87 {
88  auto chain = new TChain(fTreeName.c_str());
89  chain->ResetBit(kMustCleanup);
90  chain->Add(fFileNameGlob.c_str());
91  chain->GetEntry(firstEntry);
92  TString setBranches;
93  for (auto i : ROOT::TSeqU(fListOfBranches.size())) {
94  auto colName = fListOfBranches[i].c_str();
95  auto &addr = fBranchAddresses[i][slot];
96  auto typeName = GetTypeName(colName);
97  auto typeClass = TClass::GetClass(typeName.c_str());
98  if (typeClass) {
99  chain->SetBranchAddress(colName, &addr, nullptr, typeClass, EDataType(0), true);
100  } else {
101  if (!addr) {
102  addr = new double();
103  fAddressesToFree.emplace_back((double *)addr);
104  }
105  chain->SetBranchAddress(colName, addr);
106  }
107  }
108  fChains[slot].reset(chain);
109 }
110 
111 void RRootDS::FinaliseSlot(unsigned int slot)
112 {
113  fChains[slot].reset(nullptr);
114 }
115 
116 std::vector<std::pair<ULong64_t, ULong64_t>> RRootDS::GetEntryRanges()
117 {
118  auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
119  return entryRanges;
120 }
121 
122 bool RRootDS::SetEntry(unsigned int slot, ULong64_t entry)
123 {
124  fChains[slot]->GetEntry(entry);
125  return true;
126 }
127 
128 void RRootDS::SetNSlots(unsigned int nSlots)
129 {
130  assert(0U == fNSlots && "Setting the number of slots even if the number of slots is different from zero.");
131 
132  fNSlots = nSlots;
133 
134  const auto nColumns = fListOfBranches.size();
135  // Initialise the entire set of addresses
136  fBranchAddresses.resize(nColumns, std::vector<void *>(fNSlots, nullptr));
137 
138  fChains.resize(fNSlots);
139 }
140 
142 {
143  const auto nentries = fModelChain.GetEntries();
144  const auto chunkSize = nentries / fNSlots;
145  const auto reminder = 1U == fNSlots ? 0 : nentries % fNSlots;
146  auto start = 0UL;
147  auto end = 0UL;
148  for (auto i : ROOT::TSeqU(fNSlots)) {
149  start = end;
150  end += chunkSize;
151  fEntryRanges.emplace_back(start, end);
152  (void)i;
153  }
154  fEntryRanges.back().second += reminder;
155 }
156 
158 {
159  ROOT::RDataFrame tdf(std::make_unique<RRootDS>(treeName, fileNameGlob));
160  return tdf;
161 }
162 
163 } // ns RDF
164 
165 } // ns ROOT
An array of TObjects.
Definition: TObjArray.h:37
RRootDS(std::string_view treeName, std::string_view fileNameGlob)
Definition: RRootDS.cxx:38
bool SetEntry(unsigned int slot, ULong64_t entry)
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot...
Definition: RRootDS.cxx:122
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
const std::vector< std::string > & GetColumnNames() const
Returns a reference to the collection of the dataset's column names.
Definition: RRootDS.cxx:74
static TIterCategory End()
Definition: TCollection.h:279
Basic string class.
Definition: TString.h:131
STL namespace.
RDataFrame MakeRootDataFrame(std::string_view treeName, std::string_view fileNameGlob)
Definition: RRootDS.cxx:157
std::vector< double * > fAddressesToFree
Definition: RRootDS.hxx:30
virtual Long64_t GetEntries() const
Return the total number of entries in the chain.
Definition: TChain.cxx:937
std::vector< std::vector< void * > > fBranchAddresses
Definition: RRootDS.hxx:33
std::vector< std::string > fListOfBranches
Definition: RRootDS.hxx:31
bool HasColumn(std::string_view colName) const
Checks if the dataset has a certain column.
Definition: RRootDS.cxx:79
TChain fModelChain
Definition: RRootDS.hxx:29
std::string fTreeName
Definition: RRootDS.hxx:27
std::string GetTypeName(std::string_view colName) const
Type of a column as a string, e.g.
Definition: RRootDS.cxx:57
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees, CSV files and other data formats.
Definition: RDataFrame.hxx:42
std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree, RDataSource *ds, bool isCustomColumn, bool vector2tvec)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:184
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges()
Return ranges of entries to distribute to tasks.
Definition: RRootDS.cxx:116
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
std::string fFileNameGlob
Definition: RRootDS.hxx:28
unsigned long long ULong64_t
Definition: RtypesCore.h:70
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition: RDFUtils.cxx:40
basic_string_view< char > string_view
Definition: RStringView.hxx:35
int nentries
Definition: THbookFile.cxx:89
EDataType
Definition: TDataType.h:28
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2887
Mother of all ROOT objects.
Definition: TObject.h:37
void Initialise()
Convenience method called before starting an event-loop.
Definition: RRootDS.cxx:141
typedef void((*Func_t)())
unsigned int fNSlots
Definition: RRootDS.hxx:26
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
void InitSlot(unsigned int slot, ULong64_t firstEntry)
Convenience method called at the start of the data processing associated to a slot.
Definition: RRootDS.cxx:86
Int_t GetEntries() const
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:522
TIterCategory & Begin()
Definition: TCollection.h:278
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition: RRootDS.hxx:32
void ResetBit(UInt_t f)
Definition: TObject.h:171
void SetNSlots(unsigned int nSlots)
Inform RDataSource of the number of processing slots (i.e.
Definition: RRootDS.cxx:128
std::vector< std::unique_ptr< TChain > > fChains
Definition: RRootDS.hxx:34
virtual TObjArray * GetListOfBranches()
Return a pointer to the list of branches of the current tree.
Definition: TChain.cxx:1084
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &)
type-erased vector of pointers to pointers to column values - one per slot
Definition: RRootDS.cxx:16
void FinaliseSlot(unsigned int slot)
Convenience method called at the end of the data processing associated to a slot. ...
Definition: RRootDS.cxx:111
char name[80]
Definition: TGX11.cxx:109
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:222