Logo ROOT  
Reference Guide
RRootDS.cxx
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
3 * All rights reserved. *
4 * *
5 * For the licensing terms see $ROOTSYS/LICENSE. *
6 * For the list of contributors see $ROOTSYS/README/CREDITS. *
7 *************************************************************************/
8
9#include <ROOT/RDF/Utils.hxx>
10#include <ROOT/RRootDS.hxx>
11#include <ROOT/TSeq.hxx>
12#include <TClass.h>
13#include <TError.h>
14#include <TROOT.h> // For the gROOTMutex
15#include <TVirtualMutex.h> // For the R__LOCKGUARD
16
17#include <algorithm>
18#include <vector>
19
20namespace ROOT {
21
22namespace Internal {
23
24namespace RDF {
25
26std::vector<void *> RRootDS::GetColumnReadersImpl(std::string_view name, const std::type_info &id)
27{
28 const auto colTypeName = GetTypeName(name);
29 const auto &colTypeId = ROOT::Internal::RDF::TypeName2TypeID(colTypeName);
30 if (id != colTypeId) {
31 std::string err = "The type of column \"";
32 err += name;
33 err += "\" is ";
34 err += colTypeName;
35 err += " but a different one has been selected.";
36 throw std::runtime_error(err);
37 }
38
39 const auto index =
40 std::distance(fListOfBranches.begin(), std::find(fListOfBranches.begin(), fListOfBranches.end(), name));
41 std::vector<void *> ret(fNSlots);
42 for (auto slot : ROOT::TSeqU(fNSlots)) {
43 ret[slot] = (void *)&fBranchAddresses[index][slot];
44 }
45 return ret;
46}
47
49 : fTreeName(treeName), fFileNameGlob(fileNameGlob), fModelChain(std::string(treeName).c_str())
50{
52
55
56 TIterCategory<TObjArray> iter(&lob);
57 std::transform(iter.Begin(), iter.End(), fListOfBranches.begin(), [](TObject *o) { return o->GetName(); });
58}
59
61{
62 for (auto addr : fAddressesToFree) {
63 delete addr;
64 }
65}
66
67std::string RRootDS::GetTypeName(std::string_view colName) const
68{
69 if (!HasColumn(colName)) {
70 std::string e = "The dataset does not have column ";
71 e += colName;
72 throw std::runtime_error(e);
73 }
74 // TODO: we need to factor out the routine for the branch alone...
75 // Maybe a cache for the names?
76 auto typeName = ROOT::Internal::RDF::ColumnName2ColumnTypeName(std::string(colName), &fModelChain, /*ds=*/nullptr,
77 /*define=*/nullptr);
78 // We may not have yet loaded the library where the dictionary of this type is
79 TClass::GetClass(typeName.c_str());
80 return typeName;
81}
82
83const std::vector<std::string> &RRootDS::GetColumnNames() const
84{
85 return fListOfBranches;
86}
87
89{
90 if (!fListOfBranches.empty())
92 return fListOfBranches.end() != std::find(fListOfBranches.begin(), fListOfBranches.end(), colName);
93}
94
95void RRootDS::InitSlot(unsigned int slot, ULong64_t firstEntry)
96{
97 auto chain = new TChain(fTreeName.c_str());
98 chain->ResetBit(kMustCleanup);
99 chain->Add(fFileNameGlob.c_str());
100 chain->GetEntry(firstEntry);
101 TString setBranches;
102 for (auto i : ROOT::TSeqU(fListOfBranches.size())) {
103 auto colName = fListOfBranches[i].c_str();
104 auto &addr = fBranchAddresses[i][slot];
105 auto typeName = GetTypeName(colName);
106 auto typeClass = TClass::GetClass(typeName.c_str());
107 if (typeClass) {
108 chain->SetBranchAddress(colName, &addr, nullptr, typeClass, EDataType(0), true);
109 } else {
110 if (!addr) {
111 addr = new double();
112 fAddressesToFree.emplace_back((double *)addr);
113 }
114 chain->SetBranchAddress(colName, addr);
115 }
116 }
117 fChains[slot].reset(chain);
118}
119
120void RRootDS::FinalizeSlot(unsigned int slot)
121{
122 fChains[slot].reset(nullptr);
123}
124
125std::vector<std::pair<ULong64_t, ULong64_t>> RRootDS::GetEntryRanges()
126{
127 auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
128 return entryRanges;
129}
130
131bool RRootDS::SetEntry(unsigned int slot, ULong64_t entry)
132{
133 fChains[slot]->GetEntry(entry);
134 return true;
135}
136
137void RRootDS::SetNSlots(unsigned int nSlots)
138{
139 assert(0U == fNSlots && "Setting the number of slots even if the number of slots is different from zero.");
140
141 fNSlots = nSlots;
142
143 const auto nColumns = fListOfBranches.size();
144 // Initialize the entire set of addresses
145 fBranchAddresses.resize(nColumns, std::vector<void *>(fNSlots, nullptr));
146
147 fChains.resize(fNSlots);
148}
149
151{
152 const auto nentries = fModelChain.GetEntries();
153 const auto chunkSize = nentries / fNSlots;
154 const auto reminder = 1U == fNSlots ? 0 : nentries % fNSlots;
155 auto start = 0UL;
156 auto end = 0UL;
157 for (auto i : ROOT::TSeqU(fNSlots)) {
158 start = end;
159 end += chunkSize;
160 fEntryRanges.emplace_back(start, end);
161 (void)i;
162 }
163 fEntryRanges.back().second += reminder;
164}
165
166std::string RRootDS::GetLabel()
167{
168 return "Root";
169}
170
172{
173 return ROOT::RDataFrame(treeName, fileNameGlob);
174}
175
176} // ns RDF
177
178} // ns Internal
179
180} // ns ROOT
#define e(i)
Definition: RSha256.hxx:103
unsigned long long ULong64_t
Definition: RtypesCore.h:81
EDataType
Definition: TDataType.h:28
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
char name[80]
Definition: TGX11.cxx:110
int nentries
Definition: THbookFile.cxx:91
@ kMustCleanup
Definition: TObject.h:355
void FinalizeSlot(unsigned int slot)
Convenience method called at the end of the data processing associated to a slot.
Definition: RRootDS.cxx:120
bool HasColumn(std::string_view colName) const
Checks if the dataset has a certain column.
Definition: RRootDS.cxx:88
const std::vector< std::string > & GetColumnNames() const
Returns a reference to the collection of the dataset's column names.
Definition: RRootDS.cxx:83
std::string GetLabel()
Return a string representation of the datasource type.
Definition: RRootDS.cxx:166
std::string GetTypeName(std::string_view colName) const
Type of a column as a string, e.g.
Definition: RRootDS.cxx:67
RRootDS(std::string_view treeName, std::string_view fileNameGlob)
Definition: RRootDS.cxx:48
std::vector< std::vector< void * > > fBranchAddresses
Definition: RRootDS.hxx:37
std::vector< std::unique_ptr< TChain > > fChains
Definition: RRootDS.hxx:38
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &)
type-erased vector of pointers to pointers to column values - one per slot
Definition: RRootDS.cxx:26
void SetNSlots(unsigned int nSlots)
Inform RDataSource of the number of processing slots (i.e.
Definition: RRootDS.cxx:137
std::vector< std::string > fListOfBranches
Definition: RRootDS.hxx:35
std::vector< double * > fAddressesToFree
Definition: RRootDS.hxx:34
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges()
Return ranges of entries to distribute to tasks.
Definition: RRootDS.cxx:125
void Initialize()
Convenience method called before starting an event-loop.
Definition: RRootDS.cxx:150
void InitSlot(unsigned int slot, ULong64_t firstEntry)
Convenience method called at the start of the data processing associated to a slot.
Definition: RRootDS.cxx:95
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition: RRootDS.hxx:36
bool SetEntry(unsigned int slot, ULong64_t entry)
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition: RRootDS.cxx:131
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Definition: RDataFrame.hxx:40
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:67
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
virtual TObjArray * GetListOfBranches()
Return a pointer to the list of branches of the current tree.
Definition: TChain.cxx:1105
virtual Long64_t GetEntries() const
Return the total number of entries in the chain.
Definition: TChain.cxx:958
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:218
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2968
TIterCategory & Begin()
Definition: TCollection.h:287
static TIterCategory End()
Definition: TCollection.h:288
An array of TObjects.
Definition: TObjArray.h:31
Int_t GetEntriesUnsafe() const
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:565
Mother of all ROOT objects.
Definition: TObject.h:37
Basic string class.
Definition: TString.h:136
basic_string_view< char > string_view
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition: RDFUtils.cxx:51
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:224
RDataFrame MakeRootDataFrame(std::string_view treeName, std::string_view fileNameGlob)
Definition: RRootDS.cxx:171
void(off) SmallVectorTemplateBase< T
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.