Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterfaceBase.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 08/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include <ROOT/InternalTreeUtils.hxx> // GetFriendInfo, GetFileNamesFromTree
14#include <ROOT/RDF/Utils.hxx>
16#include <string_view>
17#include <TTree.h>
18
19#include <algorithm> // std::for_each
20#include <iomanip> // std::setw
21#include <memory>
22#include <set>
23#include <sstream>
24#include <string>
25#include <unordered_set>
26
28{
29 // TTree/TChain as input
30 if (const auto *tree = fLoopManager->GetTree()) {
31 if (!dynamic_cast<const TChain *>(tree) && !tree->GetCurrentFile()) {
32 // in-memory TTree
33 return 0;
34 }
36 }
37 // Datasource as input
38 if (auto dataSource = GetDataSource()) {
39 return dataSource->GetNFiles();
40 }
41 return 0;
42}
43
45{
46 // Datasource as input
47 if (auto ds = GetDataSource()) {
49 }
50 // Trivial/empty datasource
51 else {
52 const auto n = fLoopManager->GetNEmptyEntries();
53 if (n == 1) {
54 return "Empty dataframe filling 1 row";
55 } else {
56 return "Empty dataframe filling " + std::to_string(n) + " rows";
57 }
58 }
59}
60
61ROOT::RDF::RInterfaceBase::RInterfaceBase(std::shared_ptr<RDFDetail::RLoopManager> lm)
62 : fLoopManager(lm), fColRegister(lm.get())
63{
65}
66
68 : fLoopManager(std::shared_ptr<ROOT::Detail::RDF::RLoopManager>{&lm, [](ROOT::Detail::RDF::RLoopManager *) {}}),
69 fColRegister(colRegister)
70{
71}
72
73/////////////////////////////////////////////////////////////////////////////
74/// \brief Returns the names of the available columns.
75/// \return the container of column names.
76///
77/// This is not an action nor a transformation, just a query to the RDataFrame object.
78///
79/// ### Example usage:
80/// ~~~{.cpp}
81/// auto colNames = d.GetColumnNames();
82/// // Print columns' names
83/// for (auto &&colName : colNames) std::cout << colName << std::endl;
84/// ~~~
85///
87{
88 // there could be duplicates between Redefined columns and columns in the data source
89 std::unordered_set<std::string> allColumns;
90
91 auto addIfNotInternal = [&allColumns](std::string_view colName) {
93 allColumns.emplace(colName);
94 };
95
96 auto definedColumns = fColRegister.GenerateColumnNames();
97
99
100 auto tree = fLoopManager->GetTree();
101 if (tree) {
102 for (const auto &bName : RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false))
103 allColumns.emplace(bName);
104 }
105
106 if (auto ds = GetDataSource()) {
108 if (s.rfind("R_rdf_sizeof", 0) != 0)
109 allColumns.emplace(s);
110 }
111 }
112
114 std::sort(ret.begin(), ret.end());
115 return ret;
116}
117
118/////////////////////////////////////////////////////////////////////////////
119/// \brief Return the type of a given column as a string.
120/// \return the type of the required column.
121///
122/// This is not an action nor a transformation, just a query to the RDataFrame object.
123///
124/// ### Example usage:
125/// ~~~{.cpp}
126/// auto colType = d.GetColumnType("columnName");
127/// // Print column type
128/// std::cout << "Column columnName has type " << colType << std::endl;
129/// ~~~
130///
131std::string ROOT::RDF::RInterfaceBase::GetColumnType(std::string_view column)
132{
133 const auto col = fColRegister.ResolveAlias(column);
134
135 RDFDetail::RDefineBase *define = fColRegister.GetDefine(col);
136
137 const bool convertVector2RVec = true;
138 return RDFInternal::ColumnName2ColumnTypeName(std::string(col), fLoopManager->GetTree(),
139 fLoopManager->GetDataSource(), define, convertVector2RVec);
140}
141
142/////////////////////////////////////////////////////////////////////////////
143/// \brief Return information about the dataframe.
144/// \return information about the dataframe as RDFDescription object
145///
146/// This convenience function describes the dataframe and combines the following information:
147/// - Number of event loops run, see GetNRuns()
148/// - Number of total and defined columns, see GetColumnNames() and GetDefinedColumnNames()
149/// - Column names, see GetColumnNames()
150/// - Column types, see GetColumnType()
151/// - Number of processing slots, see GetNSlots()
152///
153/// This is not an action nor a transformation, just a query to the RDataFrame object.
154/// The result is dependent on the node from which this method is called, e.g. the list of
155/// defined columns returned by GetDefinedColumnNames().
156///
157/// Please note that this is a convenience feature and the layout of the output can be subject
158/// to change and should be parsed via RDFDescription methods.
159///
160/// ### Example usage:
161/// ~~~{.cpp}
162/// RDataFrame df(10);
163/// auto df2 = df.Define("x", "1.f").Define("s", "\"myStr\"");
164/// // Describe the dataframe
165/// df2.Describe().Print();
166/// df2.Describe().Print(/*shortFormat=*/true);
167/// std::cout << df2.Describe().AsString() << std::endl;
168/// std::cout << df2.Describe().AsString(/*shortFormat=*/true) << std::endl;
169/// ~~~
170///
172{
173 // Build set of defined column names to find later in all column names
174 // the defined columns more efficiently
175 const auto columnNames = GetColumnNames();
176 std::set<std::string> definedColumnNamesSet;
177 for (const auto &name : GetDefinedColumnNames())
179
180 // Get information for the metadata table
181 const std::vector<std::string> metadataProperties = {"Columns in total", "Columns from defines", "Event loops run",
182 "Processing slots"};
183 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
184 std::to_string(definedColumnNamesSet.size()),
185 std::to_string(GetNRuns()), std::to_string(GetNSlots())};
186
187 // Set header for metadata table
189 // The column width of the values is required to make right-bound numbers and is equal
190 // to the maximum of the string "Value" and all values to be put in this column.
191 const auto columnWidthValues =
192 std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(), static_cast<std::size_t>(5u));
193 std::stringstream ss;
194 ss << std::left << std::setw(columnWidthProperties) << "Property" << std::setw(columnWidthValues) << "Value\n"
195 << std::setw(columnWidthProperties) << "--------" << std::setw(columnWidthValues) << "-----\n";
196
197 // Build metadata table
198 // All numbers should be bound to the right and strings bound to the left.
199 for (auto i = 0u; i < metadataProperties.size(); i++) {
200 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
201 << std::setw(columnWidthValues) << metadataValues[i] << '\n';
202 }
203 ss << '\n'; // put space between this and the next table
204
205 // Set header for columns table
207 const auto columnTypes = GetColumnTypeNamesList(columnNames);
209 ss << std::left << std::setw(columnWidthNames) << "Column" << std::setw(columnWidthTypes) << "Type"
210 << "Origin\n"
211 << std::setw(columnWidthNames) << "------" << std::setw(columnWidthTypes) << "----"
212 << "------\n";
213
214 // Build columns table
215 const auto nCols = columnNames.size();
216 for (auto i = 0u; i < nCols; i++) {
217 auto origin = "Dataset";
219 origin = "Define";
220 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes) << columnTypes[i]
221 << origin << '\n';
222 }
223 // Use the string returned from DescribeDataset() as the 'brief' description
224 // Use the converted to string stringstream ss as the 'full' description
225 return RDFDescription(DescribeDataset(), ss.str(), GetNFiles());
226}
227
228/// \brief Returns the names of the defined columns.
229/// \return the container of the defined column names.
230///
231/// This is not an action nor a transformation, just a simple utility to
232/// get the columns names that have been defined up to the node.
233/// If no column has been defined, e.g. on a root node, it returns an
234/// empty collection.
235///
236/// ### Example usage:
237/// ~~~{.cpp}
238/// auto defColNames = d.GetDefinedColumnNames();
239/// // Print defined columns' names
240/// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
241/// ~~~
242///
244{
246
247 const auto columns = fColRegister.BuildDefineNames();
248 for (const auto &column : columns) {
250 definedColumns.emplace_back(column);
251 }
252
253 return definedColumns;
254}
255
256/// \brief Return a descriptor for the systematic variations registered in this branch of the computation graph.
257///
258/// This is not an action nor a transformation, just a simple utility to
259/// inspect the systematic variations that have been registered with Vary() up to this node.
260/// When called on the root node, it returns an empty descriptor.
261///
262/// ### Example usage:
263/// ~~~{.cpp}
264/// auto variations = d.GetVariations();
265/// variations.Print();
266/// ~~~
267///
269{
270 return fColRegister.BuildVariationsDescription();
271}
272
273/// \brief Checks if a column is present in the dataset.
274/// \return true if the column is available, false otherwise
275///
276/// This method checks if a column is part of the input ROOT dataset, has
277/// been defined or can be provided by the data source.
278///
279/// Example usage:
280/// ~~~{.cpp}
281/// ROOT::RDataFrame base(1);
282/// auto rdf = base.Define("definedColumn", [](){return 0;});
283/// rdf.HasColumn("definedColumn"); // true: we defined it
284/// rdf.HasColumn("rdfentry_"); // true: it's always there
285/// rdf.HasColumn("foo"); // false: it is not there
286/// ~~~
288{
289 if (fColRegister.IsDefineOrAlias(columnName))
290 return true;
291
292 if (fLoopManager->GetTree()) {
293 const auto &branchNames = fLoopManager->GetBranchNames();
294 const auto branchNamesEnd = branchNames.end();
296 return true;
297 }
298
299 if (auto ds = GetDataSource(); ds->HasColumn(columnName))
300 return true;
301
302 return false;
303}
304
305/// \brief Gets the number of data processing slots.
306/// \return The number of data processing slots used by this RDataFrame instance
307///
308/// This method returns the number of data processing slots used by this RDataFrame
309/// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
310///
311/// Example usage:
312/// ~~~{.cpp}
313/// ROOT::EnableImplicitMT(6);
314/// ROOT::RDataFrame df(1);
315/// std::cout << df.GetNSlots() << std::endl; // prints "6"
316/// ~~~
318{
319 return fLoopManager->GetNSlots();
320}
321
322/// \brief Gets the number of event loops run.
323/// \return The number of event loops run by this RDataFrame instance
324///
325/// This method returns the number of event loops run so far by this RDataFrame instance.
326///
327/// Example usage:
328/// ~~~{.cpp}
329/// ROOT::RDataFrame df(1);
330/// std::cout << df.GetNRuns() << std::endl; // prints "0"
331/// df.Sum("rdfentry_").GetValue(); // trigger the event loop
332/// std::cout << df.GetNRuns() << std::endl; // prints "1"
333/// df.Sum("rdfentry_").GetValue(); // trigger another event loop
334/// std::cout << df.GetNRuns() << std::endl; // prints "2"
335/// ~~~
337{
338 return fLoopManager->GetNRuns();
339}
340
342{
343 std::vector<std::string> types;
344
345 for (auto column : columnList) {
346 types.push_back(GetColumnType(column));
347 }
348 return types;
349}
350
352{
354 std::string error(callerName);
355 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
356 throw std::runtime_error(error);
357 }
358}
359
361{
362 // Entry number column
363 const std::string entryColName = "rdfentry_";
364 const std::string entryColType = "ULong64_t";
365 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
366 using NewColEntry_t = RDFDetail::RDefine<decltype(entryColGen), RDFDetail::ExtraArgsForDefine::SlotAndEntry>;
367
368 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
369 ColumnNames_t{}, fColRegister, *fLoopManager);
370 fColRegister.AddDefine(std::move(entryColumn));
371
372 // Slot number column
373 const std::string slotColName = "rdfslot_";
374 const std::string slotColType = "unsigned int";
375 auto slotColGen = [](unsigned int slot) { return slot; };
376 using NewColSlot_t = RDFDetail::RDefine<decltype(slotColGen), RDFDetail::ExtraArgsForDefine::Slot>;
377
378 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
379 fColRegister, *fLoopManager);
380 fColRegister.AddDefine(std::move(slotColumn));
381
382 fColRegister.AddAlias("tdfentry_", entryColName);
383 fColRegister.AddAlias("tdfslot_", slotColName);
384}
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
char name[80]
Definition TGX11.cxx:110
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
An RDFDescription contains useful information about a given RDataFrame computation graph.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
< The RLoopManager at the root of this computation graph. Never null.
unsigned int GetNRuns() const
Gets the number of event loops run.
RDataSource * GetDataSource() const
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
A descriptor for the systematic variations known to a given RDataFrame node.
const_iterator begin() const
const_iterator end() const
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
const Int_t n
Definition legend1.C:16
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
Definition RDFUtils.cxx:395
std::string DescribeDataset(ROOT::RDF::RDataSource &ds)
Definition RDFUtils.cxx:569
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:233
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:386
const std::vector< std::string > & GetColumnNamesNoDuplicates(const ROOT::RDF::RDataSource &ds)
Definition RDFUtils.cxx:558
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
std::vector< std::string > ColumnNames_t
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:570