Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFGraphUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo, CERN, Massimo Tumolo Politecnico di Torino 08/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
13
14#include <algorithm> // std::find
15
16namespace ROOT {
17namespace Internal {
18namespace RDF {
19
20std::shared_ptr<GraphDrawing::GraphNode>
21GraphDrawing::CreateDefineNode(const std::string &columnName, const ROOT::Detail::RDF::RDefineBase *columnPtr,
22 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap)
23{
24 // If there is already a node for this define (recognized by the custom column it is defining) return it. If there is
25 // not, return a new one.
26 auto duplicateDefineIt = visitedMap.find((void *)columnPtr);
27 if (duplicateDefineIt != visitedMap.end())
28 return duplicateDefineIt->second;
29
30 auto node = std::make_shared<GraphNode>("Define\\n" + columnName, visitedMap.size(), ENodeType::kDefine);
31 visitedMap[(void *)columnPtr] = node;
32 return node;
33}
34
35std::shared_ptr<GraphDrawing::GraphNode>
37 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap)
38{
39 // If there is already a node for this filter return it. If there is not, return a new one.
40 auto duplicateFilterIt = visitedMap.find((void *)filterPtr);
41 if (duplicateFilterIt != visitedMap.end()) {
42 duplicateFilterIt->second->SetNotNew();
43 return duplicateFilterIt->second;
44 }
45
46 auto node = std::make_shared<GraphNode>((filterPtr->HasName() ? filterPtr->GetName() : "Filter"), visitedMap.size(),
47 ENodeType::kFilter);
48 visitedMap[(void *)filterPtr] = node;
49 return node;
50}
51
52std::shared_ptr<GraphDrawing::GraphNode>
54 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap)
55{
56 // If there is already a node for this range return it. If there is not, return a new one.
57 auto duplicateRangeIt = visitedMap.find((void *)rangePtr);
58 if (duplicateRangeIt != visitedMap.end()) {
59 duplicateRangeIt->second->SetNotNew();
60 return duplicateRangeIt->second;
61 }
62
63 auto node = std::make_shared<GraphNode>("Range", visitedMap.size(), ENodeType::kRange);
64 visitedMap[(void *)rangePtr] = node;
65 return node;
66}
67
68std::shared_ptr<GraphDrawing::GraphNode>
69GraphDrawing::AddDefinesToGraph(std::shared_ptr<GraphNode> node, const RColumnRegister &colRegister,
70 const std::vector<std::string> &prevNodeDefines,
71 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap)
72{
73 auto upmostNode = node;
74 const auto &defineNames = colRegister.GenerateColumnNames();
75 for (auto i = int(defineNames.size()) - 1; i >= 0; --i) { // walk backwards through the names of defined columns
76 const auto colName = defineNames[i];
77 const bool isAlias = colRegister.IsAlias(colName);
78 if (isAlias || IsInternalColumn(colName))
79 continue; // aliases appear in the list of defineNames but we don't support them yet
80 const bool isANewDefine =
81 std::find(prevNodeDefines.begin(), prevNodeDefines.end(), colName) == prevNodeDefines.end();
82 if (!isANewDefine)
83 break; // we walked back through all new defines, the rest is stuff that was already in the graph
84
85 // create a node for this new Define
86 auto defineNode =
87 RDFGraphDrawing::CreateDefineNode(std::string(colName), colRegister.GetDefine(colName), visitedMap);
88 upmostNode->SetPrevNode(defineNode);
89 upmostNode = defineNode;
90 }
91
92 return upmostNode;
93}
94
95namespace GraphDrawing {
96
98{
99 // Only the mapping between node id and node label (i.e. name)
100 std::stringstream dotStringLabels;
101 // Representation of the relationships between nodes
102 std::stringstream dotStringGraph;
103
104 // Explore the graph bottom-up and store its dot representation.
105 const GraphNode *leaf = &start;
106 while (leaf) {
107 dotStringLabels << "\t" << leaf->GetID() << " [label=\"" << leaf->GetName()
108 << "\", style=\"filled\", fillcolor=\"" << leaf->GetColor() << "\", shape=\"" << leaf->GetShape()
109 << "\"];\n";
110 if (leaf->GetPrevNode()) {
111 dotStringGraph << "\t" << leaf->GetPrevNode()->GetID() << " -> " << leaf->GetID() << ";\n";
112 }
113 leaf = leaf->GetPrevNode();
114 }
115
116 return "digraph {\n" + dotStringLabels.str() + dotStringGraph.str() + "}";
117}
118
119std::string GraphCreatorHelper::FromGraphActionsToDot(std::vector<std::shared_ptr<GraphNode>> leaves) const
120{
121 // Only the mapping between node id and node label (i.e. name)
122 std::stringstream dotStringLabels;
123 // Representation of the relationships between nodes
124 std::stringstream dotStringGraph;
125
126 for (auto leafShPtr : leaves) {
127 GraphNode *leaf = leafShPtr.get();
128 while (leaf && !leaf->IsExplored()) {
129 dotStringLabels << "\t" << leaf->GetID() << " [label=\"" << leaf->GetName()
130 << "\", style=\"filled\", fillcolor=\"" << leaf->GetColor() << "\", shape=\""
131 << leaf->GetShape() << "\"];\n";
132 if (leaf->GetPrevNode()) {
133 dotStringGraph << "\t" << leaf->GetPrevNode()->GetID() << " -> " << leaf->GetID() << ";\n";
134 }
135 // Multiple branches may share the same nodes. It is wrong to explore them more than once.
136 leaf->SetExplored();
137 leaf = leaf->GetPrevNode();
138 }
139 }
140 return "digraph {\n" + dotStringLabels.str() + dotStringGraph.str() + "}";
141}
142
144{
145 auto loopManager = rDataFrame.GetLoopManager();
146 // Jitting is triggered because nodes must not be empty at the time of the calling in order to draw the graph.
147 loopManager->Jit();
148
149 return RepresentGraph(loopManager);
150}
151
153{
154 const auto actions = loopManager->GetAllActions();
155 const auto edges = loopManager->GetGraphEdges();
156
157 std::vector<std::shared_ptr<GraphNode>> nodes;
158 nodes.reserve(actions.size() + edges.size());
159
160 for (auto *action : actions)
161 nodes.emplace_back(action->GetGraph(fVisitedMap));
162 for (auto *edge : edges)
163 nodes.emplace_back(edge->GetGraph(fVisitedMap));
164
165 return FromGraphActionsToDot(std::move(nodes));
166}
167
168} // namespace GraphDrawing
169} // namespace RDF
170} // namespace Internal
171} // namespace ROOT
std::string GetName() const
The head node of a RDF computation graph.
std::vector< RDFInternal::RActionBase * > GetAllActions() const
Return all actions, either booked or already run.
std::vector< RNodeBase * > GetGraphEdges() const
Return all graph edges known to RLoopManager. This includes Filters and Ranges but not Defines.
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
std::string FromGraphLeafToDot(const GraphNode &leaf) const
Starting from any leaf (Action, Filter, Range) it draws the dot representation of the branch.
std::string RepresentGraph(ROOT::RDataFrame &rDataFrame)
Starting from the root node, prints the entire graph.
std::string FromGraphActionsToDot(std::vector< std::shared_ptr< GraphNode > > leaves) const
Starting from an array of leaves, it draws the entire graph.
std::unordered_map< void *, std::shared_ptr< GraphNode > > fVisitedMap
Map to keep track of visited nodes when constructing the computation graph (SaveGraph)
Class used to create the operation graph to be printed in the dot representation.
Definition GraphNode.hxx:47
void SetExplored()
Allows stopping the graph traversal when an explored node is encountered.
A binder for user-defined columns, variations and aliases.
std::vector< std::string_view > GenerateColumnNames() const
Return the list of the names of the defined columns (Defines + Aliases).
bool IsAlias(std::string_view name) const
Return true if the given column name is an existing alias.
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
RDFDetail::RLoopManager * GetLoopManager() const
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
std::shared_ptr< GraphNode > CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
std::shared_ptr< GraphNode > CreateRangeNode(const ROOT::Detail::RDF::RRangeBase *rangePtr, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
std::shared_ptr< GraphNode > AddDefinesToGraph(std::shared_ptr< GraphNode > node, const RColumnRegister &colRegister, const std::vector< std::string > &prevNodeDefines, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
std::shared_ptr< GraphNode > CreateDefineNode(const std::string &columnName, const ROOT::Detail::RDF::RDefineBase *columnPtr, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:363
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...