Logo ROOT  
Reference Guide
RDFGraphUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo, CERN, Massimo Tumolo Politecnico di Torino 08/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
13
14#include <algorithm> // std::find
15
16namespace ROOT {
17namespace Internal {
18namespace RDF {
19
20std::shared_ptr<GraphDrawing::GraphNode>
21GraphDrawing::CreateDefineNode(const std::string &columnName, const ROOT::Detail::RDF::RDefineBase *columnPtr,
22 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap)
23{
24 // If there is already a node for this define (recognized by the custom column it is defining) return it. If there is
25 // not, return a new one.
26 auto duplicateDefineIt = visitedMap.find((void *)columnPtr);
27 if (duplicateDefineIt != visitedMap.end())
28 return duplicateDefineIt->second;
29
30 auto node = std::make_shared<GraphNode>("Define\\n" + columnName, visitedMap.size(), ENodeType::kDefine);
31 visitedMap[(void *)columnPtr] = node;
32 return node;
33}
34
35std::shared_ptr<GraphDrawing::GraphNode>
37 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap)
38{
39 // If there is already a node for this filter return it. If there is not, return a new one.
40 auto duplicateFilterIt = visitedMap.find((void *)filterPtr);
41 if (duplicateFilterIt != visitedMap.end()) {
42 duplicateFilterIt->second->SetNotNew();
43 return duplicateFilterIt->second;
44 }
45
46 auto node = std::make_shared<GraphNode>((filterPtr->HasName() ? filterPtr->GetName() : "Filter"), visitedMap.size(),
47 ENodeType::kFilter);
48 visitedMap[(void *)filterPtr] = node;
49 return node;
50}
51
52std::shared_ptr<GraphDrawing::GraphNode>
54 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap)
55{
56 // If there is already a node for this range return it. If there is not, return a new one.
57 auto duplicateRangeIt = visitedMap.find((void *)rangePtr);
58 if (duplicateRangeIt != visitedMap.end()) {
59 duplicateRangeIt->second->SetNotNew();
60 return duplicateRangeIt->second;
61 }
62
63 auto node = std::make_shared<GraphNode>("Range", visitedMap.size(), ENodeType::kRange);
64 visitedMap[(void *)rangePtr] = node;
65 return node;
66}
67
68std::shared_ptr<GraphDrawing::GraphNode>
69GraphDrawing::AddDefinesToGraph(std::shared_ptr<GraphNode> node, const RColumnRegister &colRegister,
70 const std::vector<std::string> &prevNodeDefines,
71 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap)
72{
73 auto upmostNode = node;
74 const auto &defineNames = colRegister.GetNames();
75 for (auto i = int(defineNames.size()) - 1; i >= 0; --i) { // walk backwards through the names of defined columns
76 const auto colName = defineNames[i];
77 const bool isAlias = colRegister.IsDefineOrAlias(colName) && colRegister.GetDefine(colName) == nullptr;
78 if (isAlias || IsInternalColumn(colName))
79 continue; // aliases appear in the list of defineNames but we don't support them yet
80 const bool isANewDefine =
81 std::find(prevNodeDefines.begin(), prevNodeDefines.end(), colName) == prevNodeDefines.end();
82 if (!isANewDefine)
83 break; // we walked back through all new defines, the rest is stuff that was already in the graph
84
85 // create a node for this new Define
86 auto defineNode = RDFGraphDrawing::CreateDefineNode(colName, colRegister.GetDefine(colName), visitedMap);
87 upmostNode->SetPrevNode(defineNode);
88 upmostNode = defineNode;
89 }
90
91 return upmostNode;
92}
93
94namespace GraphDrawing {
95
97{
98 // Only the mapping between node id and node label (i.e. name)
99 std::stringstream dotStringLabels;
100 // Representation of the relationships between nodes
101 std::stringstream dotStringGraph;
102
103 // Explore the graph bottom-up and store its dot representation.
104 const GraphNode *leaf = &start;
105 while (leaf) {
106 dotStringLabels << "\t" << leaf->GetID() << " [label=\"" << leaf->GetName()
107 << "\", style=\"filled\", fillcolor=\"" << leaf->GetColor() << "\", shape=\"" << leaf->GetShape()
108 << "\"];\n";
109 if (leaf->GetPrevNode()) {
110 dotStringGraph << "\t" << leaf->GetPrevNode()->GetID() << " -> " << leaf->GetID() << ";\n";
111 }
112 leaf = leaf->GetPrevNode();
113 }
114
115 return "digraph {\n" + dotStringLabels.str() + dotStringGraph.str() + "}";
116}
117
118std::string GraphCreatorHelper::FromGraphActionsToDot(std::vector<std::shared_ptr<GraphNode>> leaves) const
119{
120 // Only the mapping between node id and node label (i.e. name)
121 std::stringstream dotStringLabels;
122 // Representation of the relationships between nodes
123 std::stringstream dotStringGraph;
124
125 for (auto leafShPtr : leaves) {
126 GraphNode *leaf = leafShPtr.get();
127 while (leaf && !leaf->IsExplored()) {
128 dotStringLabels << "\t" << leaf->GetID() << " [label=\"" << leaf->GetName()
129 << "\", style=\"filled\", fillcolor=\"" << leaf->GetColor() << "\", shape=\""
130 << leaf->GetShape() << "\"];\n";
131 if (leaf->GetPrevNode()) {
132 dotStringGraph << "\t" << leaf->GetPrevNode()->GetID() << " -> " << leaf->GetID() << ";\n";
133 }
134 // Multiple branches may share the same nodes. It is wrong to explore them more than once.
135 leaf->SetExplored();
136 leaf = leaf->GetPrevNode();
137 }
138 }
139 return "digraph {\n" + dotStringLabels.str() + dotStringGraph.str() + "}";
140}
141
143{
144 auto loopManager = rDataFrame.GetLoopManager();
145 // Jitting is triggered because nodes must not be empty at the time of the calling in order to draw the graph.
146 loopManager->Jit();
147
148 return RepresentGraph(loopManager);
149}
150
152{
153 const auto actions = loopManager->GetAllActions();
154 const auto edges = loopManager->GetGraphEdges();
155
156 std::vector<std::shared_ptr<GraphNode>> nodes;
157 nodes.reserve(actions.size() + edges.size());
158
159 for (auto *action : actions)
160 nodes.emplace_back(action->GetGraph(fVisitedMap));
161 for (auto *edge : edges)
162 nodes.emplace_back(edge->GetGraph(fVisitedMap));
163
164 return FromGraphActionsToDot(std::move(nodes));
165}
166
167} // namespace GraphDrawing
168} // namespace RDF
169} // namespace Internal
170} // namespace ROOT
std::string GetName() const
Definition: RFilterBase.cxx:45
The head node of a RDF computation graph.
std::vector< RDFInternal::RActionBase * > GetAllActions() const
Return all actions, either booked or already run.
std::vector< RNodeBase * > GetGraphEdges() const
Return all graph edges known to RLoopManager. This includes Filters and Ranges but not Defines.
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
std::string FromGraphLeafToDot(const GraphNode &leaf) const
Starting from any leaf (Action, Filter, Range) it draws the dot representation of the branch.
std::string RepresentGraph(ROOT::RDataFrame &rDataFrame)
Starting from the root node, prints the entire graph.
std::string FromGraphActionsToDot(std::vector< std::shared_ptr< GraphNode > > leaves) const
Starting from an array of leaves, it draws the entire graph.
std::unordered_map< void *, std::shared_ptr< GraphNode > > fVisitedMap
Map to keep track of visited nodes when constructing the computation graph (SaveGraph)
Definition: GraphUtils.hxx:61
Class used to create the operation graph to be printed in the dot representation.
Definition: GraphNode.hxx:47
void SetExplored()
Allows stopping the graph traversal when an explored node is encountered.
Definition: GraphNode.hxx:151
A binder for user-defined columns, variations and aliases.
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
RDFDetail::RDefineBase * GetDefine(const std::string &colName) const
Return the RDefine for the requested column name, or nullptr.
ColumnNames_t GetNames() const
Return the list of the names of the defined columns (Defines + Aliases).
RDFDetail::RLoopManager * GetLoopManager() const
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Definition: RDataFrame.hxx:40
std::shared_ptr< GraphNode > CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
std::shared_ptr< GraphNode > AddDefinesToGraph(std::shared_ptr< GraphNode > node, const RDFInternal::RColumnRegister &colRegister, const std::vector< std::string > &prevNodeDefines, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
std::shared_ptr< GraphNode > CreateRangeNode(const ROOT::Detail::RDF::RRangeBase *rangePtr, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
std::shared_ptr< GraphNode > CreateDefineNode(const std::string &columnName, const ROOT::Detail::RDF::RDefineBase *columnPtr, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition: RDFUtils.cxx:363
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.