1 | //===- DependenceGraphBuilder.cpp ------------------------------------------==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // This file implements common steps of the build algorithm for construction |
9 | // of dependence graphs such as DDG and PDG. |
10 | //===----------------------------------------------------------------------===// |
11 | |
12 | #include "llvm/Analysis/DependenceGraphBuilder.h" |
13 | #include "llvm/ADT/DepthFirstIterator.h" |
14 | #include "llvm/ADT/EnumeratedArray.h" |
15 | #include "llvm/ADT/PostOrderIterator.h" |
16 | #include "llvm/ADT/SCCIterator.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/Analysis/DDG.h" |
19 | |
20 | using namespace llvm; |
21 | |
22 | #define DEBUG_TYPE "dgb" |
23 | |
24 | STATISTIC(TotalGraphs, "Number of dependence graphs created." ); |
25 | STATISTIC(TotalDefUseEdges, "Number of def-use edges created." ); |
26 | STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created." ); |
27 | STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created." ); |
28 | STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created." ); |
29 | STATISTIC(TotalConfusedEdges, |
30 | "Number of confused memory dependencies between two nodes." ); |
31 | STATISTIC(TotalEdgeReversals, |
32 | "Number of times the source and sink of dependence was reversed to " |
33 | "expose cycles in the graph." ); |
34 | |
35 | using InstructionListType = SmallVector<Instruction *, 2>; |
36 | |
37 | //===--------------------------------------------------------------------===// |
38 | // AbstractDependenceGraphBuilder implementation |
39 | //===--------------------------------------------------------------------===// |
40 | |
41 | template <class G> |
42 | void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() { |
43 | // The BBList is expected to be in program order. |
44 | size_t NextOrdinal = 1; |
45 | for (auto *BB : BBList) |
46 | for (auto &I : *BB) |
47 | InstOrdinalMap.insert(KV: std::make_pair(x: &I, y: NextOrdinal++)); |
48 | } |
49 | |
50 | template <class G> |
51 | void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() { |
52 | ++TotalGraphs; |
53 | assert(IMap.empty() && "Expected empty instruction map at start" ); |
54 | for (BasicBlock *BB : BBList) |
55 | for (Instruction &I : *BB) { |
56 | auto &NewNode = createFineGrainedNode(I); |
57 | IMap.insert(std::make_pair(&I, &NewNode)); |
58 | NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I))); |
59 | ++TotalFineGrainedNodes; |
60 | } |
61 | } |
62 | |
63 | template <class G> |
64 | void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() { |
65 | // Create a root node that connects to every connected component of the graph. |
66 | // This is done to allow graph iterators to visit all the disjoint components |
67 | // of the graph, in a single walk. |
68 | // |
69 | // This algorithm works by going through each node of the graph and for each |
70 | // node N, do a DFS starting from N. A rooted edge is established between the |
71 | // root node and N (if N is not yet visited). All the nodes reachable from N |
72 | // are marked as visited and are skipped in the DFS of subsequent nodes. |
73 | // |
74 | // Note: This algorithm tries to limit the number of edges out of the root |
75 | // node to some extent, but there may be redundant edges created depending on |
76 | // the iteration order. For example for a graph {A -> B}, an edge from the |
77 | // root node is added to both nodes if B is visited before A. While it does |
78 | // not result in minimal number of edges, this approach saves compile-time |
79 | // while keeping the number of edges in check. |
80 | auto &RootNode = createRootNode(); |
81 | df_iterator_default_set<const NodeType *, 4> Visited; |
82 | for (auto *N : Graph) { |
83 | if (*N == RootNode) |
84 | continue; |
85 | for (auto I : depth_first_ext(N, Visited)) |
86 | if (I == N) |
87 | createRootedEdge(Src&: RootNode, Tgt&: *N); |
88 | } |
89 | } |
90 | |
91 | template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() { |
92 | if (!shouldCreatePiBlocks()) |
93 | return; |
94 | |
95 | LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n" ); |
96 | |
97 | // The overall algorithm is as follows: |
98 | // 1. Identify SCCs and for each SCC create a pi-block node containing all |
99 | // the nodes in that SCC. |
100 | // 2. Identify incoming edges incident to the nodes inside of the SCC and |
101 | // reconnect them to the pi-block node. |
102 | // 3. Identify outgoing edges from the nodes inside of the SCC to nodes |
103 | // outside of it and reconnect them so that the edges are coming out of the |
104 | // SCC node instead. |
105 | |
106 | // Adding nodes as we iterate through the SCCs cause the SCC |
107 | // iterators to get invalidated. To prevent this invalidation, we first |
108 | // collect a list of nodes that are part of an SCC, and then iterate over |
109 | // those lists to create the pi-block nodes. Each element of the list is a |
110 | // list of nodes in an SCC. Note: trivial SCCs containing a single node are |
111 | // ignored. |
112 | SmallVector<NodeListType, 4> ListOfSCCs; |
113 | for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) { |
114 | if (SCC.size() > 1) |
115 | ListOfSCCs.emplace_back(SCC.begin(), SCC.end()); |
116 | } |
117 | |
118 | for (NodeListType &NL : ListOfSCCs) { |
119 | LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size() |
120 | << " nodes in it.\n" ); |
121 | |
122 | // SCC iterator may put the nodes in an order that's different from the |
123 | // program order. To preserve original program order, we sort the list of |
124 | // nodes based on ordinal numbers computed earlier. |
125 | llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) { |
126 | return getOrdinal(*LHS) < getOrdinal(*RHS); |
127 | }); |
128 | |
129 | NodeType &PiNode = createPiBlock(L: NL); |
130 | ++TotalPiBlockNodes; |
131 | |
132 | // Build a set to speed up the lookup for edges whose targets |
133 | // are inside the SCC. |
134 | SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end()); |
135 | |
136 | // We have the set of nodes in the SCC. We go through the set of nodes |
137 | // that are outside of the SCC and look for edges that cross the two sets. |
138 | for (NodeType *N : Graph) { |
139 | |
140 | // Skip the SCC node and all the nodes inside of it. |
141 | if (*N == PiNode || NodesInSCC.count(N)) |
142 | continue; |
143 | |
144 | enum Direction { |
145 | Incoming, // Incoming edges to the SCC |
146 | Outgoing, // Edges going ot of the SCC |
147 | DirectionCount // To make the enum usable as an array index. |
148 | }; |
149 | |
150 | // Use these flags to help us avoid creating redundant edges. If there |
151 | // are more than one edges from an outside node to inside nodes, we only |
152 | // keep one edge from that node to the pi-block node. Similarly, if |
153 | // there are more than one edges from inside nodes to an outside node, |
154 | // we only keep one edge from the pi-block node to the outside node. |
155 | // There is a flag defined for each direction (incoming vs outgoing) and |
156 | // for each type of edge supported, using a two-dimensional boolean |
157 | // array. |
158 | using EdgeKind = typename EdgeType::EdgeKind; |
159 | EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false, |
160 | false}; |
161 | |
162 | auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst, |
163 | const EdgeKind K) { |
164 | switch (K) { |
165 | case EdgeKind::RegisterDefUse: |
166 | createDefUseEdge(Src, Tgt&: Dst); |
167 | break; |
168 | case EdgeKind::MemoryDependence: |
169 | createMemoryEdge(Src, Tgt&: Dst); |
170 | break; |
171 | case EdgeKind::Rooted: |
172 | createRootedEdge(Src, Tgt&: Dst); |
173 | break; |
174 | default: |
175 | llvm_unreachable("Unsupported type of edge." ); |
176 | } |
177 | }; |
178 | |
179 | auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New, |
180 | const Direction Dir) { |
181 | if (!Src->hasEdgeTo(*Dst)) |
182 | return; |
183 | LLVM_DEBUG( |
184 | dbgs() << "reconnecting(" |
185 | << (Dir == Direction::Incoming ? "incoming)" : "outgoing)" ) |
186 | << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New |
187 | << "\n" ); |
188 | assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) && |
189 | "Invalid direction." ); |
190 | |
191 | SmallVector<EdgeType *, 10> EL; |
192 | Src->findEdgesTo(*Dst, EL); |
193 | for (EdgeType *OldEdge : EL) { |
194 | EdgeKind Kind = OldEdge->getKind(); |
195 | if (!EdgeAlreadyCreated[Dir][Kind]) { |
196 | if (Dir == Direction::Incoming) { |
197 | createEdgeOfKind(*Src, *New, Kind); |
198 | LLVM_DEBUG(dbgs() << "created edge from Src to New.\n" ); |
199 | } else if (Dir == Direction::Outgoing) { |
200 | createEdgeOfKind(*New, *Dst, Kind); |
201 | LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n" ); |
202 | } |
203 | EdgeAlreadyCreated[Dir][Kind] = true; |
204 | } |
205 | Src->removeEdge(*OldEdge); |
206 | destroyEdge(E&: *OldEdge); |
207 | LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n" ); |
208 | } |
209 | }; |
210 | |
211 | for (NodeType *SCCNode : NL) { |
212 | // Process incoming edges incident to the pi-block node. |
213 | reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming); |
214 | |
215 | // Process edges that are coming out of the pi-block node. |
216 | reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing); |
217 | } |
218 | } |
219 | } |
220 | |
221 | // Ordinal maps are no longer needed. |
222 | InstOrdinalMap.clear(); |
223 | NodeOrdinalMap.clear(); |
224 | |
225 | LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n" ); |
226 | } |
227 | |
228 | template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() { |
229 | for (NodeType *N : Graph) { |
230 | InstructionListType SrcIList; |
231 | N->collectInstructions([](const Instruction *I) { return true; }, SrcIList); |
232 | |
233 | // Use a set to mark the targets that we link to N, so we don't add |
234 | // duplicate def-use edges when more than one instruction in a target node |
235 | // use results of instructions that are contained in N. |
236 | SmallPtrSet<NodeType *, 4> VisitedTargets; |
237 | |
238 | for (Instruction *II : SrcIList) { |
239 | for (User *U : II->users()) { |
240 | Instruction *UI = dyn_cast<Instruction>(Val: U); |
241 | if (!UI) |
242 | continue; |
243 | NodeType *DstNode = nullptr; |
244 | if (IMap.find(UI) != IMap.end()) |
245 | DstNode = IMap.find(UI)->second; |
246 | |
247 | // In the case of loops, the scope of the subgraph is all the |
248 | // basic blocks (and instructions within them) belonging to the loop. We |
249 | // simply ignore all the edges coming from (or going into) instructions |
250 | // or basic blocks outside of this range. |
251 | if (!DstNode) { |
252 | LLVM_DEBUG( |
253 | dbgs() |
254 | << "skipped def-use edge since the sink" << *UI |
255 | << " is outside the range of instructions being considered.\n" ); |
256 | continue; |
257 | } |
258 | |
259 | // Self dependencies are ignored because they are redundant and |
260 | // uninteresting. |
261 | if (DstNode == N) { |
262 | LLVM_DEBUG(dbgs() |
263 | << "skipped def-use edge since the sink and the source (" |
264 | << N << ") are the same.\n" ); |
265 | continue; |
266 | } |
267 | |
268 | if (VisitedTargets.insert(DstNode).second) { |
269 | createDefUseEdge(Src&: *N, Tgt&: *DstNode); |
270 | ++TotalDefUseEdges; |
271 | } |
272 | } |
273 | } |
274 | } |
275 | } |
276 | |
277 | template <class G> |
278 | void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() { |
279 | using DGIterator = typename G::iterator; |
280 | auto isMemoryAccess = [](const Instruction *I) { |
281 | return I->mayReadOrWriteMemory(); |
282 | }; |
283 | for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) { |
284 | InstructionListType SrcIList; |
285 | (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList); |
286 | if (SrcIList.empty()) |
287 | continue; |
288 | |
289 | for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) { |
290 | if (**SrcIt == **DstIt) |
291 | continue; |
292 | InstructionListType DstIList; |
293 | (*DstIt)->collectInstructions(isMemoryAccess, DstIList); |
294 | if (DstIList.empty()) |
295 | continue; |
296 | bool ForwardEdgeCreated = false; |
297 | bool BackwardEdgeCreated = false; |
298 | for (Instruction *ISrc : SrcIList) { |
299 | for (Instruction *IDst : DstIList) { |
300 | auto D = DI.depends(Src: ISrc, Dst: IDst, PossiblyLoopIndependent: true); |
301 | if (!D) |
302 | continue; |
303 | |
304 | // If we have a dependence with its left-most non-'=' direction |
305 | // being '>' we need to reverse the direction of the edge, because |
306 | // the source of the dependence cannot occur after the sink. For |
307 | // confused dependencies, we will create edges in both directions to |
308 | // represent the possibility of a cycle. |
309 | |
310 | auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) { |
311 | if (!ForwardEdgeCreated) { |
312 | createMemoryEdge(Src, Tgt&: Dst); |
313 | ++TotalMemoryEdges; |
314 | } |
315 | if (!BackwardEdgeCreated) { |
316 | createMemoryEdge(Src&: Dst, Tgt&: Src); |
317 | ++TotalMemoryEdges; |
318 | } |
319 | ForwardEdgeCreated = BackwardEdgeCreated = true; |
320 | ++TotalConfusedEdges; |
321 | }; |
322 | |
323 | auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) { |
324 | if (!ForwardEdgeCreated) { |
325 | createMemoryEdge(Src, Tgt&: Dst); |
326 | ++TotalMemoryEdges; |
327 | } |
328 | ForwardEdgeCreated = true; |
329 | }; |
330 | |
331 | auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) { |
332 | if (!BackwardEdgeCreated) { |
333 | createMemoryEdge(Src&: Dst, Tgt&: Src); |
334 | ++TotalMemoryEdges; |
335 | } |
336 | BackwardEdgeCreated = true; |
337 | }; |
338 | |
339 | if (D->isConfused()) |
340 | createConfusedEdges(**SrcIt, **DstIt); |
341 | else if (D->isOrdered() && !D->isLoopIndependent()) { |
342 | bool ReversedEdge = false; |
343 | for (unsigned Level = 1; Level <= D->getLevels(); ++Level) { |
344 | if (D->getDirection(Level) == Dependence::DVEntry::EQ) |
345 | continue; |
346 | else if (D->getDirection(Level) == Dependence::DVEntry::GT) { |
347 | createBackwardEdge(**SrcIt, **DstIt); |
348 | ReversedEdge = true; |
349 | ++TotalEdgeReversals; |
350 | break; |
351 | } else if (D->getDirection(Level) == Dependence::DVEntry::LT) |
352 | break; |
353 | else { |
354 | createConfusedEdges(**SrcIt, **DstIt); |
355 | break; |
356 | } |
357 | } |
358 | if (!ReversedEdge) |
359 | createForwardEdge(**SrcIt, **DstIt); |
360 | } else |
361 | createForwardEdge(**SrcIt, **DstIt); |
362 | |
363 | // Avoid creating duplicate edges. |
364 | if (ForwardEdgeCreated && BackwardEdgeCreated) |
365 | break; |
366 | } |
367 | |
368 | // If we've created edges in both directions, there is no more |
369 | // unique edge that we can create between these two nodes, so we |
370 | // can exit early. |
371 | if (ForwardEdgeCreated && BackwardEdgeCreated) |
372 | break; |
373 | } |
374 | } |
375 | } |
376 | } |
377 | |
378 | template <class G> void AbstractDependenceGraphBuilder<G>::simplify() { |
379 | if (!shouldSimplify()) |
380 | return; |
381 | LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n" ); |
382 | |
383 | // This algorithm works by first collecting a set of candidate nodes that have |
384 | // an out-degree of one (in terms of def-use edges), and then ignoring those |
385 | // whose targets have an in-degree more than one. Each node in the resulting |
386 | // set can then be merged with its corresponding target and put back into the |
387 | // worklist until no further merge candidates are available. |
388 | SmallPtrSet<NodeType *, 32> CandidateSourceNodes; |
389 | |
390 | // A mapping between nodes and their in-degree. To save space, this map |
391 | // only contains nodes that are targets of nodes in the CandidateSourceNodes. |
392 | DenseMap<NodeType *, unsigned> TargetInDegreeMap; |
393 | |
394 | for (NodeType *N : Graph) { |
395 | if (N->getEdges().size() != 1) |
396 | continue; |
397 | EdgeType &Edge = N->back(); |
398 | if (!Edge.isDefUse()) |
399 | continue; |
400 | CandidateSourceNodes.insert(N); |
401 | |
402 | // Insert an element into the in-degree map and initialize to zero. The |
403 | // count will get updated in the next step. |
404 | TargetInDegreeMap.insert({&Edge.getTargetNode(), 0}); |
405 | } |
406 | |
407 | LLVM_DEBUG({ |
408 | dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size() |
409 | << "\nNode with single outgoing def-use edge:\n" ; |
410 | for (NodeType *N : CandidateSourceNodes) { |
411 | dbgs() << N << "\n" ; |
412 | } |
413 | }); |
414 | |
415 | for (NodeType *N : Graph) { |
416 | for (EdgeType *E : *N) { |
417 | NodeType *Tgt = &E->getTargetNode(); |
418 | auto TgtIT = TargetInDegreeMap.find(Tgt); |
419 | if (TgtIT != TargetInDegreeMap.end()) |
420 | ++(TgtIT->second); |
421 | } |
422 | } |
423 | |
424 | LLVM_DEBUG({ |
425 | dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size() |
426 | << "\nContent of in-degree map:\n" ; |
427 | for (auto &I : TargetInDegreeMap) { |
428 | dbgs() << I.first << " --> " << I.second << "\n" ; |
429 | } |
430 | }); |
431 | |
432 | SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(), |
433 | CandidateSourceNodes.end()); |
434 | while (!Worklist.empty()) { |
435 | NodeType &Src = *Worklist.pop_back_val(); |
436 | // As nodes get merged, we need to skip any node that has been removed from |
437 | // the candidate set (see below). |
438 | if (!CandidateSourceNodes.erase(&Src)) |
439 | continue; |
440 | |
441 | assert(Src.getEdges().size() == 1 && |
442 | "Expected a single edge from the candidate src node." ); |
443 | NodeType &Tgt = Src.back().getTargetNode(); |
444 | assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() && |
445 | "Expected target to be in the in-degree map." ); |
446 | |
447 | if (TargetInDegreeMap[&Tgt] != 1) |
448 | continue; |
449 | |
450 | if (!areNodesMergeable(A: Src, B: Tgt)) |
451 | continue; |
452 | |
453 | // Do not merge if there is also an edge from target to src (immediate |
454 | // cycle). |
455 | if (Tgt.hasEdgeTo(Src)) |
456 | continue; |
457 | |
458 | LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n" ); |
459 | |
460 | mergeNodes(A&: Src, B&: Tgt); |
461 | |
462 | // If the target node is in the candidate set itself, we need to put the |
463 | // src node back into the worklist again so it gives the target a chance |
464 | // to get merged into it. For example if we have: |
465 | // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a}, |
466 | // then after merging (a) and (b) together, we need to put (a,b) back in |
467 | // the worklist so that (c) can get merged in as well resulting in |
468 | // {(a,b,c) -> d} |
469 | // We also need to remove the old target (b), from the worklist. We first |
470 | // remove it from the candidate set here, and skip any item from the |
471 | // worklist that is not in the set. |
472 | if (CandidateSourceNodes.erase(&Tgt)) { |
473 | Worklist.push_back(&Src); |
474 | CandidateSourceNodes.insert(&Src); |
475 | LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n" ); |
476 | } |
477 | } |
478 | LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n" ); |
479 | } |
480 | |
481 | template <class G> |
482 | void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() { |
483 | |
484 | // If we don't create pi-blocks, then we may not have a DAG. |
485 | if (!shouldCreatePiBlocks()) |
486 | return; |
487 | |
488 | SmallVector<NodeType *, 64> NodesInPO; |
489 | using NodeKind = typename NodeType::NodeKind; |
490 | for (NodeType *N : post_order(&Graph)) { |
491 | if (N->getKind() == NodeKind::PiBlock) { |
492 | // Put members of the pi-block right after the pi-block itself, for |
493 | // convenience. |
494 | const NodeListType &PiBlockMembers = getNodesInPiBlock(N: *N); |
495 | llvm::append_range(NodesInPO, PiBlockMembers); |
496 | } |
497 | NodesInPO.push_back(N); |
498 | } |
499 | |
500 | size_t OldSize = Graph.Nodes.size(); |
501 | Graph.Nodes.clear(); |
502 | append_range(Graph.Nodes, reverse(NodesInPO)); |
503 | if (Graph.Nodes.size() != OldSize) |
504 | assert(false && |
505 | "Expected the number of nodes to stay the same after the sort" ); |
506 | } |
507 | |
508 | template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>; |
509 | template class llvm::DependenceGraphInfo<DDGNode>; |
510 | |