1 | //===- DependenceGraphBuilder.cpp ------------------------------------------==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // This file implements common steps of the build algorithm for construction |
9 | // of dependence graphs such as DDG and PDG. |
10 | //===----------------------------------------------------------------------===// |
11 | |
12 | #include "llvm/Analysis/DependenceGraphBuilder.h" |
13 | #include "llvm/ADT/DepthFirstIterator.h" |
14 | #include "llvm/ADT/EnumeratedArray.h" |
15 | #include "llvm/ADT/PostOrderIterator.h" |
16 | #include "llvm/ADT/SCCIterator.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/Analysis/DDG.h" |
19 | |
20 | using namespace llvm; |
21 | |
22 | #define DEBUG_TYPE "dgb" |
23 | |
24 | STATISTIC(TotalGraphs, "Number of dependence graphs created." ); |
25 | STATISTIC(TotalDefUseEdges, "Number of def-use edges created." ); |
26 | STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created." ); |
27 | STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created." ); |
28 | STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created." ); |
29 | STATISTIC(TotalConfusedEdges, |
30 | "Number of confused memory dependencies between two nodes." ); |
31 | STATISTIC(TotalEdgeReversals, |
32 | "Number of times the source and sink of dependence was reversed to " |
33 | "expose cycles in the graph." ); |
34 | |
35 | using InstructionListType = SmallVector<Instruction *, 2>; |
36 | |
37 | //===--------------------------------------------------------------------===// |
38 | // AbstractDependenceGraphBuilder implementation |
39 | //===--------------------------------------------------------------------===// |
40 | |
41 | template <class G> |
42 | void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() { |
43 | // The BBList is expected to be in program order. |
44 | size_t NextOrdinal = 1; |
45 | for (auto *BB : BBList) |
46 | for (auto &I : *BB) |
47 | InstOrdinalMap.insert(KV: std::make_pair(x: &I, y: NextOrdinal++)); |
48 | } |
49 | |
50 | template <class G> |
51 | void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() { |
52 | ++TotalGraphs; |
53 | assert(IMap.empty() && "Expected empty instruction map at start" ); |
54 | for (BasicBlock *BB : BBList) |
55 | for (Instruction &I : *BB) { |
56 | auto &NewNode = createFineGrainedNode(I); |
57 | IMap.insert(std::make_pair(&I, &NewNode)); |
58 | NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I))); |
59 | ++TotalFineGrainedNodes; |
60 | } |
61 | } |
62 | |
63 | template <class G> |
64 | void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() { |
65 | // Create a root node that connects to every connected component of the graph. |
66 | // This is done to allow graph iterators to visit all the disjoint components |
67 | // of the graph, in a single walk. |
68 | // |
69 | // This algorithm works by going through each node of the graph and for each |
70 | // node N, do a DFS starting from N. A rooted edge is established between the |
71 | // root node and N (if N is not yet visited). All the nodes reachable from N |
72 | // are marked as visited and are skipped in the DFS of subsequent nodes. |
73 | // |
74 | // Note: This algorithm tries to limit the number of edges out of the root |
75 | // node to some extent, but there may be redundant edges created depending on |
76 | // the iteration order. For example for a graph {A -> B}, an edge from the |
77 | // root node is added to both nodes if B is visited before A. While it does |
78 | // not result in minimal number of edges, this approach saves compile-time |
79 | // while keeping the number of edges in check. |
80 | auto &RootNode = createRootNode(); |
81 | df_iterator_default_set<const NodeType *, 4> Visited; |
82 | for (auto *N : Graph) { |
83 | if (*N == RootNode) |
84 | continue; |
85 | for (auto I : depth_first_ext(N, Visited)) |
86 | if (I == N) |
87 | createRootedEdge(Src&: RootNode, Tgt&: *N); |
88 | } |
89 | } |
90 | |
91 | template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() { |
92 | if (!shouldCreatePiBlocks()) |
93 | return; |
94 | |
95 | LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n" ); |
96 | |
97 | // The overall algorithm is as follows: |
98 | // 1. Identify SCCs and for each SCC create a pi-block node containing all |
99 | // the nodes in that SCC. |
100 | // 2. Identify incoming edges incident to the nodes inside of the SCC and |
101 | // reconnect them to the pi-block node. |
102 | // 3. Identify outgoing edges from the nodes inside of the SCC to nodes |
103 | // outside of it and reconnect them so that the edges are coming out of the |
104 | // SCC node instead. |
105 | |
106 | // Adding nodes as we iterate through the SCCs cause the SCC |
107 | // iterators to get invalidated. To prevent this invalidation, we first |
108 | // collect a list of nodes that are part of an SCC, and then iterate over |
109 | // those lists to create the pi-block nodes. Each element of the list is a |
110 | // list of nodes in an SCC. Note: trivial SCCs containing a single node are |
111 | // ignored. |
112 | SmallVector<NodeListType, 4> ListOfSCCs; |
113 | for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) { |
114 | if (SCC.size() > 1) |
115 | ListOfSCCs.emplace_back(SCC.begin(), SCC.end()); |
116 | } |
117 | |
118 | for (NodeListType &NL : ListOfSCCs) { |
119 | LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size() |
120 | << " nodes in it.\n" ); |
121 | |
122 | // SCC iterator may put the nodes in an order that's different from the |
123 | // program order. To preserve original program order, we sort the list of |
124 | // nodes based on ordinal numbers computed earlier. |
125 | llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) { |
126 | return getOrdinal(*LHS) < getOrdinal(*RHS); |
127 | }); |
128 | |
129 | NodeType &PiNode = createPiBlock(L: NL); |
130 | ++TotalPiBlockNodes; |
131 | |
132 | // Build a set to speed up the lookup for edges whose targets |
133 | // are inside the SCC. |
134 | SmallPtrSet<NodeType *, 4> NodesInSCC(llvm::from_range, NL); |
135 | |
136 | // We have the set of nodes in the SCC. We go through the set of nodes |
137 | // that are outside of the SCC and look for edges that cross the two sets. |
138 | for (NodeType *N : Graph) { |
139 | |
140 | // Skip the SCC node and all the nodes inside of it. |
141 | if (*N == PiNode || NodesInSCC.count(N)) |
142 | continue; |
143 | |
144 | enum Direction { |
145 | Incoming, // Incoming edges to the SCC |
146 | Outgoing, // Edges going ot of the SCC |
147 | DirectionCount // To make the enum usable as an array index. |
148 | }; |
149 | |
150 | // Use these flags to help us avoid creating redundant edges. If there |
151 | // are more than one edges from an outside node to inside nodes, we only |
152 | // keep one edge from that node to the pi-block node. Similarly, if |
153 | // there are more than one edges from inside nodes to an outside node, |
154 | // we only keep one edge from the pi-block node to the outside node. |
155 | // There is a flag defined for each direction (incoming vs outgoing) and |
156 | // for each type of edge supported, using a two-dimensional boolean |
157 | // array. |
158 | using EdgeKind = typename EdgeType::EdgeKind; |
159 | EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false, |
160 | false}; |
161 | |
162 | auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst, |
163 | const EdgeKind K) { |
164 | switch (K) { |
165 | case EdgeKind::RegisterDefUse: |
166 | createDefUseEdge(Src, Tgt&: Dst); |
167 | break; |
168 | case EdgeKind::MemoryDependence: |
169 | createMemoryEdge(Src, Tgt&: Dst); |
170 | break; |
171 | case EdgeKind::Rooted: |
172 | createRootedEdge(Src, Tgt&: Dst); |
173 | break; |
174 | default: |
175 | llvm_unreachable("Unsupported type of edge." ); |
176 | } |
177 | }; |
178 | |
179 | auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New, |
180 | const Direction Dir) { |
181 | if (!Src->hasEdgeTo(*Dst)) |
182 | return; |
183 | LLVM_DEBUG( |
184 | dbgs() << "reconnecting(" |
185 | << (Dir == Direction::Incoming ? "incoming)" : "outgoing)" ) |
186 | << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New |
187 | << "\n" ); |
188 | assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) && |
189 | "Invalid direction." ); |
190 | |
191 | SmallVector<EdgeType *, 10> EL; |
192 | Src->findEdgesTo(*Dst, EL); |
193 | for (EdgeType *OldEdge : EL) { |
194 | EdgeKind Kind = OldEdge->getKind(); |
195 | if (!EdgeAlreadyCreated[Dir][Kind]) { |
196 | if (Dir == Direction::Incoming) { |
197 | createEdgeOfKind(*Src, *New, Kind); |
198 | LLVM_DEBUG(dbgs() << "created edge from Src to New.\n" ); |
199 | } else if (Dir == Direction::Outgoing) { |
200 | createEdgeOfKind(*New, *Dst, Kind); |
201 | LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n" ); |
202 | } |
203 | EdgeAlreadyCreated[Dir][Kind] = true; |
204 | } |
205 | Src->removeEdge(*OldEdge); |
206 | destroyEdge(E&: *OldEdge); |
207 | LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n" ); |
208 | } |
209 | }; |
210 | |
211 | for (NodeType *SCCNode : NL) { |
212 | // Process incoming edges incident to the pi-block node. |
213 | reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming); |
214 | |
215 | // Process edges that are coming out of the pi-block node. |
216 | reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing); |
217 | } |
218 | } |
219 | } |
220 | |
221 | // Ordinal maps are no longer needed. |
222 | InstOrdinalMap.clear(); |
223 | NodeOrdinalMap.clear(); |
224 | |
225 | LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n" ); |
226 | } |
227 | |
228 | template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() { |
229 | for (NodeType *N : Graph) { |
230 | InstructionListType SrcIList; |
231 | N->collectInstructions([](const Instruction *I) { return true; }, SrcIList); |
232 | |
233 | // Use a set to mark the targets that we link to N, so we don't add |
234 | // duplicate def-use edges when more than one instruction in a target node |
235 | // use results of instructions that are contained in N. |
236 | SmallPtrSet<NodeType *, 4> VisitedTargets; |
237 | |
238 | for (Instruction *II : SrcIList) { |
239 | for (User *U : II->users()) { |
240 | Instruction *UI = dyn_cast<Instruction>(Val: U); |
241 | if (!UI) |
242 | continue; |
243 | NodeType *DstNode = IMap.lookup(UI); |
244 | |
245 | // In the case of loops, the scope of the subgraph is all the |
246 | // basic blocks (and instructions within them) belonging to the loop. We |
247 | // simply ignore all the edges coming from (or going into) instructions |
248 | // or basic blocks outside of this range. |
249 | if (!DstNode) { |
250 | LLVM_DEBUG( |
251 | dbgs() |
252 | << "skipped def-use edge since the sink" << *UI |
253 | << " is outside the range of instructions being considered.\n" ); |
254 | continue; |
255 | } |
256 | |
257 | // Self dependencies are ignored because they are redundant and |
258 | // uninteresting. |
259 | if (DstNode == N) { |
260 | LLVM_DEBUG(dbgs() |
261 | << "skipped def-use edge since the sink and the source (" |
262 | << N << ") are the same.\n" ); |
263 | continue; |
264 | } |
265 | |
266 | if (VisitedTargets.insert(DstNode).second) { |
267 | createDefUseEdge(Src&: *N, Tgt&: *DstNode); |
268 | ++TotalDefUseEdges; |
269 | } |
270 | } |
271 | } |
272 | } |
273 | } |
274 | |
275 | template <class G> |
276 | void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() { |
277 | using DGIterator = typename G::iterator; |
278 | auto isMemoryAccess = [](const Instruction *I) { |
279 | return I->mayReadOrWriteMemory(); |
280 | }; |
281 | for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) { |
282 | InstructionListType SrcIList; |
283 | (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList); |
284 | if (SrcIList.empty()) |
285 | continue; |
286 | |
287 | for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) { |
288 | if (**SrcIt == **DstIt) |
289 | continue; |
290 | InstructionListType DstIList; |
291 | (*DstIt)->collectInstructions(isMemoryAccess, DstIList); |
292 | if (DstIList.empty()) |
293 | continue; |
294 | bool ForwardEdgeCreated = false; |
295 | bool BackwardEdgeCreated = false; |
296 | for (Instruction *ISrc : SrcIList) { |
297 | for (Instruction *IDst : DstIList) { |
298 | auto D = DI.depends(Src: ISrc, Dst: IDst); |
299 | if (!D) |
300 | continue; |
301 | |
302 | // If we have a dependence with its left-most non-'=' direction |
303 | // being '>' we need to reverse the direction of the edge, because |
304 | // the source of the dependence cannot occur after the sink. For |
305 | // confused dependencies, we will create edges in both directions to |
306 | // represent the possibility of a cycle. |
307 | |
308 | auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) { |
309 | if (!ForwardEdgeCreated) { |
310 | createMemoryEdge(Src, Tgt&: Dst); |
311 | ++TotalMemoryEdges; |
312 | } |
313 | if (!BackwardEdgeCreated) { |
314 | createMemoryEdge(Src&: Dst, Tgt&: Src); |
315 | ++TotalMemoryEdges; |
316 | } |
317 | ForwardEdgeCreated = BackwardEdgeCreated = true; |
318 | ++TotalConfusedEdges; |
319 | }; |
320 | |
321 | auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) { |
322 | if (!ForwardEdgeCreated) { |
323 | createMemoryEdge(Src, Tgt&: Dst); |
324 | ++TotalMemoryEdges; |
325 | } |
326 | ForwardEdgeCreated = true; |
327 | }; |
328 | |
329 | auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) { |
330 | if (!BackwardEdgeCreated) { |
331 | createMemoryEdge(Src&: Dst, Tgt&: Src); |
332 | ++TotalMemoryEdges; |
333 | } |
334 | BackwardEdgeCreated = true; |
335 | }; |
336 | |
337 | if (D->isConfused()) |
338 | createConfusedEdges(**SrcIt, **DstIt); |
339 | else if (D->isOrdered() && !D->isLoopIndependent()) { |
340 | bool ReversedEdge = false; |
341 | for (unsigned Level = 1; Level <= D->getLevels(); ++Level) { |
342 | if (D->getDirection(Level) == Dependence::DVEntry::EQ) |
343 | continue; |
344 | else if (D->getDirection(Level) == Dependence::DVEntry::GT) { |
345 | createBackwardEdge(**SrcIt, **DstIt); |
346 | ReversedEdge = true; |
347 | ++TotalEdgeReversals; |
348 | break; |
349 | } else if (D->getDirection(Level) == Dependence::DVEntry::LT) |
350 | break; |
351 | else { |
352 | createConfusedEdges(**SrcIt, **DstIt); |
353 | break; |
354 | } |
355 | } |
356 | if (!ReversedEdge) |
357 | createForwardEdge(**SrcIt, **DstIt); |
358 | } else |
359 | createForwardEdge(**SrcIt, **DstIt); |
360 | |
361 | // Avoid creating duplicate edges. |
362 | if (ForwardEdgeCreated && BackwardEdgeCreated) |
363 | break; |
364 | } |
365 | |
366 | // If we've created edges in both directions, there is no more |
367 | // unique edge that we can create between these two nodes, so we |
368 | // can exit early. |
369 | if (ForwardEdgeCreated && BackwardEdgeCreated) |
370 | break; |
371 | } |
372 | } |
373 | } |
374 | } |
375 | |
376 | template <class G> void AbstractDependenceGraphBuilder<G>::simplify() { |
377 | if (!shouldSimplify()) |
378 | return; |
379 | LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n" ); |
380 | |
381 | // This algorithm works by first collecting a set of candidate nodes that have |
382 | // an out-degree of one (in terms of def-use edges), and then ignoring those |
383 | // whose targets have an in-degree more than one. Each node in the resulting |
384 | // set can then be merged with its corresponding target and put back into the |
385 | // worklist until no further merge candidates are available. |
386 | SmallPtrSet<NodeType *, 32> CandidateSourceNodes; |
387 | |
388 | // A mapping between nodes and their in-degree. To save space, this map |
389 | // only contains nodes that are targets of nodes in the CandidateSourceNodes. |
390 | DenseMap<NodeType *, unsigned> TargetInDegreeMap; |
391 | |
392 | for (NodeType *N : Graph) { |
393 | if (N->getEdges().size() != 1) |
394 | continue; |
395 | EdgeType &Edge = N->back(); |
396 | if (!Edge.isDefUse()) |
397 | continue; |
398 | CandidateSourceNodes.insert(N); |
399 | |
400 | // Insert an element into the in-degree map and initialize to zero. The |
401 | // count will get updated in the next step. |
402 | TargetInDegreeMap.insert({&Edge.getTargetNode(), 0}); |
403 | } |
404 | |
405 | LLVM_DEBUG({ |
406 | dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size() |
407 | << "\nNode with single outgoing def-use edge:\n" ; |
408 | for (NodeType *N : CandidateSourceNodes) { |
409 | dbgs() << N << "\n" ; |
410 | } |
411 | }); |
412 | |
413 | for (NodeType *N : Graph) { |
414 | for (EdgeType *E : *N) { |
415 | NodeType *Tgt = &E->getTargetNode(); |
416 | auto TgtIT = TargetInDegreeMap.find(Tgt); |
417 | if (TgtIT != TargetInDegreeMap.end()) |
418 | ++(TgtIT->second); |
419 | } |
420 | } |
421 | |
422 | LLVM_DEBUG({ |
423 | dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size() |
424 | << "\nContent of in-degree map:\n" ; |
425 | for (auto &I : TargetInDegreeMap) { |
426 | dbgs() << I.first << " --> " << I.second << "\n" ; |
427 | } |
428 | }); |
429 | |
430 | SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(), |
431 | CandidateSourceNodes.end()); |
432 | while (!Worklist.empty()) { |
433 | NodeType &Src = *Worklist.pop_back_val(); |
434 | // As nodes get merged, we need to skip any node that has been removed from |
435 | // the candidate set (see below). |
436 | if (!CandidateSourceNodes.erase(&Src)) |
437 | continue; |
438 | |
439 | assert(Src.getEdges().size() == 1 && |
440 | "Expected a single edge from the candidate src node." ); |
441 | NodeType &Tgt = Src.back().getTargetNode(); |
442 | assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() && |
443 | "Expected target to be in the in-degree map." ); |
444 | |
445 | if (TargetInDegreeMap[&Tgt] != 1) |
446 | continue; |
447 | |
448 | if (!areNodesMergeable(A: Src, B: Tgt)) |
449 | continue; |
450 | |
451 | // Do not merge if there is also an edge from target to src (immediate |
452 | // cycle). |
453 | if (Tgt.hasEdgeTo(Src)) |
454 | continue; |
455 | |
456 | LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n" ); |
457 | |
458 | mergeNodes(A&: Src, B&: Tgt); |
459 | |
460 | // If the target node is in the candidate set itself, we need to put the |
461 | // src node back into the worklist again so it gives the target a chance |
462 | // to get merged into it. For example if we have: |
463 | // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a}, |
464 | // then after merging (a) and (b) together, we need to put (a,b) back in |
465 | // the worklist so that (c) can get merged in as well resulting in |
466 | // {(a,b,c) -> d} |
467 | // We also need to remove the old target (b), from the worklist. We first |
468 | // remove it from the candidate set here, and skip any item from the |
469 | // worklist that is not in the set. |
470 | if (CandidateSourceNodes.erase(&Tgt)) { |
471 | Worklist.push_back(&Src); |
472 | CandidateSourceNodes.insert(&Src); |
473 | LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n" ); |
474 | } |
475 | } |
476 | LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n" ); |
477 | } |
478 | |
479 | template <class G> |
480 | void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() { |
481 | |
482 | // If we don't create pi-blocks, then we may not have a DAG. |
483 | if (!shouldCreatePiBlocks()) |
484 | return; |
485 | |
486 | SmallVector<NodeType *, 64> NodesInPO; |
487 | using NodeKind = typename NodeType::NodeKind; |
488 | for (NodeType *N : post_order(&Graph)) { |
489 | if (N->getKind() == NodeKind::PiBlock) { |
490 | // Put members of the pi-block right after the pi-block itself, for |
491 | // convenience. |
492 | const NodeListType &PiBlockMembers = getNodesInPiBlock(N: *N); |
493 | llvm::append_range(NodesInPO, PiBlockMembers); |
494 | } |
495 | NodesInPO.push_back(N); |
496 | } |
497 | |
498 | size_t OldSize = Graph.Nodes.size(); |
499 | Graph.Nodes.clear(); |
500 | append_range(Graph.Nodes, reverse(NodesInPO)); |
501 | if (Graph.Nodes.size() != OldSize) |
502 | assert(false && |
503 | "Expected the number of nodes to stay the same after the sort" ); |
504 | } |
505 | |
506 | template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>; |
507 | template class llvm::DependenceGraphInfo<DDGNode>; |
508 | |