1 | //===- NewGVN.cpp - Global Value Numbering Pass ---------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file implements the new LLVM's Global Value Numbering pass. |
11 | /// GVN partitions values computed by a function into congruence classes. |
12 | /// Values ending up in the same congruence class are guaranteed to be the same |
13 | /// for every execution of the program. In that respect, congruency is a |
14 | /// compile-time approximation of equivalence of values at runtime. |
15 | /// The algorithm implemented here uses a sparse formulation and it's based |
16 | /// on the ideas described in the paper: |
17 | /// "A Sparse Algorithm for Predicated Global Value Numbering" from |
18 | /// Karthik Gargi. |
19 | /// |
20 | /// A brief overview of the algorithm: The algorithm is essentially the same as |
21 | /// the standard RPO value numbering algorithm (a good reference is the paper |
22 | /// "SCC based value numbering" by L. Taylor Simpson) with one major difference: |
23 | /// The RPO algorithm proceeds, on every iteration, to process every reachable |
24 | /// block and every instruction in that block. This is because the standard RPO |
25 | /// algorithm does not track what things have the same value number, it only |
26 | /// tracks what the value number of a given operation is (the mapping is |
27 | /// operation -> value number). Thus, when a value number of an operation |
28 | /// changes, it must reprocess everything to ensure all uses of a value number |
29 | /// get updated properly. In constrast, the sparse algorithm we use *also* |
30 | /// tracks what operations have a given value number (IE it also tracks the |
31 | /// reverse mapping from value number -> operations with that value number), so |
32 | /// that it only needs to reprocess the instructions that are affected when |
33 | /// something's value number changes. The vast majority of complexity and code |
34 | /// in this file is devoted to tracking what value numbers could change for what |
35 | /// instructions when various things happen. The rest of the algorithm is |
36 | /// devoted to performing symbolic evaluation, forward propagation, and |
37 | /// simplification of operations based on the value numbers deduced so far |
38 | /// |
39 | /// In order to make the GVN mostly-complete, we use a technique derived from |
40 | /// "Detection of Redundant Expressions: A Complete and Polynomial-time |
41 | /// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most SSA |
42 | /// based GVN algorithms is related to their inability to detect equivalence |
43 | /// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)). |
44 | /// We resolve this issue by generating the equivalent "phi of ops" form for |
45 | /// each op of phis we see, in a way that only takes polynomial time to resolve. |
46 | /// |
47 | /// We also do not perform elimination by using any published algorithm. All |
48 | /// published algorithms are O(Instructions). Instead, we use a technique that |
49 | /// is O(number of operations with the same value number), enabling us to skip |
50 | /// trying to eliminate things that have unique value numbers. |
51 | // |
52 | //===----------------------------------------------------------------------===// |
53 | |
54 | #include "llvm/Transforms/Scalar/NewGVN.h" |
55 | #include "llvm/ADT/ArrayRef.h" |
56 | #include "llvm/ADT/BitVector.h" |
57 | #include "llvm/ADT/DenseMap.h" |
58 | #include "llvm/ADT/DenseMapInfo.h" |
59 | #include "llvm/ADT/DenseSet.h" |
60 | #include "llvm/ADT/DepthFirstIterator.h" |
61 | #include "llvm/ADT/GraphTraits.h" |
62 | #include "llvm/ADT/Hashing.h" |
63 | #include "llvm/ADT/PointerIntPair.h" |
64 | #include "llvm/ADT/PostOrderIterator.h" |
65 | #include "llvm/ADT/SetOperations.h" |
66 | #include "llvm/ADT/SmallPtrSet.h" |
67 | #include "llvm/ADT/SmallVector.h" |
68 | #include "llvm/ADT/SparseBitVector.h" |
69 | #include "llvm/ADT/Statistic.h" |
70 | #include "llvm/ADT/iterator_range.h" |
71 | #include "llvm/Analysis/AliasAnalysis.h" |
72 | #include "llvm/Analysis/AssumptionCache.h" |
73 | #include "llvm/Analysis/CFGPrinter.h" |
74 | #include "llvm/Analysis/ConstantFolding.h" |
75 | #include "llvm/Analysis/GlobalsModRef.h" |
76 | #include "llvm/Analysis/InstructionSimplify.h" |
77 | #include "llvm/Analysis/MemoryBuiltins.h" |
78 | #include "llvm/Analysis/MemorySSA.h" |
79 | #include "llvm/Analysis/TargetLibraryInfo.h" |
80 | #include "llvm/Analysis/ValueTracking.h" |
81 | #include "llvm/IR/Argument.h" |
82 | #include "llvm/IR/BasicBlock.h" |
83 | #include "llvm/IR/Constant.h" |
84 | #include "llvm/IR/Constants.h" |
85 | #include "llvm/IR/Dominators.h" |
86 | #include "llvm/IR/Function.h" |
87 | #include "llvm/IR/InstrTypes.h" |
88 | #include "llvm/IR/Instruction.h" |
89 | #include "llvm/IR/Instructions.h" |
90 | #include "llvm/IR/IntrinsicInst.h" |
91 | #include "llvm/IR/PatternMatch.h" |
92 | #include "llvm/IR/Type.h" |
93 | #include "llvm/IR/Use.h" |
94 | #include "llvm/IR/User.h" |
95 | #include "llvm/IR/Value.h" |
96 | #include "llvm/Support/Allocator.h" |
97 | #include "llvm/Support/ArrayRecycler.h" |
98 | #include "llvm/Support/Casting.h" |
99 | #include "llvm/Support/CommandLine.h" |
100 | #include "llvm/Support/Debug.h" |
101 | #include "llvm/Support/DebugCounter.h" |
102 | #include "llvm/Support/ErrorHandling.h" |
103 | #include "llvm/Support/PointerLikeTypeTraits.h" |
104 | #include "llvm/Support/raw_ostream.h" |
105 | #include "llvm/Transforms/Scalar/GVNExpression.h" |
106 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
107 | #include "llvm/Transforms/Utils/Local.h" |
108 | #include "llvm/Transforms/Utils/PredicateInfo.h" |
109 | #include "llvm/Transforms/Utils/VNCoercion.h" |
110 | #include <algorithm> |
111 | #include <cassert> |
112 | #include <cstdint> |
113 | #include <iterator> |
114 | #include <map> |
115 | #include <memory> |
116 | #include <set> |
117 | #include <string> |
118 | #include <tuple> |
119 | #include <utility> |
120 | #include <vector> |
121 | |
122 | using namespace llvm; |
123 | using namespace llvm::GVNExpression; |
124 | using namespace llvm::VNCoercion; |
125 | using namespace llvm::PatternMatch; |
126 | |
127 | #define DEBUG_TYPE "newgvn" |
128 | |
129 | STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted" ); |
130 | STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted" ); |
131 | STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified" ); |
132 | STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same" ); |
133 | STATISTIC(NumGVNMaxIterations, |
134 | "Maximum Number of iterations it took to converge GVN" ); |
135 | STATISTIC(NumGVNLeaderChanges, "Number of leader changes" ); |
136 | STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes" ); |
137 | STATISTIC(NumGVNAvoidedSortedLeaderChanges, |
138 | "Number of avoided sorted leader changes" ); |
139 | STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated" ); |
140 | STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created" ); |
141 | STATISTIC(NumGVNPHIOfOpsEliminations, |
142 | "Number of things eliminated using PHI of ops" ); |
143 | DEBUG_COUNTER(VNCounter, "newgvn-vn" , |
144 | "Controls which instructions are value numbered" ); |
145 | DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi" , |
146 | "Controls which instructions we create phi of ops for" ); |
147 | // Currently store defining access refinement is too slow due to basicaa being |
148 | // egregiously slow. This flag lets us keep it working while we work on this |
149 | // issue. |
150 | static cl::opt<bool> EnableStoreRefinement("enable-store-refinement" , |
151 | cl::init(Val: false), cl::Hidden); |
152 | |
153 | /// Currently, the generation "phi of ops" can result in correctness issues. |
154 | static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops" , cl::init(Val: true), |
155 | cl::Hidden); |
156 | |
157 | //===----------------------------------------------------------------------===// |
158 | // GVN Pass |
159 | //===----------------------------------------------------------------------===// |
160 | |
161 | // Anchor methods. |
162 | namespace llvm { |
163 | namespace GVNExpression { |
164 | |
165 | Expression::~Expression() = default; |
166 | BasicExpression::~BasicExpression() = default; |
167 | CallExpression::~CallExpression() = default; |
168 | LoadExpression::~LoadExpression() = default; |
169 | StoreExpression::~StoreExpression() = default; |
170 | AggregateValueExpression::~AggregateValueExpression() = default; |
171 | PHIExpression::~PHIExpression() = default; |
172 | |
173 | } // end namespace GVNExpression |
174 | } // end namespace llvm |
175 | |
176 | namespace { |
177 | |
178 | // Tarjan's SCC finding algorithm with Nuutila's improvements |
179 | // SCCIterator is actually fairly complex for the simple thing we want. |
180 | // It also wants to hand us SCC's that are unrelated to the phi node we ask |
181 | // about, and have us process them there or risk redoing work. |
182 | // Graph traits over a filter iterator also doesn't work that well here. |
183 | // This SCC finder is specialized to walk use-def chains, and only follows |
184 | // instructions, |
185 | // not generic values (arguments, etc). |
186 | struct TarjanSCC { |
187 | TarjanSCC() : Components(1) {} |
188 | |
189 | void Start(const Instruction *Start) { |
190 | if (Root.lookup(Val: Start) == 0) |
191 | FindSCC(I: Start); |
192 | } |
193 | |
194 | const SmallPtrSetImpl<const Value *> &getComponentFor(const Value *V) const { |
195 | unsigned ComponentID = ValueToComponent.lookup(Val: V); |
196 | |
197 | assert(ComponentID > 0 && |
198 | "Asking for a component for a value we never processed" ); |
199 | return Components[ComponentID]; |
200 | } |
201 | |
202 | private: |
203 | void FindSCC(const Instruction *I) { |
204 | Root[I] = ++DFSNum; |
205 | // Store the DFS Number we had before it possibly gets incremented. |
206 | unsigned int OurDFS = DFSNum; |
207 | for (const auto &Op : I->operands()) { |
208 | if (auto *InstOp = dyn_cast<Instruction>(Val: Op)) { |
209 | if (Root.lookup(Val: Op) == 0) |
210 | FindSCC(I: InstOp); |
211 | if (!InComponent.count(Ptr: Op)) |
212 | Root[I] = std::min(a: Root.lookup(Val: I), b: Root.lookup(Val: Op)); |
213 | } |
214 | } |
215 | // See if we really were the root of a component, by seeing if we still have |
216 | // our DFSNumber. If we do, we are the root of the component, and we have |
217 | // completed a component. If we do not, we are not the root of a component, |
218 | // and belong on the component stack. |
219 | if (Root.lookup(Val: I) == OurDFS) { |
220 | unsigned ComponentID = Components.size(); |
221 | Components.resize(N: Components.size() + 1); |
222 | auto &Component = Components.back(); |
223 | Component.insert(Ptr: I); |
224 | LLVM_DEBUG(dbgs() << "Component root is " << *I << "\n" ); |
225 | InComponent.insert(Ptr: I); |
226 | ValueToComponent[I] = ComponentID; |
227 | // Pop a component off the stack and label it. |
228 | while (!Stack.empty() && Root.lookup(Val: Stack.back()) >= OurDFS) { |
229 | auto *Member = Stack.back(); |
230 | LLVM_DEBUG(dbgs() << "Component member is " << *Member << "\n" ); |
231 | Component.insert(Ptr: Member); |
232 | InComponent.insert(Ptr: Member); |
233 | ValueToComponent[Member] = ComponentID; |
234 | Stack.pop_back(); |
235 | } |
236 | } else { |
237 | // Part of a component, push to stack |
238 | Stack.push_back(Elt: I); |
239 | } |
240 | } |
241 | |
242 | unsigned int DFSNum = 1; |
243 | SmallPtrSet<const Value *, 8> InComponent; |
244 | DenseMap<const Value *, unsigned int> Root; |
245 | SmallVector<const Value *, 8> Stack; |
246 | |
247 | // Store the components as vector of ptr sets, because we need the topo order |
248 | // of SCC's, but not individual member order |
249 | SmallVector<SmallPtrSet<const Value *, 8>, 8> Components; |
250 | |
251 | DenseMap<const Value *, unsigned> ValueToComponent; |
252 | }; |
253 | |
254 | // Congruence classes represent the set of expressions/instructions |
255 | // that are all the same *during some scope in the function*. |
256 | // That is, because of the way we perform equality propagation, and |
257 | // because of memory value numbering, it is not correct to assume |
258 | // you can willy-nilly replace any member with any other at any |
259 | // point in the function. |
260 | // |
261 | // For any Value in the Member set, it is valid to replace any dominated member |
262 | // with that Value. |
263 | // |
264 | // Every congruence class has a leader, and the leader is used to symbolize |
265 | // instructions in a canonical way (IE every operand of an instruction that is a |
266 | // member of the same congruence class will always be replaced with leader |
267 | // during symbolization). To simplify symbolization, we keep the leader as a |
268 | // constant if class can be proved to be a constant value. Otherwise, the |
269 | // leader is the member of the value set with the smallest DFS number. Each |
270 | // congruence class also has a defining expression, though the expression may be |
271 | // null. If it exists, it can be used for forward propagation and reassociation |
272 | // of values. |
273 | |
274 | // For memory, we also track a representative MemoryAccess, and a set of memory |
275 | // members for MemoryPhis (which have no real instructions). Note that for |
276 | // memory, it seems tempting to try to split the memory members into a |
277 | // MemoryCongruenceClass or something. Unfortunately, this does not work |
278 | // easily. The value numbering of a given memory expression depends on the |
279 | // leader of the memory congruence class, and the leader of memory congruence |
280 | // class depends on the value numbering of a given memory expression. This |
281 | // leads to wasted propagation, and in some cases, missed optimization. For |
282 | // example: If we had value numbered two stores together before, but now do not, |
283 | // we move them to a new value congruence class. This in turn will move at one |
284 | // of the memorydefs to a new memory congruence class. Which in turn, affects |
285 | // the value numbering of the stores we just value numbered (because the memory |
286 | // congruence class is part of the value number). So while theoretically |
287 | // possible to split them up, it turns out to be *incredibly* complicated to get |
288 | // it to work right, because of the interdependency. While structurally |
289 | // slightly messier, it is algorithmically much simpler and faster to do what we |
290 | // do here, and track them both at once in the same class. |
291 | // Note: The default iterators for this class iterate over values |
292 | class CongruenceClass { |
293 | public: |
294 | using MemberType = Value; |
295 | using MemberSet = SmallPtrSet<MemberType *, 4>; |
296 | using MemoryMemberType = MemoryPhi; |
297 | using MemoryMemberSet = SmallPtrSet<const MemoryMemberType *, 2>; |
298 | |
299 | explicit CongruenceClass(unsigned ID) : ID(ID) {} |
300 | CongruenceClass(unsigned ID, std::pair<Value *, unsigned int> Leader, |
301 | const Expression *E) |
302 | : ID(ID), RepLeader(Leader), DefiningExpr(E) {} |
303 | |
304 | unsigned getID() const { return ID; } |
305 | |
306 | // True if this class has no members left. This is mainly used for assertion |
307 | // purposes, and for skipping empty classes. |
308 | bool isDead() const { |
309 | // If it's both dead from a value perspective, and dead from a memory |
310 | // perspective, it's really dead. |
311 | return empty() && memory_empty(); |
312 | } |
313 | |
314 | // Leader functions |
315 | Value *getLeader() const { return RepLeader.first; } |
316 | void setLeader(std::pair<Value *, unsigned int> Leader) { |
317 | RepLeader = Leader; |
318 | } |
319 | const std::pair<Value *, unsigned int> &getNextLeader() const { |
320 | return NextLeader; |
321 | } |
322 | void resetNextLeader() { NextLeader = {nullptr, ~0}; } |
323 | bool addPossibleLeader(std::pair<Value *, unsigned int> LeaderPair) { |
324 | if (LeaderPair.second < RepLeader.second) { |
325 | NextLeader = RepLeader; |
326 | RepLeader = LeaderPair; |
327 | return true; |
328 | } else if (LeaderPair.second < NextLeader.second) { |
329 | NextLeader = LeaderPair; |
330 | } |
331 | return false; |
332 | } |
333 | |
334 | Value *getStoredValue() const { return RepStoredValue; } |
335 | void setStoredValue(Value *Leader) { RepStoredValue = Leader; } |
336 | const MemoryAccess *getMemoryLeader() const { return RepMemoryAccess; } |
337 | void setMemoryLeader(const MemoryAccess *Leader) { RepMemoryAccess = Leader; } |
338 | |
339 | // Forward propagation info |
340 | const Expression *getDefiningExpr() const { return DefiningExpr; } |
341 | |
342 | // Value member set |
343 | bool empty() const { return Members.empty(); } |
344 | unsigned size() const { return Members.size(); } |
345 | MemberSet::const_iterator begin() const { return Members.begin(); } |
346 | MemberSet::const_iterator end() const { return Members.end(); } |
347 | void insert(MemberType *M) { Members.insert(Ptr: M); } |
348 | void erase(MemberType *M) { Members.erase(Ptr: M); } |
349 | void swap(MemberSet &Other) { Members.swap(RHS&: Other); } |
350 | |
351 | // Memory member set |
352 | bool memory_empty() const { return MemoryMembers.empty(); } |
353 | unsigned memory_size() const { return MemoryMembers.size(); } |
354 | MemoryMemberSet::const_iterator memory_begin() const { |
355 | return MemoryMembers.begin(); |
356 | } |
357 | MemoryMemberSet::const_iterator memory_end() const { |
358 | return MemoryMembers.end(); |
359 | } |
360 | iterator_range<MemoryMemberSet::const_iterator> memory() const { |
361 | return make_range(x: memory_begin(), y: memory_end()); |
362 | } |
363 | |
364 | void memory_insert(const MemoryMemberType *M) { MemoryMembers.insert(Ptr: M); } |
365 | void memory_erase(const MemoryMemberType *M) { MemoryMembers.erase(Ptr: M); } |
366 | |
367 | // Store count |
368 | unsigned getStoreCount() const { return StoreCount; } |
369 | void incStoreCount() { ++StoreCount; } |
370 | void decStoreCount() { |
371 | assert(StoreCount != 0 && "Store count went negative" ); |
372 | --StoreCount; |
373 | } |
374 | |
375 | // True if this class has no memory members. |
376 | bool definesNoMemory() const { return StoreCount == 0 && memory_empty(); } |
377 | |
378 | // Return true if two congruence classes are equivalent to each other. This |
379 | // means that every field but the ID number and the dead field are equivalent. |
380 | bool isEquivalentTo(const CongruenceClass *Other) const { |
381 | if (!Other) |
382 | return false; |
383 | if (this == Other) |
384 | return true; |
385 | |
386 | if (std::tie(args: StoreCount, args: RepLeader, args: RepStoredValue, args: RepMemoryAccess) != |
387 | std::tie(args: Other->StoreCount, args: Other->RepLeader, args: Other->RepStoredValue, |
388 | args: Other->RepMemoryAccess)) |
389 | return false; |
390 | if (DefiningExpr != Other->DefiningExpr) |
391 | if (!DefiningExpr || !Other->DefiningExpr || |
392 | *DefiningExpr != *Other->DefiningExpr) |
393 | return false; |
394 | |
395 | if (Members.size() != Other->Members.size()) |
396 | return false; |
397 | |
398 | return llvm::set_is_subset(S1: Members, S2: Other->Members); |
399 | } |
400 | |
401 | private: |
402 | unsigned ID; |
403 | |
404 | // Representative leader and its corresponding RPO number. |
405 | // The leader must have the lowest RPO number. |
406 | std::pair<Value *, unsigned int> RepLeader = {nullptr, ~0U}; |
407 | |
408 | // The most dominating leader after our current leader (given by the RPO |
409 | // number), because the member set is not sorted and is expensive to keep |
410 | // sorted all the time. |
411 | std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U}; |
412 | |
413 | // If this is represented by a store, the value of the store. |
414 | Value *RepStoredValue = nullptr; |
415 | |
416 | // If this class contains MemoryDefs or MemoryPhis, this is the leading memory |
417 | // access. |
418 | const MemoryAccess *RepMemoryAccess = nullptr; |
419 | |
420 | // Defining Expression. |
421 | const Expression *DefiningExpr = nullptr; |
422 | |
423 | // Actual members of this class. |
424 | MemberSet Members; |
425 | |
426 | // This is the set of MemoryPhis that exist in the class. MemoryDefs and |
427 | // MemoryUses have real instructions representing them, so we only need to |
428 | // track MemoryPhis here. |
429 | MemoryMemberSet MemoryMembers; |
430 | |
431 | // Number of stores in this congruence class. |
432 | // This is used so we can detect store equivalence changes properly. |
433 | int StoreCount = 0; |
434 | }; |
435 | |
436 | } // end anonymous namespace |
437 | |
438 | namespace llvm { |
439 | |
440 | struct ExactEqualsExpression { |
441 | const Expression &E; |
442 | |
443 | explicit ExactEqualsExpression(const Expression &E) : E(E) {} |
444 | |
445 | hash_code getComputedHash() const { return E.getComputedHash(); } |
446 | |
447 | bool operator==(const Expression &Other) const { |
448 | return E.exactlyEquals(Other); |
449 | } |
450 | }; |
451 | |
452 | template <> struct DenseMapInfo<const Expression *> { |
453 | static const Expression *getEmptyKey() { |
454 | auto Val = static_cast<uintptr_t>(-1); |
455 | Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable; |
456 | return reinterpret_cast<const Expression *>(Val); |
457 | } |
458 | |
459 | static const Expression *getTombstoneKey() { |
460 | auto Val = static_cast<uintptr_t>(~1U); |
461 | Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable; |
462 | return reinterpret_cast<const Expression *>(Val); |
463 | } |
464 | |
465 | static unsigned getHashValue(const Expression *E) { |
466 | return E->getComputedHash(); |
467 | } |
468 | |
469 | static unsigned getHashValue(const ExactEqualsExpression &E) { |
470 | return E.getComputedHash(); |
471 | } |
472 | |
473 | static bool isEqual(const ExactEqualsExpression &LHS, const Expression *RHS) { |
474 | if (RHS == getTombstoneKey() || RHS == getEmptyKey()) |
475 | return false; |
476 | return LHS == *RHS; |
477 | } |
478 | |
479 | static bool isEqual(const Expression *LHS, const Expression *RHS) { |
480 | if (LHS == RHS) |
481 | return true; |
482 | if (LHS == getTombstoneKey() || RHS == getTombstoneKey() || |
483 | LHS == getEmptyKey() || RHS == getEmptyKey()) |
484 | return false; |
485 | // Compare hashes before equality. This is *not* what the hashtable does, |
486 | // since it is computing it modulo the number of buckets, whereas we are |
487 | // using the full hash keyspace. Since the hashes are precomputed, this |
488 | // check is *much* faster than equality. |
489 | if (LHS->getComputedHash() != RHS->getComputedHash()) |
490 | return false; |
491 | return *LHS == *RHS; |
492 | } |
493 | }; |
494 | |
495 | } // end namespace llvm |
496 | |
497 | namespace { |
498 | |
499 | class NewGVN { |
500 | Function &F; |
501 | DominatorTree *DT = nullptr; |
502 | const TargetLibraryInfo *TLI = nullptr; |
503 | AliasAnalysis *AA = nullptr; |
504 | MemorySSA *MSSA = nullptr; |
505 | MemorySSAWalker *MSSAWalker = nullptr; |
506 | AssumptionCache *AC = nullptr; |
507 | const DataLayout &DL; |
508 | |
509 | // These are the only two things the create* functions should have |
510 | // side-effects on due to allocating memory. |
511 | mutable BumpPtrAllocator ExpressionAllocator; |
512 | mutable ArrayRecycler<Value *> ArgRecycler; |
513 | mutable TarjanSCC SCCFinder; |
514 | |
515 | std::unique_ptr<PredicateInfo> PredInfo; |
516 | const SimplifyQuery SQ; |
517 | |
518 | // Number of function arguments, used by ranking |
519 | unsigned int NumFuncArgs = 0; |
520 | |
521 | // RPOOrdering of basic blocks |
522 | DenseMap<const DomTreeNode *, unsigned> RPOOrdering; |
523 | |
524 | // Congruence class info. |
525 | |
526 | // This class is called INITIAL in the paper. It is the class everything |
527 | // startsout in, and represents any value. Being an optimistic analysis, |
528 | // anything in the TOP class has the value TOP, which is indeterminate and |
529 | // equivalent to everything. |
530 | CongruenceClass *TOPClass = nullptr; |
531 | std::vector<CongruenceClass *> CongruenceClasses; |
532 | unsigned NextCongruenceNum = 0; |
533 | |
534 | // Value Mappings. |
535 | DenseMap<Value *, CongruenceClass *> ValueToClass; |
536 | DenseMap<Value *, const Expression *> ValueToExpression; |
537 | |
538 | // Value PHI handling, used to make equivalence between phi(op, op) and |
539 | // op(phi, phi). |
540 | // These mappings just store various data that would normally be part of the |
541 | // IR. |
542 | SmallPtrSet<const Instruction *, 8> PHINodeUses; |
543 | |
544 | // The cached results, in general, are only valid for the specific block where |
545 | // they were computed. The unsigned part of the key is a unique block |
546 | // identifier |
547 | DenseMap<std::pair<const Value *, unsigned>, bool> OpSafeForPHIOfOps; |
548 | unsigned CacheIdx; |
549 | |
550 | // Map a temporary instruction we created to a parent block. |
551 | DenseMap<const Value *, BasicBlock *> TempToBlock; |
552 | |
553 | // Map between the already in-program instructions and the temporary phis we |
554 | // created that they are known equivalent to. |
555 | DenseMap<const Value *, PHINode *> RealToTemp; |
556 | |
557 | // In order to know when we should re-process instructions that have |
558 | // phi-of-ops, we track the set of expressions that they needed as |
559 | // leaders. When we discover new leaders for those expressions, we process the |
560 | // associated phi-of-op instructions again in case they have changed. The |
561 | // other way they may change is if they had leaders, and those leaders |
562 | // disappear. However, at the point they have leaders, there are uses of the |
563 | // relevant operands in the created phi node, and so they will get reprocessed |
564 | // through the normal user marking we perform. |
565 | mutable DenseMap<const Value *, SmallPtrSet<Value *, 2>> AdditionalUsers; |
566 | DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>> |
567 | ExpressionToPhiOfOps; |
568 | |
569 | // Map from temporary operation to MemoryAccess. |
570 | DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory; |
571 | |
572 | // Set of all temporary instructions we created. |
573 | // Note: This will include instructions that were just created during value |
574 | // numbering. The way to test if something is using them is to check |
575 | // RealToTemp. |
576 | DenseSet<Instruction *> AllTempInstructions; |
577 | |
578 | // This is the set of instructions to revisit on a reachability change. At |
579 | // the end of the main iteration loop it will contain at least all the phi of |
580 | // ops instructions that will be changed to phis, as well as regular phis. |
581 | // During the iteration loop, it may contain other things, such as phi of ops |
582 | // instructions that used edge reachability to reach a result, and so need to |
583 | // be revisited when the edge changes, independent of whether the phi they |
584 | // depended on changes. |
585 | DenseMap<BasicBlock *, SparseBitVector<>> RevisitOnReachabilityChange; |
586 | |
587 | // Mapping from predicate info we used to the instructions we used it with. |
588 | // In order to correctly ensure propagation, we must keep track of what |
589 | // comparisons we used, so that when the values of the comparisons change, we |
590 | // propagate the information to the places we used the comparison. |
591 | mutable DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> |
592 | PredicateToUsers; |
593 | |
594 | // the same reasoning as PredicateToUsers. When we skip MemoryAccesses for |
595 | // stores, we no longer can rely solely on the def-use chains of MemorySSA. |
596 | mutable DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> |
597 | MemoryToUsers; |
598 | |
599 | // A table storing which memorydefs/phis represent a memory state provably |
600 | // equivalent to another memory state. |
601 | // We could use the congruence class machinery, but the MemoryAccess's are |
602 | // abstract memory states, so they can only ever be equivalent to each other, |
603 | // and not to constants, etc. |
604 | DenseMap<const MemoryAccess *, CongruenceClass *> MemoryAccessToClass; |
605 | |
606 | // We could, if we wanted, build MemoryPhiExpressions and |
607 | // MemoryVariableExpressions, etc, and value number them the same way we value |
608 | // number phi expressions. For the moment, this seems like overkill. They |
609 | // can only exist in one of three states: they can be TOP (equal to |
610 | // everything), Equivalent to something else, or unique. Because we do not |
611 | // create expressions for them, we need to simulate leader change not just |
612 | // when they change class, but when they change state. Note: We can do the |
613 | // same thing for phis, and avoid having phi expressions if we wanted, We |
614 | // should eventually unify in one direction or the other, so this is a little |
615 | // bit of an experiment in which turns out easier to maintain. |
616 | enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique }; |
617 | DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState; |
618 | |
619 | enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle }; |
620 | mutable DenseMap<const Instruction *, InstCycleState> InstCycleState; |
621 | |
622 | // Expression to class mapping. |
623 | using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>; |
624 | ExpressionClassMap ExpressionToClass; |
625 | |
626 | // We have a single expression that represents currently DeadExpressions. |
627 | // For dead expressions we can prove will stay dead, we mark them with |
628 | // DFS number zero. However, it's possible in the case of phi nodes |
629 | // for us to assume/prove all arguments are dead during fixpointing. |
630 | // We use DeadExpression for that case. |
631 | DeadExpression *SingletonDeadExpression = nullptr; |
632 | |
633 | // Which values have changed as a result of leader changes. |
634 | SmallPtrSet<Value *, 8> LeaderChanges; |
635 | |
636 | // Reachability info. |
637 | using BlockEdge = BasicBlockEdge; |
638 | DenseSet<BlockEdge> ReachableEdges; |
639 | SmallPtrSet<const BasicBlock *, 8> ReachableBlocks; |
640 | |
641 | // This is a bitvector because, on larger functions, we may have |
642 | // thousands of touched instructions at once (entire blocks, |
643 | // instructions with hundreds of uses, etc). Even with optimization |
644 | // for when we mark whole blocks as touched, when this was a |
645 | // SmallPtrSet or DenseSet, for some functions, we spent >20% of all |
646 | // the time in GVN just managing this list. The bitvector, on the |
647 | // other hand, efficiently supports test/set/clear of both |
648 | // individual and ranges, as well as "find next element" This |
649 | // enables us to use it as a worklist with essentially 0 cost. |
650 | BitVector TouchedInstructions; |
651 | |
652 | DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange; |
653 | mutable DenseMap<const IntrinsicInst *, const Value *> IntrinsicInstPred; |
654 | |
655 | #ifndef NDEBUG |
656 | // Debugging for how many times each block and instruction got processed. |
657 | DenseMap<const Value *, unsigned> ProcessedCount; |
658 | #endif |
659 | |
660 | // DFS info. |
661 | // This contains a mapping from Instructions to DFS numbers. |
662 | // The numbering starts at 1. An instruction with DFS number zero |
663 | // means that the instruction is dead. |
664 | DenseMap<const Value *, unsigned> InstrDFS; |
665 | |
666 | // This contains the mapping DFS numbers to instructions. |
667 | SmallVector<Value *, 32> DFSToInstr; |
668 | |
669 | // Deletion info. |
670 | SmallPtrSet<Instruction *, 8> InstructionsToErase; |
671 | |
672 | public: |
673 | NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, |
674 | TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, |
675 | const DataLayout &DL) |
676 | : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), AC(AC), DL(DL), |
677 | // Reuse ExpressionAllocator for PredicateInfo as well. |
678 | PredInfo( |
679 | std::make_unique<PredicateInfo>(args&: F, args&: *DT, args&: *AC, args&: ExpressionAllocator)), |
680 | SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false, |
681 | /*CanUseUndef=*/false) {} |
682 | |
683 | bool runGVN(); |
684 | |
685 | private: |
686 | /// Helper struct return a Expression with an optional extra dependency. |
687 | struct ExprResult { |
688 | const Expression *Expr; |
689 | Value *; |
690 | const PredicateBase *PredDep; |
691 | |
692 | ExprResult(const Expression *Expr, Value * = nullptr, |
693 | const PredicateBase *PredDep = nullptr) |
694 | : Expr(Expr), ExtraDep(ExtraDep), PredDep(PredDep) {} |
695 | ExprResult(const ExprResult &) = delete; |
696 | ExprResult(ExprResult &&Other) |
697 | : Expr(Other.Expr), ExtraDep(Other.ExtraDep), PredDep(Other.PredDep) { |
698 | Other.Expr = nullptr; |
699 | Other.ExtraDep = nullptr; |
700 | Other.PredDep = nullptr; |
701 | } |
702 | ExprResult &operator=(const ExprResult &Other) = delete; |
703 | ExprResult &operator=(ExprResult &&Other) = delete; |
704 | |
705 | ~ExprResult() { assert(!ExtraDep && "unhandled ExtraDep" ); } |
706 | |
707 | operator bool() const { return Expr; } |
708 | |
709 | static ExprResult none() { return {nullptr, nullptr, nullptr}; } |
710 | static ExprResult some(const Expression *Expr, Value * = nullptr) { |
711 | return {Expr, ExtraDep, nullptr}; |
712 | } |
713 | static ExprResult some(const Expression *Expr, |
714 | const PredicateBase *PredDep) { |
715 | return {Expr, nullptr, PredDep}; |
716 | } |
717 | static ExprResult some(const Expression *Expr, Value *, |
718 | const PredicateBase *PredDep) { |
719 | return {Expr, ExtraDep, PredDep}; |
720 | } |
721 | }; |
722 | |
723 | // Expression handling. |
724 | ExprResult createExpression(Instruction *) const; |
725 | const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *, |
726 | Instruction *) const; |
727 | |
728 | // Our canonical form for phi arguments is a pair of incoming value, incoming |
729 | // basic block. |
730 | using ValPair = std::pair<Value *, BasicBlock *>; |
731 | |
732 | PHIExpression *createPHIExpression(ArrayRef<ValPair>, const Instruction *, |
733 | BasicBlock *, bool &HasBackEdge, |
734 | bool &OriginalOpsConstant) const; |
735 | const DeadExpression *createDeadExpression() const; |
736 | const VariableExpression *createVariableExpression(Value *) const; |
737 | const ConstantExpression *createConstantExpression(Constant *) const; |
738 | const Expression *createVariableOrConstant(Value *V) const; |
739 | const UnknownExpression *createUnknownExpression(Instruction *) const; |
740 | const StoreExpression *createStoreExpression(StoreInst *, |
741 | const MemoryAccess *) const; |
742 | LoadExpression *createLoadExpression(Type *, Value *, LoadInst *, |
743 | const MemoryAccess *) const; |
744 | const CallExpression *createCallExpression(CallInst *, |
745 | const MemoryAccess *) const; |
746 | const AggregateValueExpression * |
747 | createAggregateValueExpression(Instruction *) const; |
748 | bool setBasicExpressionInfo(Instruction *, BasicExpression *) const; |
749 | |
750 | // Congruence class handling. |
751 | CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) { |
752 | // Set RPO to 0 for values that are always available (constants and function |
753 | // args). These should always be made leader. |
754 | unsigned LeaderDFS = 0; |
755 | |
756 | // If Leader is not specified, either we have a memory class or the leader |
757 | // will be set later. Otherwise, if Leader is an Instruction, set LeaderDFS |
758 | // to its RPO number. |
759 | if (!Leader) |
760 | LeaderDFS = ~0; |
761 | else if (auto *I = dyn_cast<Instruction>(Val: Leader)) |
762 | LeaderDFS = InstrToDFSNum(V: I); |
763 | auto *result = |
764 | new CongruenceClass(NextCongruenceNum++, {Leader, LeaderDFS}, E); |
765 | CongruenceClasses.emplace_back(args&: result); |
766 | return result; |
767 | } |
768 | |
769 | CongruenceClass *createMemoryClass(MemoryAccess *MA) { |
770 | auto *CC = createCongruenceClass(Leader: nullptr, E: nullptr); |
771 | CC->setMemoryLeader(MA); |
772 | return CC; |
773 | } |
774 | |
775 | CongruenceClass *ensureLeaderOfMemoryClass(MemoryAccess *MA) { |
776 | auto *CC = getMemoryClass(MA); |
777 | if (CC->getMemoryLeader() != MA) |
778 | CC = createMemoryClass(MA); |
779 | return CC; |
780 | } |
781 | |
782 | CongruenceClass *createSingletonCongruenceClass(Value *Member) { |
783 | CongruenceClass *CClass = createCongruenceClass(Leader: Member, E: nullptr); |
784 | CClass->insert(M: Member); |
785 | ValueToClass[Member] = CClass; |
786 | return CClass; |
787 | } |
788 | |
789 | void initializeCongruenceClasses(Function &F); |
790 | const Expression *makePossiblePHIOfOps(Instruction *, |
791 | SmallPtrSetImpl<Value *> &); |
792 | Value *findLeaderForInst(Instruction *ValueOp, |
793 | SmallPtrSetImpl<Value *> &Visited, |
794 | MemoryAccess *MemAccess, Instruction *OrigInst, |
795 | BasicBlock *PredBB); |
796 | bool OpIsSafeForPHIOfOps(Value *Op, const BasicBlock *PHIBlock, |
797 | SmallPtrSetImpl<const Value *> &); |
798 | void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue); |
799 | void removePhiOfOps(Instruction *I, PHINode *PHITemp); |
800 | |
801 | // Value number an Instruction or MemoryPhi. |
802 | void valueNumberMemoryPhi(MemoryPhi *); |
803 | void valueNumberInstruction(Instruction *); |
804 | |
805 | // Symbolic evaluation. |
806 | ExprResult checkExprResults(Expression *, Instruction *, Value *) const; |
807 | ExprResult performSymbolicEvaluation(Instruction *, |
808 | SmallPtrSetImpl<Value *> &) const; |
809 | const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, |
810 | Instruction *, |
811 | MemoryAccess *) const; |
812 | const Expression *performSymbolicLoadEvaluation(Instruction *) const; |
813 | const Expression *performSymbolicStoreEvaluation(Instruction *) const; |
814 | ExprResult performSymbolicCallEvaluation(Instruction *) const; |
815 | void sortPHIOps(MutableArrayRef<ValPair> Ops) const; |
816 | const Expression *performSymbolicPHIEvaluation(ArrayRef<ValPair>, |
817 | Instruction *I, |
818 | BasicBlock *PHIBlock) const; |
819 | const Expression *performSymbolicAggrValueEvaluation(Instruction *) const; |
820 | ExprResult performSymbolicCmpEvaluation(Instruction *) const; |
821 | ExprResult performSymbolicPredicateInfoEvaluation(IntrinsicInst *) const; |
822 | |
823 | // Congruence finding. |
824 | bool someEquivalentDominates(const Instruction *, const Instruction *) const; |
825 | Value *lookupOperandLeader(Value *) const; |
826 | CongruenceClass *getClassForExpression(const Expression *E) const; |
827 | void performCongruenceFinding(Instruction *, const Expression *); |
828 | void moveValueToNewCongruenceClass(Instruction *, const Expression *, |
829 | CongruenceClass *, CongruenceClass *); |
830 | void moveMemoryToNewCongruenceClass(Instruction *, MemoryAccess *, |
831 | CongruenceClass *, CongruenceClass *); |
832 | Value *getNextValueLeader(CongruenceClass *) const; |
833 | const MemoryAccess *getNextMemoryLeader(CongruenceClass *) const; |
834 | bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To); |
835 | CongruenceClass *getMemoryClass(const MemoryAccess *MA) const; |
836 | const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const; |
837 | bool isMemoryAccessTOP(const MemoryAccess *) const; |
838 | |
839 | // Ranking |
840 | unsigned int getRank(const Value *) const; |
841 | bool shouldSwapOperands(const Value *, const Value *) const; |
842 | bool shouldSwapOperandsForIntrinsic(const Value *, const Value *, |
843 | const IntrinsicInst *I) const; |
844 | |
845 | // Reachability handling. |
846 | void updateReachableEdge(BasicBlock *, BasicBlock *); |
847 | void processOutgoingEdges(Instruction *, BasicBlock *); |
848 | Value *findConditionEquivalence(Value *) const; |
849 | |
850 | // Elimination. |
851 | struct ValueDFS; |
852 | void convertClassToDFSOrdered(const CongruenceClass &, |
853 | SmallVectorImpl<ValueDFS> &, |
854 | DenseMap<const Value *, unsigned int> &, |
855 | SmallPtrSetImpl<Instruction *> &) const; |
856 | void convertClassToLoadsAndStores(const CongruenceClass &, |
857 | SmallVectorImpl<ValueDFS> &) const; |
858 | |
859 | bool eliminateInstructions(Function &); |
860 | void replaceInstruction(Instruction *, Value *); |
861 | void markInstructionForDeletion(Instruction *); |
862 | void deleteInstructionsInBlock(BasicBlock *); |
863 | Value *findPHIOfOpsLeader(const Expression *, const Instruction *, |
864 | const BasicBlock *) const; |
865 | |
866 | // Various instruction touch utilities |
867 | template <typename Map, typename KeyType> |
868 | void touchAndErase(Map &, const KeyType &); |
869 | void markUsersTouched(Value *); |
870 | void markMemoryUsersTouched(const MemoryAccess *); |
871 | void markMemoryDefTouched(const MemoryAccess *); |
872 | void markPredicateUsersTouched(Instruction *); |
873 | void markValueLeaderChangeTouched(CongruenceClass *CC); |
874 | void markMemoryLeaderChangeTouched(CongruenceClass *CC); |
875 | void markPhiOfOpsChanged(const Expression *E); |
876 | void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const; |
877 | void addAdditionalUsers(Value *To, Value *User) const; |
878 | void addAdditionalUsers(ExprResult &Res, Instruction *User) const; |
879 | |
880 | // Main loop of value numbering |
881 | void iterateTouchedInstructions(); |
882 | |
883 | // Utilities. |
884 | void cleanupTables(); |
885 | std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned); |
886 | void updateProcessedCount(const Value *V); |
887 | void verifyMemoryCongruency() const; |
888 | void verifyIterationSettled(Function &F); |
889 | void verifyStoreExpressions() const; |
890 | bool singleReachablePHIPath(SmallPtrSet<const MemoryAccess *, 8> &, |
891 | const MemoryAccess *, const MemoryAccess *) const; |
892 | BasicBlock *getBlockForValue(Value *V) const; |
893 | void deleteExpression(const Expression *E) const; |
894 | MemoryUseOrDef *getMemoryAccess(const Instruction *) const; |
895 | MemoryPhi *getMemoryAccess(const BasicBlock *) const; |
896 | template <class T, class Range> T *getMinDFSOfRange(const Range &) const; |
897 | |
898 | unsigned InstrToDFSNum(const Value *V) const { |
899 | assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses" ); |
900 | return InstrDFS.lookup(Val: V); |
901 | } |
902 | |
903 | unsigned InstrToDFSNum(const MemoryAccess *MA) const { |
904 | return MemoryToDFSNum(MA); |
905 | } |
906 | |
907 | Value *InstrFromDFSNum(unsigned DFSNum) { return DFSToInstr[DFSNum]; } |
908 | |
909 | // Given a MemoryAccess, return the relevant instruction DFS number. Note: |
910 | // This deliberately takes a value so it can be used with Use's, which will |
911 | // auto-convert to Value's but not to MemoryAccess's. |
912 | unsigned MemoryToDFSNum(const Value *MA) const { |
913 | assert(isa<MemoryAccess>(MA) && |
914 | "This should not be used with instructions" ); |
915 | return isa<MemoryUseOrDef>(Val: MA) |
916 | ? InstrToDFSNum(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst()) |
917 | : InstrDFS.lookup(Val: MA); |
918 | } |
919 | |
920 | bool isCycleFree(const Instruction *) const; |
921 | bool isBackedge(BasicBlock *From, BasicBlock *To) const; |
922 | |
923 | // Debug counter info. When verifying, we have to reset the value numbering |
924 | // debug counter to the same state it started in to get the same results. |
925 | DebugCounter::CounterState StartingVNCounter; |
926 | }; |
927 | |
928 | } // end anonymous namespace |
929 | |
930 | template <typename T> |
931 | static bool equalsLoadStoreHelper(const T &LHS, const Expression &RHS) { |
932 | if (!isa<LoadExpression>(Val: RHS) && !isa<StoreExpression>(Val: RHS)) |
933 | return false; |
934 | return LHS.MemoryExpression::equals(RHS); |
935 | } |
936 | |
937 | bool LoadExpression::equals(const Expression &Other) const { |
938 | return equalsLoadStoreHelper(LHS: *this, RHS: Other); |
939 | } |
940 | |
941 | bool StoreExpression::equals(const Expression &Other) const { |
942 | if (!equalsLoadStoreHelper(LHS: *this, RHS: Other)) |
943 | return false; |
944 | // Make sure that store vs store includes the value operand. |
945 | if (const auto *S = dyn_cast<StoreExpression>(Val: &Other)) |
946 | if (getStoredValue() != S->getStoredValue()) |
947 | return false; |
948 | return true; |
949 | } |
950 | |
951 | bool CallExpression::equals(const Expression &Other) const { |
952 | if (!MemoryExpression::equals(Other)) |
953 | return false; |
954 | |
955 | if (auto *RHS = dyn_cast<CallExpression>(Val: &Other)) |
956 | return Call->getAttributes() |
957 | .intersectWith(C&: Call->getContext(), Other: RHS->Call->getAttributes()) |
958 | .has_value(); |
959 | |
960 | return false; |
961 | } |
962 | |
963 | // Determine if the edge From->To is a backedge |
964 | bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const { |
965 | return From == To || |
966 | RPOOrdering.lookup(Val: DT->getNode(BB: From)) >= |
967 | RPOOrdering.lookup(Val: DT->getNode(BB: To)); |
968 | } |
969 | |
970 | #ifndef NDEBUG |
971 | static std::string getBlockName(const BasicBlock *B) { |
972 | return DOTGraphTraits<DOTFuncInfo *>::getSimpleNodeLabel(B, nullptr); |
973 | } |
974 | #endif |
975 | |
976 | // Get a MemoryAccess for an instruction, fake or real. |
977 | MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const { |
978 | auto *Result = MSSA->getMemoryAccess(I); |
979 | return Result ? Result : TempToMemory.lookup(Val: I); |
980 | } |
981 | |
982 | // Get a MemoryPhi for a basic block. These are all real. |
983 | MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const { |
984 | return MSSA->getMemoryAccess(BB); |
985 | } |
986 | |
987 | // Get the basic block from an instruction/memory value. |
988 | BasicBlock *NewGVN::getBlockForValue(Value *V) const { |
989 | if (auto *I = dyn_cast<Instruction>(Val: V)) { |
990 | auto *Parent = I->getParent(); |
991 | if (Parent) |
992 | return Parent; |
993 | Parent = TempToBlock.lookup(Val: V); |
994 | assert(Parent && "Every fake instruction should have a block" ); |
995 | return Parent; |
996 | } |
997 | |
998 | auto *MP = dyn_cast<MemoryPhi>(Val: V); |
999 | assert(MP && "Should have been an instruction or a MemoryPhi" ); |
1000 | return MP->getBlock(); |
1001 | } |
1002 | |
1003 | // Delete a definitely dead expression, so it can be reused by the expression |
1004 | // allocator. Some of these are not in creation functions, so we have to accept |
1005 | // const versions. |
1006 | void NewGVN::deleteExpression(const Expression *E) const { |
1007 | assert(isa<BasicExpression>(E)); |
1008 | auto *BE = cast<BasicExpression>(Val: E); |
1009 | const_cast<BasicExpression *>(BE)->deallocateOperands(Recycler&: ArgRecycler); |
1010 | ExpressionAllocator.Deallocate(Ptr: E); |
1011 | } |
1012 | |
1013 | // If V is a predicateinfo copy, get the thing it is a copy of. |
1014 | static Value *getCopyOf(const Value *V) { |
1015 | if (auto *II = dyn_cast<IntrinsicInst>(Val: V)) |
1016 | if (II->getIntrinsicID() == Intrinsic::ssa_copy) |
1017 | return II->getOperand(i_nocapture: 0); |
1018 | return nullptr; |
1019 | } |
1020 | |
1021 | // Return true if V is really PN, even accounting for predicateinfo copies. |
1022 | static bool isCopyOfPHI(const Value *V, const PHINode *PN) { |
1023 | return V == PN || getCopyOf(V) == PN; |
1024 | } |
1025 | |
1026 | static bool isCopyOfAPHI(const Value *V) { |
1027 | auto *CO = getCopyOf(V); |
1028 | return CO && isa<PHINode>(Val: CO); |
1029 | } |
1030 | |
1031 | // Sort PHI Operands into a canonical order. What we use here is an RPO |
1032 | // order. The BlockInstRange numbers are generated in an RPO walk of the basic |
1033 | // blocks. |
1034 | void NewGVN::sortPHIOps(MutableArrayRef<ValPair> Ops) const { |
1035 | llvm::sort(C&: Ops, Comp: [&](const ValPair &P1, const ValPair &P2) { |
1036 | return BlockInstRange.lookup(Val: P1.second).first < |
1037 | BlockInstRange.lookup(Val: P2.second).first; |
1038 | }); |
1039 | } |
1040 | |
1041 | // Return true if V is a value that will always be available (IE can |
1042 | // be placed anywhere) in the function. We don't do globals here |
1043 | // because they are often worse to put in place. |
1044 | static bool alwaysAvailable(Value *V) { |
1045 | return isa<Constant>(Val: V) || isa<Argument>(Val: V); |
1046 | } |
1047 | |
1048 | // Create a PHIExpression from an array of {incoming edge, value} pairs. I is |
1049 | // the original instruction we are creating a PHIExpression for (but may not be |
1050 | // a phi node). We require, as an invariant, that all the PHIOperands in the |
1051 | // same block are sorted the same way. sortPHIOps will sort them into a |
1052 | // canonical order. |
1053 | PHIExpression *NewGVN::createPHIExpression(ArrayRef<ValPair> PHIOperands, |
1054 | const Instruction *I, |
1055 | BasicBlock *PHIBlock, |
1056 | bool &HasBackedge, |
1057 | bool &OriginalOpsConstant) const { |
1058 | unsigned NumOps = PHIOperands.size(); |
1059 | auto *E = new (ExpressionAllocator) PHIExpression(NumOps, PHIBlock); |
1060 | |
1061 | E->allocateOperands(Recycler&: ArgRecycler, Allocator&: ExpressionAllocator); |
1062 | E->setType(PHIOperands.begin()->first->getType()); |
1063 | E->setOpcode(Instruction::PHI); |
1064 | |
1065 | // Filter out unreachable phi operands. |
1066 | auto Filtered = make_filter_range(Range&: PHIOperands, Pred: [&](const ValPair &P) { |
1067 | auto *BB = P.second; |
1068 | if (auto *PHIOp = dyn_cast<PHINode>(Val: I)) |
1069 | if (isCopyOfPHI(V: P.first, PN: PHIOp)) |
1070 | return false; |
1071 | if (!ReachableEdges.count(V: {BB, PHIBlock})) |
1072 | return false; |
1073 | // Things in TOPClass are equivalent to everything. |
1074 | if (ValueToClass.lookup(Val: P.first) == TOPClass) |
1075 | return false; |
1076 | OriginalOpsConstant = OriginalOpsConstant && isa<Constant>(Val: P.first); |
1077 | HasBackedge = HasBackedge || isBackedge(From: BB, To: PHIBlock); |
1078 | return lookupOperandLeader(P.first) != I; |
1079 | }); |
1080 | llvm::transform(Range&: Filtered, d_first: op_inserter(E), F: [&](const ValPair &P) -> Value * { |
1081 | return lookupOperandLeader(P.first); |
1082 | }); |
1083 | return E; |
1084 | } |
1085 | |
1086 | // Set basic expression info (Arguments, type, opcode) for Expression |
1087 | // E from Instruction I in block B. |
1088 | bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const { |
1089 | bool AllConstant = true; |
1090 | if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I)) |
1091 | E->setType(GEP->getSourceElementType()); |
1092 | else |
1093 | E->setType(I->getType()); |
1094 | E->setOpcode(I->getOpcode()); |
1095 | E->allocateOperands(Recycler&: ArgRecycler, Allocator&: ExpressionAllocator); |
1096 | |
1097 | // Transform the operand array into an operand leader array, and keep track of |
1098 | // whether all members are constant. |
1099 | std::transform(first: I->op_begin(), last: I->op_end(), result: op_inserter(E), unary_op: [&](Value *O) { |
1100 | auto Operand = lookupOperandLeader(O); |
1101 | AllConstant = AllConstant && isa<Constant>(Val: Operand); |
1102 | return Operand; |
1103 | }); |
1104 | |
1105 | return AllConstant; |
1106 | } |
1107 | |
1108 | const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, |
1109 | Value *Arg1, Value *Arg2, |
1110 | Instruction *I) const { |
1111 | auto *E = new (ExpressionAllocator) BasicExpression(2); |
1112 | // TODO: we need to remove context instruction after Value Tracking |
1113 | // can run without context instruction |
1114 | const SimplifyQuery Q = SQ.getWithInstruction(I); |
1115 | |
1116 | E->setType(T); |
1117 | E->setOpcode(Opcode); |
1118 | E->allocateOperands(Recycler&: ArgRecycler, Allocator&: ExpressionAllocator); |
1119 | if (Instruction::isCommutative(Opcode)) { |
1120 | // Ensure that commutative instructions that only differ by a permutation |
1121 | // of their operands get the same value number by sorting the operand value |
1122 | // numbers. Since all commutative instructions have two operands it is more |
1123 | // efficient to sort by hand rather than using, say, std::sort. |
1124 | if (shouldSwapOperands(Arg1, Arg2)) |
1125 | std::swap(a&: Arg1, b&: Arg2); |
1126 | } |
1127 | E->op_push_back(Arg: lookupOperandLeader(Arg1)); |
1128 | E->op_push_back(Arg: lookupOperandLeader(Arg2)); |
1129 | |
1130 | Value *V = simplifyBinOp(Opcode, LHS: E->getOperand(N: 0), RHS: E->getOperand(N: 1), Q); |
1131 | if (auto Simplified = checkExprResults(E, I, V)) { |
1132 | addAdditionalUsers(Res&: Simplified, User: I); |
1133 | return Simplified.Expr; |
1134 | } |
1135 | return E; |
1136 | } |
1137 | |
1138 | // Take a Value returned by simplification of Expression E/Instruction |
1139 | // I, and see if it resulted in a simpler expression. If so, return |
1140 | // that expression. |
1141 | NewGVN::ExprResult NewGVN::checkExprResults(Expression *E, Instruction *I, |
1142 | Value *V) const { |
1143 | if (!V) |
1144 | return ExprResult::none(); |
1145 | |
1146 | if (auto *C = dyn_cast<Constant>(Val: V)) { |
1147 | if (I) |
1148 | LLVM_DEBUG(dbgs() << "Simplified " << *I << " to " |
1149 | << " constant " << *C << "\n" ); |
1150 | NumGVNOpsSimplified++; |
1151 | assert(isa<BasicExpression>(E) && |
1152 | "We should always have had a basic expression here" ); |
1153 | deleteExpression(E); |
1154 | return ExprResult::some(Expr: createConstantExpression(C)); |
1155 | } else if (isa<Argument>(Val: V) || isa<GlobalVariable>(Val: V)) { |
1156 | if (I) |
1157 | LLVM_DEBUG(dbgs() << "Simplified " << *I << " to " |
1158 | << " variable " << *V << "\n" ); |
1159 | deleteExpression(E); |
1160 | return ExprResult::some(Expr: createVariableExpression(V)); |
1161 | } |
1162 | |
1163 | CongruenceClass *CC = ValueToClass.lookup(Val: V); |
1164 | if (CC) { |
1165 | if (CC->getLeader() && CC->getLeader() != I) { |
1166 | return ExprResult::some(Expr: createVariableOrConstant(V: CC->getLeader()), ExtraDep: V); |
1167 | } |
1168 | if (CC->getDefiningExpr()) { |
1169 | if (I) |
1170 | LLVM_DEBUG(dbgs() << "Simplified " << *I << " to " |
1171 | << " expression " << *CC->getDefiningExpr() << "\n" ); |
1172 | NumGVNOpsSimplified++; |
1173 | deleteExpression(E); |
1174 | return ExprResult::some(Expr: CC->getDefiningExpr(), ExtraDep: V); |
1175 | } |
1176 | } |
1177 | |
1178 | return ExprResult::none(); |
1179 | } |
1180 | |
1181 | // Create a value expression from the instruction I, replacing operands with |
1182 | // their leaders. |
1183 | |
1184 | NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const { |
1185 | auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands()); |
1186 | // TODO: we need to remove context instruction after Value Tracking |
1187 | // can run without context instruction |
1188 | const SimplifyQuery Q = SQ.getWithInstruction(I); |
1189 | |
1190 | bool AllConstant = setBasicExpressionInfo(I, E); |
1191 | |
1192 | if (I->isCommutative()) { |
1193 | // Ensure that commutative instructions that only differ by a permutation |
1194 | // of their operands get the same value number by sorting the operand value |
1195 | // numbers. Since all commutative instructions have two operands it is more |
1196 | // efficient to sort by hand rather than using, say, std::sort. |
1197 | assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!" ); |
1198 | if (shouldSwapOperands(E->getOperand(N: 0), E->getOperand(N: 1))) |
1199 | E->swapOperands(First: 0, Second: 1); |
1200 | } |
1201 | // Perform simplification. |
1202 | if (auto *CI = dyn_cast<CmpInst>(Val: I)) { |
1203 | // Sort the operand value numbers so x<y and y>x get the same value |
1204 | // number. |
1205 | CmpInst::Predicate Predicate = CI->getPredicate(); |
1206 | if (shouldSwapOperands(E->getOperand(N: 0), E->getOperand(N: 1))) { |
1207 | E->swapOperands(First: 0, Second: 1); |
1208 | Predicate = CmpInst::getSwappedPredicate(pred: Predicate); |
1209 | } |
1210 | E->setOpcode((CI->getOpcode() << 8) | Predicate); |
1211 | // TODO: 25% of our time is spent in simplifyCmpInst with pointer operands |
1212 | assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() && |
1213 | "Wrong types on cmp instruction" ); |
1214 | assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() && |
1215 | E->getOperand(1)->getType() == I->getOperand(1)->getType())); |
1216 | Value *V = |
1217 | simplifyCmpInst(Predicate, LHS: E->getOperand(N: 0), RHS: E->getOperand(N: 1), Q); |
1218 | if (auto Simplified = checkExprResults(E, I, V)) |
1219 | return Simplified; |
1220 | } else if (isa<SelectInst>(Val: I)) { |
1221 | if (isa<Constant>(Val: E->getOperand(N: 0)) || |
1222 | E->getOperand(N: 1) == E->getOperand(N: 2)) { |
1223 | assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() && |
1224 | E->getOperand(2)->getType() == I->getOperand(2)->getType()); |
1225 | Value *V = simplifySelectInst(Cond: E->getOperand(N: 0), TrueVal: E->getOperand(N: 1), |
1226 | FalseVal: E->getOperand(N: 2), Q); |
1227 | if (auto Simplified = checkExprResults(E, I, V)) |
1228 | return Simplified; |
1229 | } |
1230 | } else if (I->isBinaryOp()) { |
1231 | Value *V = |
1232 | simplifyBinOp(Opcode: E->getOpcode(), LHS: E->getOperand(N: 0), RHS: E->getOperand(N: 1), Q); |
1233 | if (auto Simplified = checkExprResults(E, I, V)) |
1234 | return Simplified; |
1235 | } else if (auto *CI = dyn_cast<CastInst>(Val: I)) { |
1236 | Value *V = |
1237 | simplifyCastInst(CastOpc: CI->getOpcode(), Op: E->getOperand(N: 0), Ty: CI->getType(), Q); |
1238 | if (auto Simplified = checkExprResults(E, I, V)) |
1239 | return Simplified; |
1240 | } else if (auto *GEPI = dyn_cast<GetElementPtrInst>(Val: I)) { |
1241 | Value *V = simplifyGEPInst(SrcTy: GEPI->getSourceElementType(), Ptr: *E->op_begin(), |
1242 | Indices: ArrayRef(std::next(x: E->op_begin()), E->op_end()), |
1243 | NW: GEPI->getNoWrapFlags(), Q); |
1244 | if (auto Simplified = checkExprResults(E, I, V)) |
1245 | return Simplified; |
1246 | } else if (AllConstant) { |
1247 | // We don't bother trying to simplify unless all of the operands |
1248 | // were constant. |
1249 | // TODO: There are a lot of Simplify*'s we could call here, if we |
1250 | // wanted to. The original motivating case for this code was a |
1251 | // zext i1 false to i8, which we don't have an interface to |
1252 | // simplify (IE there is no SimplifyZExt). |
1253 | |
1254 | SmallVector<Constant *, 8> C; |
1255 | for (Value *Arg : E->operands()) |
1256 | C.emplace_back(Args: cast<Constant>(Val: Arg)); |
1257 | |
1258 | if (Value *V = ConstantFoldInstOperands(I, Ops: C, DL, TLI)) |
1259 | if (auto Simplified = checkExprResults(E, I, V)) |
1260 | return Simplified; |
1261 | } |
1262 | return ExprResult::some(Expr: E); |
1263 | } |
1264 | |
1265 | const AggregateValueExpression * |
1266 | NewGVN::createAggregateValueExpression(Instruction *I) const { |
1267 | if (auto *II = dyn_cast<InsertValueInst>(Val: I)) { |
1268 | auto *E = new (ExpressionAllocator) |
1269 | AggregateValueExpression(I->getNumOperands(), II->getNumIndices()); |
1270 | setBasicExpressionInfo(I, E); |
1271 | E->allocateIntOperands(Allocator&: ExpressionAllocator); |
1272 | llvm::copy(Range: II->indices(), Out: int_op_inserter(E)); |
1273 | return E; |
1274 | } else if (auto *EI = dyn_cast<ExtractValueInst>(Val: I)) { |
1275 | auto *E = new (ExpressionAllocator) |
1276 | AggregateValueExpression(I->getNumOperands(), EI->getNumIndices()); |
1277 | setBasicExpressionInfo(I: EI, E); |
1278 | E->allocateIntOperands(Allocator&: ExpressionAllocator); |
1279 | llvm::copy(Range: EI->indices(), Out: int_op_inserter(E)); |
1280 | return E; |
1281 | } |
1282 | llvm_unreachable("Unhandled type of aggregate value operation" ); |
1283 | } |
1284 | |
1285 | const DeadExpression *NewGVN::createDeadExpression() const { |
1286 | // DeadExpression has no arguments and all DeadExpression's are the same, |
1287 | // so we only need one of them. |
1288 | return SingletonDeadExpression; |
1289 | } |
1290 | |
1291 | const VariableExpression *NewGVN::createVariableExpression(Value *V) const { |
1292 | auto *E = new (ExpressionAllocator) VariableExpression(V); |
1293 | E->setOpcode(V->getValueID()); |
1294 | return E; |
1295 | } |
1296 | |
1297 | const Expression *NewGVN::createVariableOrConstant(Value *V) const { |
1298 | if (auto *C = dyn_cast<Constant>(Val: V)) |
1299 | return createConstantExpression(C); |
1300 | return createVariableExpression(V); |
1301 | } |
1302 | |
1303 | const ConstantExpression *NewGVN::createConstantExpression(Constant *C) const { |
1304 | auto *E = new (ExpressionAllocator) ConstantExpression(C); |
1305 | E->setOpcode(C->getValueID()); |
1306 | return E; |
1307 | } |
1308 | |
1309 | const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const { |
1310 | auto *E = new (ExpressionAllocator) UnknownExpression(I); |
1311 | E->setOpcode(I->getOpcode()); |
1312 | return E; |
1313 | } |
1314 | |
1315 | const CallExpression * |
1316 | NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const { |
1317 | // FIXME: Add operand bundles for calls. |
1318 | auto *E = |
1319 | new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA); |
1320 | setBasicExpressionInfo(I: CI, E); |
1321 | if (CI->isCommutative()) { |
1322 | // Ensure that commutative intrinsics that only differ by a permutation |
1323 | // of their operands get the same value number by sorting the operand value |
1324 | // numbers. |
1325 | assert(CI->getNumOperands() >= 2 && "Unsupported commutative intrinsic!" ); |
1326 | if (shouldSwapOperands(E->getOperand(N: 0), E->getOperand(N: 1))) |
1327 | E->swapOperands(First: 0, Second: 1); |
1328 | } |
1329 | return E; |
1330 | } |
1331 | |
1332 | // Return true if some equivalent of instruction Inst dominates instruction U. |
1333 | bool NewGVN::someEquivalentDominates(const Instruction *Inst, |
1334 | const Instruction *U) const { |
1335 | auto *CC = ValueToClass.lookup(Val: Inst); |
1336 | // This must be an instruction because we are only called from phi nodes |
1337 | // in the case that the value it needs to check against is an instruction. |
1338 | |
1339 | // The most likely candidates for dominance are the leader and the next leader. |
1340 | // The leader or nextleader will dominate in all cases where there is an |
1341 | // equivalent that is higher up in the dom tree. |
1342 | // We can't *only* check them, however, because the |
1343 | // dominator tree could have an infinite number of non-dominating siblings |
1344 | // with instructions that are in the right congruence class. |
1345 | // A |
1346 | // B C D E F G |
1347 | // | |
1348 | // H |
1349 | // Instruction U could be in H, with equivalents in every other sibling. |
1350 | // Depending on the rpo order picked, the leader could be the equivalent in |
1351 | // any of these siblings. |
1352 | if (!CC) |
1353 | return false; |
1354 | if (alwaysAvailable(V: CC->getLeader())) |
1355 | return true; |
1356 | if (DT->dominates(Def: cast<Instruction>(Val: CC->getLeader()), User: U)) |
1357 | return true; |
1358 | if (CC->getNextLeader().first && |
1359 | DT->dominates(Def: cast<Instruction>(Val: CC->getNextLeader().first), User: U)) |
1360 | return true; |
1361 | return llvm::any_of(Range&: *CC, P: [&](const Value *Member) { |
1362 | return Member != CC->getLeader() && |
1363 | DT->dominates(Def: cast<Instruction>(Val: Member), User: U); |
1364 | }); |
1365 | } |
1366 | |
1367 | // See if we have a congruence class and leader for this operand, and if so, |
1368 | // return it. Otherwise, return the operand itself. |
1369 | Value *NewGVN::lookupOperandLeader(Value *V) const { |
1370 | CongruenceClass *CC = ValueToClass.lookup(Val: V); |
1371 | if (CC) { |
1372 | // Everything in TOP is represented by poison, as it can be any value. |
1373 | // We do have to make sure we get the type right though, so we can't set the |
1374 | // RepLeader to poison. |
1375 | if (CC == TOPClass) |
1376 | return PoisonValue::get(T: V->getType()); |
1377 | return CC->getStoredValue() ? CC->getStoredValue() : CC->getLeader(); |
1378 | } |
1379 | |
1380 | return V; |
1381 | } |
1382 | |
1383 | const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const { |
1384 | auto *CC = getMemoryClass(MA); |
1385 | assert(CC->getMemoryLeader() && |
1386 | "Every MemoryAccess should be mapped to a congruence class with a " |
1387 | "representative memory access" ); |
1388 | return CC->getMemoryLeader(); |
1389 | } |
1390 | |
1391 | // Return true if the MemoryAccess is really equivalent to everything. This is |
1392 | // equivalent to the lattice value "TOP" in most lattices. This is the initial |
1393 | // state of all MemoryAccesses. |
1394 | bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const { |
1395 | return getMemoryClass(MA) == TOPClass; |
1396 | } |
1397 | |
1398 | LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, |
1399 | LoadInst *LI, |
1400 | const MemoryAccess *MA) const { |
1401 | auto *E = |
1402 | new (ExpressionAllocator) LoadExpression(1, LI, lookupMemoryLeader(MA)); |
1403 | E->allocateOperands(Recycler&: ArgRecycler, Allocator&: ExpressionAllocator); |
1404 | E->setType(LoadType); |
1405 | |
1406 | // Give store and loads same opcode so they value number together. |
1407 | E->setOpcode(0); |
1408 | E->op_push_back(Arg: PointerOp); |
1409 | |
1410 | // TODO: Value number heap versions. We may be able to discover |
1411 | // things alias analysis can't on it's own (IE that a store and a |
1412 | // load have the same value, and thus, it isn't clobbering the load). |
1413 | return E; |
1414 | } |
1415 | |
1416 | const StoreExpression * |
1417 | NewGVN::createStoreExpression(StoreInst *SI, const MemoryAccess *MA) const { |
1418 | auto *StoredValueLeader = lookupOperandLeader(V: SI->getValueOperand()); |
1419 | auto *E = new (ExpressionAllocator) |
1420 | StoreExpression(SI->getNumOperands(), SI, StoredValueLeader, MA); |
1421 | E->allocateOperands(Recycler&: ArgRecycler, Allocator&: ExpressionAllocator); |
1422 | E->setType(SI->getValueOperand()->getType()); |
1423 | |
1424 | // Give store and loads same opcode so they value number together. |
1425 | E->setOpcode(0); |
1426 | E->op_push_back(Arg: lookupOperandLeader(V: SI->getPointerOperand())); |
1427 | |
1428 | // TODO: Value number heap versions. We may be able to discover |
1429 | // things alias analysis can't on it's own (IE that a store and a |
1430 | // load have the same value, and thus, it isn't clobbering the load). |
1431 | return E; |
1432 | } |
1433 | |
1434 | const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { |
1435 | // Unlike loads, we never try to eliminate stores, so we do not check if they |
1436 | // are simple and avoid value numbering them. |
1437 | auto *SI = cast<StoreInst>(Val: I); |
1438 | auto *StoreAccess = getMemoryAccess(I: SI); |
1439 | // Get the expression, if any, for the RHS of the MemoryDef. |
1440 | const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess(); |
1441 | if (EnableStoreRefinement) |
1442 | StoreRHS = MSSAWalker->getClobberingMemoryAccess(MA: StoreAccess); |
1443 | // If we bypassed the use-def chains, make sure we add a use. |
1444 | StoreRHS = lookupMemoryLeader(MA: StoreRHS); |
1445 | if (StoreRHS != StoreAccess->getDefiningAccess()) |
1446 | addMemoryUsers(To: StoreRHS, U: StoreAccess); |
1447 | // If we are defined by ourselves, use the live on entry def. |
1448 | if (StoreRHS == StoreAccess) |
1449 | StoreRHS = MSSA->getLiveOnEntryDef(); |
1450 | |
1451 | if (SI->isSimple()) { |
1452 | // See if we are defined by a previous store expression, it already has a |
1453 | // value, and it's the same value as our current store. FIXME: Right now, we |
1454 | // only do this for simple stores, we should expand to cover memcpys, etc. |
1455 | const auto *LastStore = createStoreExpression(SI, MA: StoreRHS); |
1456 | const auto *LastCC = ExpressionToClass.lookup(Val: LastStore); |
1457 | // We really want to check whether the expression we matched was a store. No |
1458 | // easy way to do that. However, we can check that the class we found has a |
1459 | // store, which, assuming the value numbering state is not corrupt, is |
1460 | // sufficient, because we must also be equivalent to that store's expression |
1461 | // for it to be in the same class as the load. |
1462 | if (LastCC && LastCC->getStoredValue() == LastStore->getStoredValue()) |
1463 | return LastStore; |
1464 | // Also check if our value operand is defined by a load of the same memory |
1465 | // location, and the memory state is the same as it was then (otherwise, it |
1466 | // could have been overwritten later. See test32 in |
1467 | // transforms/DeadStoreElimination/simple.ll). |
1468 | if (auto *LI = dyn_cast<LoadInst>(Val: LastStore->getStoredValue())) |
1469 | if ((lookupOperandLeader(V: LI->getPointerOperand()) == |
1470 | LastStore->getOperand(N: 0)) && |
1471 | (lookupMemoryLeader(MA: getMemoryAccess(I: LI)->getDefiningAccess()) == |
1472 | StoreRHS)) |
1473 | return LastStore; |
1474 | deleteExpression(E: LastStore); |
1475 | } |
1476 | |
1477 | // If the store is not equivalent to anything, value number it as a store that |
1478 | // produces a unique memory state (instead of using it's MemoryUse, we use |
1479 | // it's MemoryDef). |
1480 | return createStoreExpression(SI, MA: StoreAccess); |
1481 | } |
1482 | |
1483 | // See if we can extract the value of a loaded pointer from a load, a store, or |
1484 | // a memory instruction. |
1485 | const Expression * |
1486 | NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, |
1487 | LoadInst *LI, Instruction *DepInst, |
1488 | MemoryAccess *DefiningAccess) const { |
1489 | assert((!LI || LI->isSimple()) && "Not a simple load" ); |
1490 | if (auto *DepSI = dyn_cast<StoreInst>(Val: DepInst)) { |
1491 | // Can't forward from non-atomic to atomic without violating memory model. |
1492 | // Also don't need to coerce if they are the same type, we will just |
1493 | // propagate. |
1494 | if (LI->isAtomic() > DepSI->isAtomic() || |
1495 | LoadType == DepSI->getValueOperand()->getType()) |
1496 | return nullptr; |
1497 | int Offset = analyzeLoadFromClobberingStore(LoadTy: LoadType, LoadPtr, DepSI, DL); |
1498 | if (Offset >= 0) { |
1499 | if (auto *C = dyn_cast<Constant>( |
1500 | Val: lookupOperandLeader(V: DepSI->getValueOperand()))) { |
1501 | if (Constant *Res = getConstantValueForLoad(SrcVal: C, Offset, LoadTy: LoadType, DL)) { |
1502 | LLVM_DEBUG(dbgs() << "Coercing load from store " << *DepSI |
1503 | << " to constant " << *Res << "\n" ); |
1504 | return createConstantExpression(C: Res); |
1505 | } |
1506 | } |
1507 | } |
1508 | } else if (auto *DepLI = dyn_cast<LoadInst>(Val: DepInst)) { |
1509 | // Can't forward from non-atomic to atomic without violating memory model. |
1510 | if (LI->isAtomic() > DepLI->isAtomic()) |
1511 | return nullptr; |
1512 | int Offset = analyzeLoadFromClobberingLoad(LoadTy: LoadType, LoadPtr, DepLI, DL); |
1513 | if (Offset >= 0) { |
1514 | // We can coerce a constant load into a load. |
1515 | if (auto *C = dyn_cast<Constant>(Val: lookupOperandLeader(V: DepLI))) |
1516 | if (auto *PossibleConstant = |
1517 | getConstantValueForLoad(SrcVal: C, Offset, LoadTy: LoadType, DL)) { |
1518 | LLVM_DEBUG(dbgs() << "Coercing load from load " << *LI |
1519 | << " to constant " << *PossibleConstant << "\n" ); |
1520 | return createConstantExpression(C: PossibleConstant); |
1521 | } |
1522 | } |
1523 | } else if (auto *DepMI = dyn_cast<MemIntrinsic>(Val: DepInst)) { |
1524 | int Offset = analyzeLoadFromClobberingMemInst(LoadTy: LoadType, LoadPtr, DepMI, DL); |
1525 | if (Offset >= 0) { |
1526 | if (auto *PossibleConstant = |
1527 | getConstantMemInstValueForLoad(SrcInst: DepMI, Offset, LoadTy: LoadType, DL)) { |
1528 | LLVM_DEBUG(dbgs() << "Coercing load from meminst " << *DepMI |
1529 | << " to constant " << *PossibleConstant << "\n" ); |
1530 | return createConstantExpression(C: PossibleConstant); |
1531 | } |
1532 | } |
1533 | } |
1534 | |
1535 | if (auto *II = dyn_cast<IntrinsicInst>(Val: DepInst)) { |
1536 | if (II->getIntrinsicID() == Intrinsic::lifetime_start) { |
1537 | auto *LifetimePtr = II->getOperand(i_nocapture: 1); |
1538 | if (LoadPtr == lookupOperandLeader(V: LifetimePtr) || |
1539 | AA->isMustAlias(V1: LoadPtr, V2: LifetimePtr)) |
1540 | return createConstantExpression(C: UndefValue::get(T: LoadType)); |
1541 | } |
1542 | } |
1543 | |
1544 | // All of the below are only true if the loaded pointer is produced |
1545 | // by the dependent instruction. |
1546 | if (!DepInst->getType()->isPointerTy() || |
1547 | (LoadPtr != lookupOperandLeader(V: DepInst) && |
1548 | !AA->isMustAlias(V1: LoadPtr, V2: DepInst))) |
1549 | return nullptr; |
1550 | // If this load really doesn't depend on anything, then we must be loading an |
1551 | // undef value. This can happen when loading for a fresh allocation with no |
1552 | // intervening stores, for example. Note that this is only true in the case |
1553 | // that the result of the allocation is pointer equal to the load ptr. |
1554 | if (isa<AllocaInst>(Val: DepInst)) { |
1555 | return createConstantExpression(C: UndefValue::get(T: LoadType)); |
1556 | } else if (auto *InitVal = |
1557 | getInitialValueOfAllocation(V: DepInst, TLI, Ty: LoadType)) |
1558 | return createConstantExpression(C: InitVal); |
1559 | |
1560 | return nullptr; |
1561 | } |
1562 | |
1563 | const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { |
1564 | auto *LI = cast<LoadInst>(Val: I); |
1565 | |
1566 | // We can eliminate in favor of non-simple loads, but we won't be able to |
1567 | // eliminate the loads themselves. |
1568 | if (!LI->isSimple()) |
1569 | return nullptr; |
1570 | |
1571 | Value *LoadAddressLeader = lookupOperandLeader(V: LI->getPointerOperand()); |
1572 | // Load of undef is UB. |
1573 | if (isa<UndefValue>(Val: LoadAddressLeader)) |
1574 | return createConstantExpression(C: PoisonValue::get(T: LI->getType())); |
1575 | MemoryAccess *OriginalAccess = getMemoryAccess(I); |
1576 | MemoryAccess *DefiningAccess = |
1577 | MSSAWalker->getClobberingMemoryAccess(MA: OriginalAccess); |
1578 | |
1579 | if (!MSSA->isLiveOnEntryDef(MA: DefiningAccess)) { |
1580 | if (auto *MD = dyn_cast<MemoryDef>(Val: DefiningAccess)) { |
1581 | Instruction *DefiningInst = MD->getMemoryInst(); |
1582 | // If the defining instruction is not reachable, replace with poison. |
1583 | if (!ReachableBlocks.count(Ptr: DefiningInst->getParent())) |
1584 | return createConstantExpression(C: PoisonValue::get(T: LI->getType())); |
1585 | // This will handle stores and memory insts. We only do if it the |
1586 | // defining access has a different type, or it is a pointer produced by |
1587 | // certain memory operations that cause the memory to have a fixed value |
1588 | // (IE things like calloc). |
1589 | if (const auto *CoercionResult = |
1590 | performSymbolicLoadCoercion(LoadType: LI->getType(), LoadPtr: LoadAddressLeader, LI, |
1591 | DepInst: DefiningInst, DefiningAccess)) |
1592 | return CoercionResult; |
1593 | } |
1594 | } |
1595 | |
1596 | const auto *LE = createLoadExpression(LoadType: LI->getType(), PointerOp: LoadAddressLeader, LI, |
1597 | MA: DefiningAccess); |
1598 | // If our MemoryLeader is not our defining access, add a use to the |
1599 | // MemoryLeader, so that we get reprocessed when it changes. |
1600 | if (LE->getMemoryLeader() != DefiningAccess) |
1601 | addMemoryUsers(To: LE->getMemoryLeader(), U: OriginalAccess); |
1602 | return LE; |
1603 | } |
1604 | |
1605 | NewGVN::ExprResult |
1606 | NewGVN::performSymbolicPredicateInfoEvaluation(IntrinsicInst *I) const { |
1607 | auto *PI = PredInfo->getPredicateInfoFor(V: I); |
1608 | if (!PI) |
1609 | return ExprResult::none(); |
1610 | |
1611 | LLVM_DEBUG(dbgs() << "Found predicate info from instruction !\n" ); |
1612 | |
1613 | const std::optional<PredicateConstraint> &Constraint = PI->getConstraint(); |
1614 | if (!Constraint) |
1615 | return ExprResult::none(); |
1616 | |
1617 | CmpInst::Predicate Predicate = Constraint->Predicate; |
1618 | Value *CmpOp0 = I->getOperand(i_nocapture: 0); |
1619 | Value *CmpOp1 = Constraint->OtherOp; |
1620 | |
1621 | Value *FirstOp = lookupOperandLeader(V: CmpOp0); |
1622 | Value *SecondOp = lookupOperandLeader(V: CmpOp1); |
1623 | Value *AdditionallyUsedValue = CmpOp0; |
1624 | |
1625 | // Sort the ops. |
1626 | if (shouldSwapOperandsForIntrinsic(FirstOp, SecondOp, I)) { |
1627 | std::swap(a&: FirstOp, b&: SecondOp); |
1628 | Predicate = CmpInst::getSwappedPredicate(pred: Predicate); |
1629 | AdditionallyUsedValue = CmpOp1; |
1630 | } |
1631 | |
1632 | if (Predicate == CmpInst::ICMP_EQ) |
1633 | return ExprResult::some(Expr: createVariableOrConstant(V: FirstOp), |
1634 | ExtraDep: AdditionallyUsedValue, PredDep: PI); |
1635 | |
1636 | // Handle the special case of floating point. |
1637 | if (Predicate == CmpInst::FCMP_OEQ && isa<ConstantFP>(Val: FirstOp) && |
1638 | !cast<ConstantFP>(Val: FirstOp)->isZero()) |
1639 | return ExprResult::some(Expr: createConstantExpression(C: cast<Constant>(Val: FirstOp)), |
1640 | ExtraDep: AdditionallyUsedValue, PredDep: PI); |
1641 | |
1642 | return ExprResult::none(); |
1643 | } |
1644 | |
1645 | // Evaluate read only and pure calls, and create an expression result. |
1646 | NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const { |
1647 | auto *CI = cast<CallInst>(Val: I); |
1648 | if (auto *II = dyn_cast<IntrinsicInst>(Val: I)) { |
1649 | // Intrinsics with the returned attribute are copies of arguments. |
1650 | if (auto *ReturnedValue = II->getReturnedArgOperand()) { |
1651 | if (II->getIntrinsicID() == Intrinsic::ssa_copy) |
1652 | if (auto Res = performSymbolicPredicateInfoEvaluation(I: II)) |
1653 | return Res; |
1654 | return ExprResult::some(Expr: createVariableOrConstant(V: ReturnedValue)); |
1655 | } |
1656 | } |
1657 | |
1658 | // FIXME: Currently the calls which may access the thread id may |
1659 | // be considered as not accessing the memory. But this is |
1660 | // problematic for coroutines, since coroutines may resume in a |
1661 | // different thread. So we disable the optimization here for the |
1662 | // correctness. However, it may block many other correct |
1663 | // optimizations. Revert this one when we detect the memory |
1664 | // accessing kind more precisely. |
1665 | if (CI->getFunction()->isPresplitCoroutine()) |
1666 | return ExprResult::none(); |
1667 | |
1668 | // Do not combine convergent calls since they implicitly depend on the set of |
1669 | // threads that is currently executing, and they might be in different basic |
1670 | // blocks. |
1671 | if (CI->isConvergent()) |
1672 | return ExprResult::none(); |
1673 | |
1674 | if (AA->doesNotAccessMemory(Call: CI)) { |
1675 | return ExprResult::some( |
1676 | Expr: createCallExpression(CI, MA: TOPClass->getMemoryLeader())); |
1677 | } else if (AA->onlyReadsMemory(Call: CI)) { |
1678 | if (auto *MA = MSSA->getMemoryAccess(I: CI)) { |
1679 | auto *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(MA); |
1680 | return ExprResult::some(Expr: createCallExpression(CI, MA: DefiningAccess)); |
1681 | } else // MSSA determined that CI does not access memory. |
1682 | return ExprResult::some( |
1683 | Expr: createCallExpression(CI, MA: TOPClass->getMemoryLeader())); |
1684 | } |
1685 | return ExprResult::none(); |
1686 | } |
1687 | |
1688 | // Retrieve the memory class for a given MemoryAccess. |
1689 | CongruenceClass *NewGVN::getMemoryClass(const MemoryAccess *MA) const { |
1690 | auto *Result = MemoryAccessToClass.lookup(Val: MA); |
1691 | assert(Result && "Should have found memory class" ); |
1692 | return Result; |
1693 | } |
1694 | |
1695 | // Update the MemoryAccess equivalence table to say that From is equal to To, |
1696 | // and return true if this is different from what already existed in the table. |
1697 | bool NewGVN::setMemoryClass(const MemoryAccess *From, |
1698 | CongruenceClass *NewClass) { |
1699 | assert(NewClass && |
1700 | "Every MemoryAccess should be getting mapped to a non-null class" ); |
1701 | LLVM_DEBUG(dbgs() << "Setting " << *From); |
1702 | LLVM_DEBUG(dbgs() << " equivalent to congruence class " ); |
1703 | LLVM_DEBUG(dbgs() << NewClass->getID() |
1704 | << " with current MemoryAccess leader " ); |
1705 | LLVM_DEBUG(dbgs() << *NewClass->getMemoryLeader() << "\n" ); |
1706 | |
1707 | auto LookupResult = MemoryAccessToClass.find(Val: From); |
1708 | bool Changed = false; |
1709 | // If it's already in the table, see if the value changed. |
1710 | if (LookupResult != MemoryAccessToClass.end()) { |
1711 | auto *OldClass = LookupResult->second; |
1712 | if (OldClass != NewClass) { |
1713 | // If this is a phi, we have to handle memory member updates. |
1714 | if (auto *MP = dyn_cast<MemoryPhi>(Val: From)) { |
1715 | OldClass->memory_erase(M: MP); |
1716 | NewClass->memory_insert(M: MP); |
1717 | // This may have killed the class if it had no non-memory members |
1718 | if (OldClass->getMemoryLeader() == From) { |
1719 | if (OldClass->definesNoMemory()) { |
1720 | OldClass->setMemoryLeader(nullptr); |
1721 | } else { |
1722 | OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); |
1723 | LLVM_DEBUG(dbgs() << "Memory class leader change for class " |
1724 | << OldClass->getID() << " to " |
1725 | << *OldClass->getMemoryLeader() |
1726 | << " due to removal of a memory member " << *From |
1727 | << "\n" ); |
1728 | markMemoryLeaderChangeTouched(CC: OldClass); |
1729 | } |
1730 | } |
1731 | } |
1732 | // It wasn't equivalent before, and now it is. |
1733 | LookupResult->second = NewClass; |
1734 | Changed = true; |
1735 | } |
1736 | } |
1737 | |
1738 | return Changed; |
1739 | } |
1740 | |
1741 | // Determine if a instruction is cycle-free. That means the values in the |
1742 | // instruction don't depend on any expressions that can change value as a result |
1743 | // of the instruction. For example, a non-cycle free instruction would be v = |
1744 | // phi(0, v+1). |
1745 | bool NewGVN::isCycleFree(const Instruction *I) const { |
1746 | // In order to compute cycle-freeness, we do SCC finding on the instruction, |
1747 | // and see what kind of SCC it ends up in. If it is a singleton, it is |
1748 | // cycle-free. If it is not in a singleton, it is only cycle free if the |
1749 | // other members are all phi nodes (as they do not compute anything, they are |
1750 | // copies). |
1751 | auto ICS = InstCycleState.lookup(Val: I); |
1752 | if (ICS == ICS_Unknown) { |
1753 | SCCFinder.Start(Start: I); |
1754 | auto &SCC = SCCFinder.getComponentFor(V: I); |
1755 | // It's cycle free if it's size 1 or the SCC is *only* phi nodes. |
1756 | if (SCC.size() == 1) |
1757 | InstCycleState.insert(KV: {I, ICS_CycleFree}); |
1758 | else { |
1759 | bool AllPhis = llvm::all_of(Range: SCC, P: [](const Value *V) { |
1760 | return isa<PHINode>(Val: V) || isCopyOfAPHI(V); |
1761 | }); |
1762 | ICS = AllPhis ? ICS_CycleFree : ICS_Cycle; |
1763 | for (const auto *Member : SCC) |
1764 | if (auto *MemberPhi = dyn_cast<PHINode>(Val: Member)) |
1765 | InstCycleState.insert(KV: {MemberPhi, ICS}); |
1766 | } |
1767 | } |
1768 | if (ICS == ICS_Cycle) |
1769 | return false; |
1770 | return true; |
1771 | } |
1772 | |
1773 | // Evaluate PHI nodes symbolically and create an expression result. |
1774 | const Expression * |
1775 | NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps, |
1776 | Instruction *I, |
1777 | BasicBlock *PHIBlock) const { |
1778 | // True if one of the incoming phi edges is a backedge. |
1779 | bool HasBackedge = false; |
1780 | // All constant tracks the state of whether all the *original* phi operands |
1781 | // This is really shorthand for "this phi cannot cycle due to forward |
1782 | // change in value of the phi is guaranteed not to later change the value of |
1783 | // the phi. IE it can't be v = phi(undef, v+1) |
1784 | bool OriginalOpsConstant = true; |
1785 | auto *E = cast<PHIExpression>(Val: createPHIExpression( |
1786 | PHIOperands: PHIOps, I, PHIBlock, HasBackedge, OriginalOpsConstant)); |
1787 | // We match the semantics of SimplifyPhiNode from InstructionSimplify here. |
1788 | // See if all arguments are the same. |
1789 | // We track if any were undef because they need special handling. |
1790 | bool HasUndef = false, HasPoison = false; |
1791 | auto Filtered = make_filter_range(Range: E->operands(), Pred: [&](Value *Arg) { |
1792 | if (isa<PoisonValue>(Val: Arg)) { |
1793 | HasPoison = true; |
1794 | return false; |
1795 | } |
1796 | if (isa<UndefValue>(Val: Arg)) { |
1797 | HasUndef = true; |
1798 | return false; |
1799 | } |
1800 | return true; |
1801 | }); |
1802 | // If we are left with no operands, it's dead. |
1803 | if (Filtered.empty()) { |
1804 | // If it has undef or poison at this point, it means there are no-non-undef |
1805 | // arguments, and thus, the value of the phi node must be undef. |
1806 | if (HasUndef) { |
1807 | LLVM_DEBUG( |
1808 | dbgs() << "PHI Node " << *I |
1809 | << " has no non-undef arguments, valuing it as undef\n" ); |
1810 | return createConstantExpression(C: UndefValue::get(T: I->getType())); |
1811 | } |
1812 | if (HasPoison) { |
1813 | LLVM_DEBUG( |
1814 | dbgs() << "PHI Node " << *I |
1815 | << " has no non-poison arguments, valuing it as poison\n" ); |
1816 | return createConstantExpression(C: PoisonValue::get(T: I->getType())); |
1817 | } |
1818 | |
1819 | LLVM_DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n" ); |
1820 | deleteExpression(E); |
1821 | return createDeadExpression(); |
1822 | } |
1823 | Value *AllSameValue = *(Filtered.begin()); |
1824 | ++Filtered.begin(); |
1825 | // Can't use std::equal here, sadly, because filter.begin moves. |
1826 | if (llvm::all_of(Range&: Filtered, P: [&](Value *Arg) { return Arg == AllSameValue; })) { |
1827 | // Can't fold phi(undef, X) -> X unless X can't be poison (thus X is undef |
1828 | // in the worst case). |
1829 | if (HasUndef && !isGuaranteedNotToBePoison(V: AllSameValue, AC, CtxI: nullptr, DT)) |
1830 | return E; |
1831 | |
1832 | // In LLVM's non-standard representation of phi nodes, it's possible to have |
1833 | // phi nodes with cycles (IE dependent on other phis that are .... dependent |
1834 | // on the original phi node), especially in weird CFG's where some arguments |
1835 | // are unreachable, or uninitialized along certain paths. This can cause |
1836 | // infinite loops during evaluation. We work around this by not trying to |
1837 | // really evaluate them independently, but instead using a variable |
1838 | // expression to say if one is equivalent to the other. |
1839 | // We also special case undef/poison, so that if we have an undef, we can't |
1840 | // use the common value unless it dominates the phi block. |
1841 | if (HasPoison || HasUndef) { |
1842 | // If we have undef and at least one other value, this is really a |
1843 | // multivalued phi, and we need to know if it's cycle free in order to |
1844 | // evaluate whether we can ignore the undef. The other parts of this are |
1845 | // just shortcuts. If there is no backedge, or all operands are |
1846 | // constants, it also must be cycle free. |
1847 | if (HasBackedge && !OriginalOpsConstant && |
1848 | !isa<UndefValue>(Val: AllSameValue) && !isCycleFree(I)) |
1849 | return E; |
1850 | |
1851 | // Only have to check for instructions |
1852 | if (auto *AllSameInst = dyn_cast<Instruction>(Val: AllSameValue)) |
1853 | if (!someEquivalentDominates(Inst: AllSameInst, U: I)) |
1854 | return E; |
1855 | } |
1856 | // Can't simplify to something that comes later in the iteration. |
1857 | // Otherwise, when and if it changes congruence class, we will never catch |
1858 | // up. We will always be a class behind it. |
1859 | if (isa<Instruction>(Val: AllSameValue) && |
1860 | InstrToDFSNum(V: AllSameValue) > InstrToDFSNum(V: I)) |
1861 | return E; |
1862 | NumGVNPhisAllSame++; |
1863 | LLVM_DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue |
1864 | << "\n" ); |
1865 | deleteExpression(E); |
1866 | return createVariableOrConstant(V: AllSameValue); |
1867 | } |
1868 | return E; |
1869 | } |
1870 | |
1871 | const Expression * |
1872 | NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const { |
1873 | if (auto *EI = dyn_cast<ExtractValueInst>(Val: I)) { |
1874 | auto *WO = dyn_cast<WithOverflowInst>(Val: EI->getAggregateOperand()); |
1875 | if (WO && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) |
1876 | // EI is an extract from one of our with.overflow intrinsics. Synthesize |
1877 | // a semantically equivalent expression instead of an extract value |
1878 | // expression. |
1879 | return createBinaryExpression(Opcode: WO->getBinaryOp(), T: EI->getType(), |
1880 | Arg1: WO->getLHS(), Arg2: WO->getRHS(), I); |
1881 | } |
1882 | |
1883 | return createAggregateValueExpression(I); |
1884 | } |
1885 | |
1886 | NewGVN::ExprResult NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { |
1887 | assert(isa<CmpInst>(I) && "Expected a cmp instruction." ); |
1888 | |
1889 | auto *CI = cast<CmpInst>(Val: I); |
1890 | // See if our operands are equal to those of a previous predicate, and if so, |
1891 | // if it implies true or false. |
1892 | auto Op0 = lookupOperandLeader(V: CI->getOperand(i_nocapture: 0)); |
1893 | auto Op1 = lookupOperandLeader(V: CI->getOperand(i_nocapture: 1)); |
1894 | auto OurPredicate = CI->getPredicate(); |
1895 | if (shouldSwapOperands(Op0, Op1)) { |
1896 | std::swap(a&: Op0, b&: Op1); |
1897 | OurPredicate = CI->getSwappedPredicate(); |
1898 | } |
1899 | |
1900 | // Avoid processing the same info twice. |
1901 | const PredicateBase *LastPredInfo = nullptr; |
1902 | // See if we know something about the comparison itself, like it is the target |
1903 | // of an assume. |
1904 | auto *CmpPI = PredInfo->getPredicateInfoFor(V: I); |
1905 | if (isa_and_nonnull<PredicateAssume>(Val: CmpPI)) |
1906 | return ExprResult::some( |
1907 | Expr: createConstantExpression(C: ConstantInt::getTrue(Ty: CI->getType()))); |
1908 | |
1909 | if (Op0 == Op1) { |
1910 | // This condition does not depend on predicates, no need to add users |
1911 | if (CI->isTrueWhenEqual()) |
1912 | return ExprResult::some( |
1913 | Expr: createConstantExpression(C: ConstantInt::getTrue(Ty: CI->getType()))); |
1914 | else if (CI->isFalseWhenEqual()) |
1915 | return ExprResult::some( |
1916 | Expr: createConstantExpression(C: ConstantInt::getFalse(Ty: CI->getType()))); |
1917 | } |
1918 | |
1919 | // NOTE: Because we are comparing both operands here and below, and using |
1920 | // previous comparisons, we rely on fact that predicateinfo knows to mark |
1921 | // comparisons that use renamed operands as users of the earlier comparisons. |
1922 | // It is *not* enough to just mark predicateinfo renamed operands as users of |
1923 | // the earlier comparisons, because the *other* operand may have changed in a |
1924 | // previous iteration. |
1925 | // Example: |
1926 | // icmp slt %a, %b |
1927 | // %b.0 = ssa.copy(%b) |
1928 | // false branch: |
1929 | // icmp slt %c, %b.0 |
1930 | |
1931 | // %c and %a may start out equal, and thus, the code below will say the second |
1932 | // %icmp is false. c may become equal to something else, and in that case the |
1933 | // %second icmp *must* be reexamined, but would not if only the renamed |
1934 | // %operands are considered users of the icmp. |
1935 | |
1936 | // *Currently* we only check one level of comparisons back, and only mark one |
1937 | // level back as touched when changes happen. If you modify this code to look |
1938 | // back farther through comparisons, you *must* mark the appropriate |
1939 | // comparisons as users in PredicateInfo.cpp, or you will cause bugs. See if |
1940 | // we know something just from the operands themselves |
1941 | |
1942 | // See if our operands have predicate info, so that we may be able to derive |
1943 | // something from a previous comparison. |
1944 | for (const auto &Op : CI->operands()) { |
1945 | auto *PI = PredInfo->getPredicateInfoFor(V: Op); |
1946 | if (const auto *PBranch = dyn_cast_or_null<PredicateBranch>(Val: PI)) { |
1947 | if (PI == LastPredInfo) |
1948 | continue; |
1949 | LastPredInfo = PI; |
1950 | // In phi of ops cases, we may have predicate info that we are evaluating |
1951 | // in a different context. |
1952 | if (!DT->dominates(A: PBranch->To, B: I->getParent())) |
1953 | continue; |
1954 | // TODO: Along the false edge, we may know more things too, like |
1955 | // icmp of |
1956 | // same operands is false. |
1957 | // TODO: We only handle actual comparison conditions below, not |
1958 | // and/or. |
1959 | auto *BranchCond = dyn_cast<CmpInst>(Val: PBranch->Condition); |
1960 | if (!BranchCond) |
1961 | continue; |
1962 | auto *BranchOp0 = lookupOperandLeader(V: BranchCond->getOperand(i_nocapture: 0)); |
1963 | auto *BranchOp1 = lookupOperandLeader(V: BranchCond->getOperand(i_nocapture: 1)); |
1964 | auto BranchPredicate = BranchCond->getPredicate(); |
1965 | if (shouldSwapOperands(BranchOp0, BranchOp1)) { |
1966 | std::swap(a&: BranchOp0, b&: BranchOp1); |
1967 | BranchPredicate = BranchCond->getSwappedPredicate(); |
1968 | } |
1969 | if (BranchOp0 == Op0 && BranchOp1 == Op1) { |
1970 | if (PBranch->TrueEdge) { |
1971 | // If we know the previous predicate is true and we are in the true |
1972 | // edge then we may be implied true or false. |
1973 | if (auto R = ICmpInst::isImpliedByMatchingCmp(Pred1: BranchPredicate, |
1974 | Pred2: OurPredicate)) { |
1975 | auto *C = ConstantInt::getBool(Ty: CI->getType(), V: *R); |
1976 | return ExprResult::some(Expr: createConstantExpression(C), PredDep: PI); |
1977 | } |
1978 | } else { |
1979 | // Just handle the ne and eq cases, where if we have the same |
1980 | // operands, we may know something. |
1981 | if (BranchPredicate == OurPredicate) { |
1982 | // Same predicate, same ops,we know it was false, so this is false. |
1983 | return ExprResult::some( |
1984 | Expr: createConstantExpression(C: ConstantInt::getFalse(Ty: CI->getType())), |
1985 | PredDep: PI); |
1986 | } else if (BranchPredicate == |
1987 | CmpInst::getInversePredicate(pred: OurPredicate)) { |
1988 | // Inverse predicate, we know the other was false, so this is true. |
1989 | return ExprResult::some( |
1990 | Expr: createConstantExpression(C: ConstantInt::getTrue(Ty: CI->getType())), |
1991 | PredDep: PI); |
1992 | } |
1993 | } |
1994 | } |
1995 | } |
1996 | } |
1997 | // Create expression will take care of simplifyCmpInst |
1998 | return createExpression(I); |
1999 | } |
2000 | |
2001 | // Substitute and symbolize the instruction before value numbering. |
2002 | NewGVN::ExprResult |
2003 | NewGVN::performSymbolicEvaluation(Instruction *I, |
2004 | SmallPtrSetImpl<Value *> &Visited) const { |
2005 | |
2006 | const Expression *E = nullptr; |
2007 | // TODO: memory intrinsics. |
2008 | // TODO: Some day, we should do the forward propagation and reassociation |
2009 | // parts of the algorithm. |
2010 | switch (I->getOpcode()) { |
2011 | case Instruction::ExtractValue: |
2012 | case Instruction::InsertValue: |
2013 | E = performSymbolicAggrValueEvaluation(I); |
2014 | break; |
2015 | case Instruction::PHI: { |
2016 | SmallVector<ValPair, 3> Ops; |
2017 | auto *PN = cast<PHINode>(Val: I); |
2018 | for (unsigned i = 0; i < PN->getNumOperands(); ++i) |
2019 | Ops.push_back(Elt: {PN->getIncomingValue(i), PN->getIncomingBlock(i)}); |
2020 | // Sort to ensure the invariant createPHIExpression requires is met. |
2021 | sortPHIOps(Ops); |
2022 | E = performSymbolicPHIEvaluation(PHIOps: Ops, I, PHIBlock: getBlockForValue(V: I)); |
2023 | } break; |
2024 | case Instruction::Call: |
2025 | return performSymbolicCallEvaluation(I); |
2026 | break; |
2027 | case Instruction::Store: |
2028 | E = performSymbolicStoreEvaluation(I); |
2029 | break; |
2030 | case Instruction::Load: |
2031 | E = performSymbolicLoadEvaluation(I); |
2032 | break; |
2033 | case Instruction::BitCast: |
2034 | case Instruction::AddrSpaceCast: |
2035 | case Instruction::Freeze: |
2036 | return createExpression(I); |
2037 | break; |
2038 | case Instruction::ICmp: |
2039 | case Instruction::FCmp: |
2040 | return performSymbolicCmpEvaluation(I); |
2041 | break; |
2042 | case Instruction::FNeg: |
2043 | case Instruction::Add: |
2044 | case Instruction::FAdd: |
2045 | case Instruction::Sub: |
2046 | case Instruction::FSub: |
2047 | case Instruction::Mul: |
2048 | case Instruction::FMul: |
2049 | case Instruction::UDiv: |
2050 | case Instruction::SDiv: |
2051 | case Instruction::FDiv: |
2052 | case Instruction::URem: |
2053 | case Instruction::SRem: |
2054 | case Instruction::FRem: |
2055 | case Instruction::Shl: |
2056 | case Instruction::LShr: |
2057 | case Instruction::AShr: |
2058 | case Instruction::And: |
2059 | case Instruction::Or: |
2060 | case Instruction::Xor: |
2061 | case Instruction::Trunc: |
2062 | case Instruction::ZExt: |
2063 | case Instruction::SExt: |
2064 | case Instruction::FPToUI: |
2065 | case Instruction::FPToSI: |
2066 | case Instruction::UIToFP: |
2067 | case Instruction::SIToFP: |
2068 | case Instruction::FPTrunc: |
2069 | case Instruction::FPExt: |
2070 | case Instruction::PtrToInt: |
2071 | case Instruction::IntToPtr: |
2072 | case Instruction::Select: |
2073 | case Instruction::ExtractElement: |
2074 | case Instruction::InsertElement: |
2075 | case Instruction::GetElementPtr: |
2076 | return createExpression(I); |
2077 | break; |
2078 | case Instruction::ShuffleVector: |
2079 | // FIXME: Add support for shufflevector to createExpression. |
2080 | return ExprResult::none(); |
2081 | default: |
2082 | return ExprResult::none(); |
2083 | } |
2084 | return ExprResult::some(Expr: E); |
2085 | } |
2086 | |
2087 | // Look up a container of values/instructions in a map, and touch all the |
2088 | // instructions in the container. Then erase value from the map. |
2089 | template <typename Map, typename KeyType> |
2090 | void NewGVN::touchAndErase(Map &M, const KeyType &Key) { |
2091 | const auto Result = M.find_as(Key); |
2092 | if (Result != M.end()) { |
2093 | for (const typename Map::mapped_type::value_type Mapped : Result->second) |
2094 | TouchedInstructions.set(InstrToDFSNum(Mapped)); |
2095 | M.erase(Result); |
2096 | } |
2097 | } |
2098 | |
2099 | void NewGVN::addAdditionalUsers(Value *To, Value *User) const { |
2100 | assert(User && To != User); |
2101 | if (isa<Instruction>(Val: To)) |
2102 | AdditionalUsers[To].insert(Ptr: User); |
2103 | } |
2104 | |
2105 | void NewGVN::addAdditionalUsers(ExprResult &Res, Instruction *User) const { |
2106 | if (Res.ExtraDep && Res.ExtraDep != User) |
2107 | addAdditionalUsers(To: Res.ExtraDep, User); |
2108 | Res.ExtraDep = nullptr; |
2109 | |
2110 | if (Res.PredDep) { |
2111 | if (const auto *PBranch = dyn_cast<PredicateBranch>(Val: Res.PredDep)) |
2112 | PredicateToUsers[PBranch->Condition].insert(Ptr: User); |
2113 | else if (const auto *PAssume = dyn_cast<PredicateAssume>(Val: Res.PredDep)) |
2114 | PredicateToUsers[PAssume->Condition].insert(Ptr: User); |
2115 | } |
2116 | Res.PredDep = nullptr; |
2117 | } |
2118 | |
2119 | void NewGVN::markUsersTouched(Value *V) { |
2120 | // Now mark the users as touched. |
2121 | for (auto *User : V->users()) { |
2122 | assert(isa<Instruction>(User) && "Use of value not within an instruction?" ); |
2123 | TouchedInstructions.set(InstrToDFSNum(V: User)); |
2124 | } |
2125 | touchAndErase(M&: AdditionalUsers, Key: V); |
2126 | } |
2127 | |
2128 | void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const { |
2129 | LLVM_DEBUG(dbgs() << "Adding memory user " << *U << " to " << *To << "\n" ); |
2130 | MemoryToUsers[To].insert(Ptr: U); |
2131 | } |
2132 | |
2133 | void NewGVN::markMemoryDefTouched(const MemoryAccess *MA) { |
2134 | TouchedInstructions.set(MemoryToDFSNum(MA)); |
2135 | } |
2136 | |
2137 | void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) { |
2138 | if (isa<MemoryUse>(Val: MA)) |
2139 | return; |
2140 | for (const auto *U : MA->users()) |
2141 | TouchedInstructions.set(MemoryToDFSNum(MA: U)); |
2142 | touchAndErase(M&: MemoryToUsers, Key: MA); |
2143 | } |
2144 | |
2145 | // Touch all the predicates that depend on this instruction. |
2146 | void NewGVN::markPredicateUsersTouched(Instruction *I) { |
2147 | touchAndErase(M&: PredicateToUsers, Key: I); |
2148 | } |
2149 | |
2150 | // Mark users affected by a memory leader change. |
2151 | void NewGVN::markMemoryLeaderChangeTouched(CongruenceClass *CC) { |
2152 | for (const auto *M : CC->memory()) |
2153 | markMemoryDefTouched(MA: M); |
2154 | } |
2155 | |
2156 | // Touch the instructions that need to be updated after a congruence class has a |
2157 | // leader change, and mark changed values. |
2158 | void NewGVN::markValueLeaderChangeTouched(CongruenceClass *CC) { |
2159 | for (auto *M : *CC) { |
2160 | if (auto *I = dyn_cast<Instruction>(Val: M)) |
2161 | TouchedInstructions.set(InstrToDFSNum(V: I)); |
2162 | LeaderChanges.insert(Ptr: M); |
2163 | } |
2164 | } |
2165 | |
2166 | // Give a range of things that have instruction DFS numbers, this will return |
2167 | // the member of the range with the smallest dfs number. |
2168 | template <class T, class Range> |
2169 | T *NewGVN::getMinDFSOfRange(const Range &R) const { |
2170 | std::pair<T *, unsigned> MinDFS = {nullptr, ~0U}; |
2171 | for (const auto X : R) { |
2172 | auto DFSNum = InstrToDFSNum(X); |
2173 | if (DFSNum < MinDFS.second) |
2174 | MinDFS = {X, DFSNum}; |
2175 | } |
2176 | return MinDFS.first; |
2177 | } |
2178 | |
2179 | // This function returns the MemoryAccess that should be the next leader of |
2180 | // congruence class CC, under the assumption that the current leader is going to |
2181 | // disappear. |
2182 | const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const { |
2183 | // TODO: If this ends up to slow, we can maintain a next memory leader like we |
2184 | // do for regular leaders. |
2185 | // Make sure there will be a leader to find. |
2186 | assert(!CC->definesNoMemory() && "Can't get next leader if there is none" ); |
2187 | if (CC->getStoreCount() > 0) { |
2188 | if (auto *NL = dyn_cast_or_null<StoreInst>(Val: CC->getNextLeader().first)) |
2189 | return getMemoryAccess(I: NL); |
2190 | // Find the store with the minimum DFS number. |
2191 | auto *V = getMinDFSOfRange<Value>(R: make_filter_range( |
2192 | Range&: *CC, Pred: [&](const Value *V) { return isa<StoreInst>(Val: V); })); |
2193 | return getMemoryAccess(I: cast<StoreInst>(Val: V)); |
2194 | } |
2195 | assert(CC->getStoreCount() == 0); |
2196 | |
2197 | // Given our assertion, hitting this part must mean |
2198 | // !OldClass->memory_empty() |
2199 | if (CC->memory_size() == 1) |
2200 | return *CC->memory_begin(); |
2201 | return getMinDFSOfRange<const MemoryPhi>(R: CC->memory()); |
2202 | } |
2203 | |
2204 | // This function returns the next value leader of a congruence class, under the |
2205 | // assumption that the current leader is going away. This should end up being |
2206 | // the next most dominating member. |
2207 | Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const { |
2208 | // We don't need to sort members if there is only 1, and we don't care about |
2209 | // sorting the TOP class because everything either gets out of it or is |
2210 | // unreachable. |
2211 | |
2212 | if (CC->size() == 1 || CC == TOPClass) { |
2213 | return *(CC->begin()); |
2214 | } else if (CC->getNextLeader().first) { |
2215 | ++NumGVNAvoidedSortedLeaderChanges; |
2216 | return CC->getNextLeader().first; |
2217 | } else { |
2218 | ++NumGVNSortedLeaderChanges; |
2219 | // NOTE: If this ends up to slow, we can maintain a dual structure for |
2220 | // member testing/insertion, or keep things mostly sorted, and sort only |
2221 | // here, or use SparseBitVector or .... |
2222 | return getMinDFSOfRange<Value>(R: *CC); |
2223 | } |
2224 | } |
2225 | |
2226 | // Move a MemoryAccess, currently in OldClass, to NewClass, including updates to |
2227 | // the memory members, etc for the move. |
2228 | // |
2229 | // The invariants of this function are: |
2230 | // |
2231 | // - I must be moving to NewClass from OldClass |
2232 | // - The StoreCount of OldClass and NewClass is expected to have been updated |
2233 | // for I already if it is a store. |
2234 | // - The OldClass memory leader has not been updated yet if I was the leader. |
2235 | void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, |
2236 | MemoryAccess *InstMA, |
2237 | CongruenceClass *OldClass, |
2238 | CongruenceClass *NewClass) { |
2239 | // If the leader is I, and we had a representative MemoryAccess, it should |
2240 | // be the MemoryAccess of OldClass. |
2241 | assert((!InstMA || !OldClass->getMemoryLeader() || |
2242 | OldClass->getLeader() != I || |
2243 | MemoryAccessToClass.lookup(OldClass->getMemoryLeader()) == |
2244 | MemoryAccessToClass.lookup(InstMA)) && |
2245 | "Representative MemoryAccess mismatch" ); |
2246 | // First, see what happens to the new class |
2247 | if (!NewClass->getMemoryLeader()) { |
2248 | // Should be a new class, or a store becoming a leader of a new class. |
2249 | assert(NewClass->size() == 1 || |
2250 | (isa<StoreInst>(I) && NewClass->getStoreCount() == 1)); |
2251 | NewClass->setMemoryLeader(InstMA); |
2252 | // Mark it touched if we didn't just create a singleton |
2253 | LLVM_DEBUG(dbgs() << "Memory class leader change for class " |
2254 | << NewClass->getID() |
2255 | << " due to new memory instruction becoming leader\n" ); |
2256 | markMemoryLeaderChangeTouched(CC: NewClass); |
2257 | } |
2258 | setMemoryClass(From: InstMA, NewClass); |
2259 | // Now, fixup the old class if necessary |
2260 | if (OldClass->getMemoryLeader() == InstMA) { |
2261 | if (!OldClass->definesNoMemory()) { |
2262 | OldClass->setMemoryLeader(getNextMemoryLeader(CC: OldClass)); |
2263 | LLVM_DEBUG(dbgs() << "Memory class leader change for class " |
2264 | << OldClass->getID() << " to " |
2265 | << *OldClass->getMemoryLeader() |
2266 | << " due to removal of old leader " << *InstMA << "\n" ); |
2267 | markMemoryLeaderChangeTouched(CC: OldClass); |
2268 | } else |
2269 | OldClass->setMemoryLeader(nullptr); |
2270 | } |
2271 | } |
2272 | |
2273 | // Move a value, currently in OldClass, to be part of NewClass |
2274 | // Update OldClass and NewClass for the move (including changing leaders, etc). |
2275 | void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, |
2276 | CongruenceClass *OldClass, |
2277 | CongruenceClass *NewClass) { |
2278 | if (I == OldClass->getNextLeader().first) |
2279 | OldClass->resetNextLeader(); |
2280 | |
2281 | OldClass->erase(M: I); |
2282 | NewClass->insert(M: I); |
2283 | |
2284 | // Ensure that the leader has the lowest RPO. If the leader changed notify all |
2285 | // members of the class. |
2286 | if (NewClass->getLeader() != I && |
2287 | NewClass->addPossibleLeader(LeaderPair: {I, InstrToDFSNum(V: I)})) { |
2288 | markValueLeaderChangeTouched(CC: NewClass); |
2289 | } |
2290 | |
2291 | // Handle our special casing of stores. |
2292 | if (auto *SI = dyn_cast<StoreInst>(Val: I)) { |
2293 | OldClass->decStoreCount(); |
2294 | // Okay, so when do we want to make a store a leader of a class? |
2295 | // If we have a store defined by an earlier load, we want the earlier load |
2296 | // to lead the class. |
2297 | // If we have a store defined by something else, we want the store to lead |
2298 | // the class so everything else gets the "something else" as a value. |
2299 | // If we have a store as the single member of the class, we want the store |
2300 | // as the leader |
2301 | if (NewClass->getStoreCount() == 0 && !NewClass->getStoredValue()) { |
2302 | // If it's a store expression we are using, it means we are not equivalent |
2303 | // to something earlier. |
2304 | if (auto *SE = dyn_cast<StoreExpression>(Val: E)) { |
2305 | NewClass->setStoredValue(SE->getStoredValue()); |
2306 | markValueLeaderChangeTouched(CC: NewClass); |
2307 | // Shift the new class leader to be the store |
2308 | LLVM_DEBUG(dbgs() << "Changing leader of congruence class " |
2309 | << NewClass->getID() << " from " |
2310 | << *NewClass->getLeader() << " to " << *SI |
2311 | << " because store joined class\n" ); |
2312 | // If we changed the leader, we have to mark it changed because we don't |
2313 | // know what it will do to symbolic evaluation. |
2314 | NewClass->setLeader({SI, InstrToDFSNum(V: SI)}); |
2315 | } |
2316 | // We rely on the code below handling the MemoryAccess change. |
2317 | } |
2318 | NewClass->incStoreCount(); |
2319 | } |
2320 | // True if there is no memory instructions left in a class that had memory |
2321 | // instructions before. |
2322 | |
2323 | // If it's not a memory use, set the MemoryAccess equivalence |
2324 | auto *InstMA = dyn_cast_or_null<MemoryDef>(Val: getMemoryAccess(I)); |
2325 | if (InstMA) |
2326 | moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass); |
2327 | ValueToClass[I] = NewClass; |
2328 | // See if we destroyed the class or need to swap leaders. |
2329 | if (OldClass->empty() && OldClass != TOPClass) { |
2330 | if (OldClass->getDefiningExpr()) { |
2331 | LLVM_DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr() |
2332 | << " from table\n" ); |
2333 | // We erase it as an exact expression to make sure we don't just erase an |
2334 | // equivalent one. |
2335 | auto Iter = ExpressionToClass.find_as( |
2336 | Val: ExactEqualsExpression(*OldClass->getDefiningExpr())); |
2337 | if (Iter != ExpressionToClass.end()) |
2338 | ExpressionToClass.erase(I: Iter); |
2339 | #ifdef EXPENSIVE_CHECKS |
2340 | assert( |
2341 | (*OldClass->getDefiningExpr() != *E || ExpressionToClass.lookup(E)) && |
2342 | "We erased the expression we just inserted, which should not happen" ); |
2343 | #endif |
2344 | } |
2345 | } else if (OldClass->getLeader() == I) { |
2346 | // When the leader changes, the value numbering of |
2347 | // everything may change due to symbolization changes, so we need to |
2348 | // reprocess. |
2349 | LLVM_DEBUG(dbgs() << "Value class leader change for class " |
2350 | << OldClass->getID() << "\n" ); |
2351 | ++NumGVNLeaderChanges; |
2352 | // Destroy the stored value if there are no more stores to represent it. |
2353 | // Note that this is basically clean up for the expression removal that |
2354 | // happens below. If we remove stores from a class, we may leave it as a |
2355 | // class of equivalent memory phis. |
2356 | if (OldClass->getStoreCount() == 0) { |
2357 | if (OldClass->getStoredValue()) |
2358 | OldClass->setStoredValue(nullptr); |
2359 | } |
2360 | OldClass->setLeader({getNextValueLeader(CC: OldClass), |
2361 | InstrToDFSNum(V: getNextValueLeader(CC: OldClass))}); |
2362 | OldClass->resetNextLeader(); |
2363 | markValueLeaderChangeTouched(CC: OldClass); |
2364 | } |
2365 | } |
2366 | |
2367 | // For a given expression, mark the phi of ops instructions that could have |
2368 | // changed as a result. |
2369 | void NewGVN::markPhiOfOpsChanged(const Expression *E) { |
2370 | touchAndErase(M&: ExpressionToPhiOfOps, Key: E); |
2371 | } |
2372 | |
2373 | // Perform congruence finding on a given value numbering expression. |
2374 | void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { |
2375 | // This is guaranteed to return something, since it will at least find |
2376 | // TOP. |
2377 | |
2378 | CongruenceClass *IClass = ValueToClass.lookup(Val: I); |
2379 | assert(IClass && "Should have found a IClass" ); |
2380 | // Dead classes should have been eliminated from the mapping. |
2381 | assert(!IClass->isDead() && "Found a dead class" ); |
2382 | |
2383 | CongruenceClass *EClass = nullptr; |
2384 | if (const auto *VE = dyn_cast<VariableExpression>(Val: E)) { |
2385 | EClass = ValueToClass.lookup(Val: VE->getVariableValue()); |
2386 | } else if (isa<DeadExpression>(Val: E)) { |
2387 | EClass = TOPClass; |
2388 | } |
2389 | if (!EClass) { |
2390 | auto lookupResult = ExpressionToClass.try_emplace(Key: E); |
2391 | |
2392 | // If it's not in the value table, create a new congruence class. |
2393 | if (lookupResult.second) { |
2394 | CongruenceClass *NewClass = createCongruenceClass(Leader: nullptr, E); |
2395 | auto place = lookupResult.first; |
2396 | place->second = NewClass; |
2397 | |
2398 | // Constants and variables should always be made the leader. |
2399 | if (const auto *CE = dyn_cast<ConstantExpression>(Val: E)) { |
2400 | NewClass->setLeader({CE->getConstantValue(), 0}); |
2401 | } else if (const auto *SE = dyn_cast<StoreExpression>(Val: E)) { |
2402 | StoreInst *SI = SE->getStoreInst(); |
2403 | NewClass->setLeader({SI, InstrToDFSNum(V: SI)}); |
2404 | NewClass->setStoredValue(SE->getStoredValue()); |
2405 | // The RepMemoryAccess field will be filled in properly by the |
2406 | // moveValueToNewCongruenceClass call. |
2407 | } else { |
2408 | NewClass->setLeader({I, InstrToDFSNum(V: I)}); |
2409 | } |
2410 | assert(!isa<VariableExpression>(E) && |
2411 | "VariableExpression should have been handled already" ); |
2412 | |
2413 | EClass = NewClass; |
2414 | LLVM_DEBUG(dbgs() << "Created new congruence class for " << *I |
2415 | << " using expression " << *E << " at " |
2416 | << NewClass->getID() << " and leader " |
2417 | << *(NewClass->getLeader())); |
2418 | if (NewClass->getStoredValue()) |
2419 | LLVM_DEBUG(dbgs() << " and stored value " |
2420 | << *(NewClass->getStoredValue())); |
2421 | LLVM_DEBUG(dbgs() << "\n" ); |
2422 | } else { |
2423 | EClass = lookupResult.first->second; |
2424 | if (isa<ConstantExpression>(Val: E)) |
2425 | assert((isa<Constant>(EClass->getLeader()) || |
2426 | (EClass->getStoredValue() && |
2427 | isa<Constant>(EClass->getStoredValue()))) && |
2428 | "Any class with a constant expression should have a " |
2429 | "constant leader" ); |
2430 | |
2431 | assert(EClass && "Somehow don't have an eclass" ); |
2432 | |
2433 | assert(!EClass->isDead() && "We accidentally looked up a dead class" ); |
2434 | } |
2435 | } |
2436 | bool ClassChanged = IClass != EClass; |
2437 | bool LeaderChanged = LeaderChanges.erase(Ptr: I); |
2438 | if (ClassChanged || LeaderChanged) { |
2439 | LLVM_DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " |
2440 | << *E << "\n" ); |
2441 | if (ClassChanged) { |
2442 | moveValueToNewCongruenceClass(I, E, OldClass: IClass, NewClass: EClass); |
2443 | markPhiOfOpsChanged(E); |
2444 | } |
2445 | |
2446 | markUsersTouched(V: I); |
2447 | if (MemoryAccess *MA = getMemoryAccess(I)) |
2448 | markMemoryUsersTouched(MA); |
2449 | if (auto *CI = dyn_cast<CmpInst>(Val: I)) |
2450 | markPredicateUsersTouched(I: CI); |
2451 | } |
2452 | // If we changed the class of the store, we want to ensure nothing finds the |
2453 | // old store expression. In particular, loads do not compare against stored |
2454 | // value, so they will find old store expressions (and associated class |
2455 | // mappings) if we leave them in the table. |
2456 | if (ClassChanged && isa<StoreInst>(Val: I)) { |
2457 | auto *OldE = ValueToExpression.lookup(Val: I); |
2458 | // It could just be that the old class died. We don't want to erase it if we |
2459 | // just moved classes. |
2460 | if (OldE && isa<StoreExpression>(Val: OldE) && *E != *OldE) { |
2461 | // Erase this as an exact expression to ensure we don't erase expressions |
2462 | // equivalent to it. |
2463 | auto Iter = ExpressionToClass.find_as(Val: ExactEqualsExpression(*OldE)); |
2464 | if (Iter != ExpressionToClass.end()) |
2465 | ExpressionToClass.erase(I: Iter); |
2466 | } |
2467 | } |
2468 | ValueToExpression[I] = E; |
2469 | } |
2470 | |
2471 | // Process the fact that Edge (from, to) is reachable, including marking |
2472 | // any newly reachable blocks and instructions for processing. |
2473 | void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { |
2474 | // Check if the Edge was reachable before. |
2475 | if (ReachableEdges.insert(V: {From, To}).second) { |
2476 | // If this block wasn't reachable before, all instructions are touched. |
2477 | if (ReachableBlocks.insert(Ptr: To).second) { |
2478 | LLVM_DEBUG(dbgs() << "Block " << getBlockName(To) |
2479 | << " marked reachable\n" ); |
2480 | const auto &InstRange = BlockInstRange.lookup(Val: To); |
2481 | TouchedInstructions.set(I: InstRange.first, E: InstRange.second); |
2482 | } else { |
2483 | LLVM_DEBUG(dbgs() << "Block " << getBlockName(To) |
2484 | << " was reachable, but new edge {" |
2485 | << getBlockName(From) << "," << getBlockName(To) |
2486 | << "} to it found\n" ); |
2487 | |
2488 | // We've made an edge reachable to an existing block, which may |
2489 | // impact predicates. Otherwise, only mark the phi nodes as touched, as |
2490 | // they are the only thing that depend on new edges. Anything using their |
2491 | // values will get propagated to if necessary. |
2492 | if (MemoryAccess *MemPhi = getMemoryAccess(BB: To)) |
2493 | TouchedInstructions.set(InstrToDFSNum(MA: MemPhi)); |
2494 | |
2495 | // FIXME: We should just add a union op on a Bitvector and |
2496 | // SparseBitVector. We can do it word by word faster than we are doing it |
2497 | // here. |
2498 | for (auto InstNum : RevisitOnReachabilityChange[To]) |
2499 | TouchedInstructions.set(InstNum); |
2500 | } |
2501 | } |
2502 | } |
2503 | |
2504 | // Given a predicate condition (from a switch, cmp, or whatever) and a block, |
2505 | // see if we know some constant value for it already. |
2506 | Value *NewGVN::findConditionEquivalence(Value *Cond) const { |
2507 | auto Result = lookupOperandLeader(V: Cond); |
2508 | return isa<Constant>(Val: Result) ? Result : nullptr; |
2509 | } |
2510 | |
2511 | // Process the outgoing edges of a block for reachability. |
2512 | void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) { |
2513 | // Evaluate reachability of terminator instruction. |
2514 | Value *Cond; |
2515 | BasicBlock *TrueSucc, *FalseSucc; |
2516 | if (match(V: TI, P: m_Br(C: m_Value(V&: Cond), T&: TrueSucc, F&: FalseSucc))) { |
2517 | Value *CondEvaluated = findConditionEquivalence(Cond); |
2518 | if (!CondEvaluated) { |
2519 | if (auto *I = dyn_cast<Instruction>(Val: Cond)) { |
2520 | SmallPtrSet<Value *, 4> Visited; |
2521 | auto Res = performSymbolicEvaluation(I, Visited); |
2522 | if (const auto *CE = dyn_cast_or_null<ConstantExpression>(Val: Res.Expr)) { |
2523 | CondEvaluated = CE->getConstantValue(); |
2524 | addAdditionalUsers(Res, User: I); |
2525 | } else { |
2526 | // Did not use simplification result, no need to add the extra |
2527 | // dependency. |
2528 | Res.ExtraDep = nullptr; |
2529 | } |
2530 | } else if (isa<ConstantInt>(Val: Cond)) { |
2531 | CondEvaluated = Cond; |
2532 | } |
2533 | } |
2534 | ConstantInt *CI; |
2535 | if (CondEvaluated && (CI = dyn_cast<ConstantInt>(Val: CondEvaluated))) { |
2536 | if (CI->isOne()) { |
2537 | LLVM_DEBUG(dbgs() << "Condition for Terminator " << *TI |
2538 | << " evaluated to true\n" ); |
2539 | updateReachableEdge(From: B, To: TrueSucc); |
2540 | } else if (CI->isZero()) { |
2541 | LLVM_DEBUG(dbgs() << "Condition for Terminator " << *TI |
2542 | << " evaluated to false\n" ); |
2543 | updateReachableEdge(From: B, To: FalseSucc); |
2544 | } |
2545 | } else { |
2546 | updateReachableEdge(From: B, To: TrueSucc); |
2547 | updateReachableEdge(From: B, To: FalseSucc); |
2548 | } |
2549 | } else if (auto *SI = dyn_cast<SwitchInst>(Val: TI)) { |
2550 | // For switches, propagate the case values into the case |
2551 | // destinations. |
2552 | |
2553 | Value *SwitchCond = SI->getCondition(); |
2554 | Value *CondEvaluated = findConditionEquivalence(Cond: SwitchCond); |
2555 | // See if we were able to turn this switch statement into a constant. |
2556 | if (CondEvaluated && isa<ConstantInt>(Val: CondEvaluated)) { |
2557 | auto *CondVal = cast<ConstantInt>(Val: CondEvaluated); |
2558 | // We should be able to get case value for this. |
2559 | auto Case = *SI->findCaseValue(C: CondVal); |
2560 | if (Case.getCaseSuccessor() == SI->getDefaultDest()) { |
2561 | // We proved the value is outside of the range of the case. |
2562 | // We can't do anything other than mark the default dest as reachable, |
2563 | // and go home. |
2564 | updateReachableEdge(From: B, To: SI->getDefaultDest()); |
2565 | return; |
2566 | } |
2567 | // Now get where it goes and mark it reachable. |
2568 | BasicBlock *TargetBlock = Case.getCaseSuccessor(); |
2569 | updateReachableEdge(From: B, To: TargetBlock); |
2570 | } else { |
2571 | for (BasicBlock *TargetBlock : successors(BB: SI->getParent())) |
2572 | updateReachableEdge(From: B, To: TargetBlock); |
2573 | } |
2574 | } else { |
2575 | // Otherwise this is either unconditional, or a type we have no |
2576 | // idea about. Just mark successors as reachable. |
2577 | for (BasicBlock *TargetBlock : successors(BB: TI->getParent())) |
2578 | updateReachableEdge(From: B, To: TargetBlock); |
2579 | |
2580 | // This also may be a memory defining terminator, in which case, set it |
2581 | // equivalent only to itself. |
2582 | // |
2583 | auto *MA = getMemoryAccess(I: TI); |
2584 | if (MA && !isa<MemoryUse>(Val: MA)) { |
2585 | auto *CC = ensureLeaderOfMemoryClass(MA); |
2586 | if (setMemoryClass(From: MA, NewClass: CC)) |
2587 | markMemoryUsersTouched(MA); |
2588 | } |
2589 | } |
2590 | } |
2591 | |
2592 | // Remove the PHI of Ops PHI for I |
2593 | void NewGVN::removePhiOfOps(Instruction *I, PHINode *PHITemp) { |
2594 | InstrDFS.erase(Val: PHITemp); |
2595 | // It's still a temp instruction. We keep it in the array so it gets erased. |
2596 | // However, it's no longer used by I, or in the block |
2597 | TempToBlock.erase(Val: PHITemp); |
2598 | RealToTemp.erase(Val: I); |
2599 | // We don't remove the users from the phi node uses. This wastes a little |
2600 | // time, but such is life. We could use two sets to track which were there |
2601 | // are the start of NewGVN, and which were added, but right nowt he cost of |
2602 | // tracking is more than the cost of checking for more phi of ops. |
2603 | } |
2604 | |
2605 | // Add PHI Op in BB as a PHI of operations version of ExistingValue. |
2606 | void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB, |
2607 | Instruction *ExistingValue) { |
2608 | InstrDFS[Op] = InstrToDFSNum(V: ExistingValue); |
2609 | AllTempInstructions.insert(V: Op); |
2610 | TempToBlock[Op] = BB; |
2611 | RealToTemp[ExistingValue] = Op; |
2612 | // Add all users to phi node use, as they are now uses of the phi of ops phis |
2613 | // and may themselves be phi of ops. |
2614 | for (auto *U : ExistingValue->users()) |
2615 | if (auto *UI = dyn_cast<Instruction>(Val: U)) |
2616 | PHINodeUses.insert(Ptr: UI); |
2617 | } |
2618 | |
2619 | static bool okayForPHIOfOps(const Instruction *I) { |
2620 | if (!EnablePhiOfOps) |
2621 | return false; |
2622 | return isa<BinaryOperator>(Val: I) || isa<SelectInst>(Val: I) || isa<CmpInst>(Val: I) || |
2623 | isa<LoadInst>(Val: I); |
2624 | } |
2625 | |
2626 | // Return true if this operand will be safe to use for phi of ops. |
2627 | // |
2628 | // The reason some operands are unsafe is that we are not trying to recursively |
2629 | // translate everything back through phi nodes. We actually expect some lookups |
2630 | // of expressions to fail. In particular, a lookup where the expression cannot |
2631 | // exist in the predecessor. This is true even if the expression, as shown, can |
2632 | // be determined to be constant. |
2633 | bool NewGVN::OpIsSafeForPHIOfOps(Value *V, const BasicBlock *PHIBlock, |
2634 | SmallPtrSetImpl<const Value *> &Visited) { |
2635 | SmallVector<Value *, 4> Worklist; |
2636 | Worklist.push_back(Elt: V); |
2637 | while (!Worklist.empty()) { |
2638 | auto *I = Worklist.pop_back_val(); |
2639 | if (!isa<Instruction>(Val: I)) |
2640 | continue; |
2641 | |
2642 | auto OISIt = OpSafeForPHIOfOps.find(Val: {I, CacheIdx}); |
2643 | if (OISIt != OpSafeForPHIOfOps.end()) |
2644 | return OISIt->second; |
2645 | |
2646 | // Keep walking until we either dominate the phi block, or hit a phi, or run |
2647 | // out of things to check. |
2648 | if (DT->properlyDominates(A: getBlockForValue(V: I), B: PHIBlock)) { |
2649 | OpSafeForPHIOfOps.insert(KV: {{I, CacheIdx}, true}); |
2650 | continue; |
2651 | } |
2652 | // PHI in the same block. |
2653 | if (isa<PHINode>(Val: I) && getBlockForValue(V: I) == PHIBlock) { |
2654 | OpSafeForPHIOfOps.insert(KV: {{I, CacheIdx}, false}); |
2655 | return false; |
2656 | } |
2657 | |
2658 | auto *OrigI = cast<Instruction>(Val: I); |
2659 | // When we hit an instruction that reads memory (load, call, etc), we must |
2660 | // consider any store that may happen in the loop. For now, we assume the |
2661 | // worst: there is a store in the loop that alias with this read. |
2662 | // The case where the load is outside the loop is already covered by the |
2663 | // dominator check above. |
2664 | // TODO: relax this condition |
2665 | if (OrigI->mayReadFromMemory()) |
2666 | return false; |
2667 | |
2668 | // Check the operands of the current instruction. |
2669 | for (auto *Op : OrigI->operand_values()) { |
2670 | if (!isa<Instruction>(Val: Op)) |
2671 | continue; |
2672 | // Stop now if we find an unsafe operand. |
2673 | auto OISIt = OpSafeForPHIOfOps.find(Val: {OrigI, CacheIdx}); |
2674 | if (OISIt != OpSafeForPHIOfOps.end()) { |
2675 | if (!OISIt->second) { |
2676 | OpSafeForPHIOfOps.insert(KV: {{I, CacheIdx}, false}); |
2677 | return false; |
2678 | } |
2679 | continue; |
2680 | } |
2681 | if (!Visited.insert(Ptr: Op).second) |
2682 | continue; |
2683 | Worklist.push_back(Elt: cast<Instruction>(Val: Op)); |
2684 | } |
2685 | } |
2686 | OpSafeForPHIOfOps.insert(KV: {{V, CacheIdx}, true}); |
2687 | return true; |
2688 | } |
2689 | |
2690 | // Try to find a leader for instruction TransInst, which is a phi translated |
2691 | // version of something in our original program. Visited is used to ensure we |
2692 | // don't infinite loop during translations of cycles. OrigInst is the |
2693 | // instruction in the original program, and PredBB is the predecessor we |
2694 | // translated it through. |
2695 | Value *NewGVN::findLeaderForInst(Instruction *TransInst, |
2696 | SmallPtrSetImpl<Value *> &Visited, |
2697 | MemoryAccess *MemAccess, Instruction *OrigInst, |
2698 | BasicBlock *PredBB) { |
2699 | unsigned IDFSNum = InstrToDFSNum(V: OrigInst); |
2700 | // Make sure it's marked as a temporary instruction. |
2701 | AllTempInstructions.insert(V: TransInst); |
2702 | // and make sure anything that tries to add it's DFS number is |
2703 | // redirected to the instruction we are making a phi of ops |
2704 | // for. |
2705 | TempToBlock.insert(KV: {TransInst, PredBB}); |
2706 | InstrDFS.insert(KV: {TransInst, IDFSNum}); |
2707 | |
2708 | auto Res = performSymbolicEvaluation(I: TransInst, Visited); |
2709 | const Expression *E = Res.Expr; |
2710 | addAdditionalUsers(Res, User: OrigInst); |
2711 | InstrDFS.erase(Val: TransInst); |
2712 | AllTempInstructions.erase(V: TransInst); |
2713 | TempToBlock.erase(Val: TransInst); |
2714 | if (MemAccess) |
2715 | TempToMemory.erase(Val: TransInst); |
2716 | if (!E) |
2717 | return nullptr; |
2718 | auto *FoundVal = findPHIOfOpsLeader(E, OrigInst, PredBB); |
2719 | if (!FoundVal) { |
2720 | ExpressionToPhiOfOps[E].insert(Ptr: OrigInst); |
2721 | LLVM_DEBUG(dbgs() << "Cannot find phi of ops operand for " << *TransInst |
2722 | << " in block " << getBlockName(PredBB) << "\n" ); |
2723 | return nullptr; |
2724 | } |
2725 | if (auto *SI = dyn_cast<StoreInst>(Val: FoundVal)) |
2726 | FoundVal = SI->getValueOperand(); |
2727 | return FoundVal; |
2728 | } |
2729 | |
2730 | // When we see an instruction that is an op of phis, generate the equivalent phi |
2731 | // of ops form. |
2732 | const Expression * |
2733 | NewGVN::makePossiblePHIOfOps(Instruction *I, |
2734 | SmallPtrSetImpl<Value *> &Visited) { |
2735 | if (!okayForPHIOfOps(I)) |
2736 | return nullptr; |
2737 | |
2738 | if (!Visited.insert(Ptr: I).second) |
2739 | return nullptr; |
2740 | // For now, we require the instruction be cycle free because we don't |
2741 | // *always* create a phi of ops for instructions that could be done as phi |
2742 | // of ops, we only do it if we think it is useful. If we did do it all the |
2743 | // time, we could remove the cycle free check. |
2744 | if (!isCycleFree(I)) |
2745 | return nullptr; |
2746 | |
2747 | // TODO: We don't do phi translation on memory accesses because it's |
2748 | // complicated. For a load, we'd need to be able to simulate a new memoryuse, |
2749 | // which we don't have a good way of doing ATM. |
2750 | auto *MemAccess = getMemoryAccess(I); |
2751 | // If the memory operation is defined by a memory operation this block that |
2752 | // isn't a MemoryPhi, transforming the pointer backwards through a scalar phi |
2753 | // can't help, as it would still be killed by that memory operation. |
2754 | if (MemAccess && !isa<MemoryPhi>(Val: MemAccess->getDefiningAccess()) && |
2755 | MemAccess->getDefiningAccess()->getBlock() == I->getParent()) |
2756 | return nullptr; |
2757 | |
2758 | // Convert op of phis to phi of ops |
2759 | SmallPtrSet<const Value *, 10> VisitedOps; |
2760 | SmallVector<Value *, 4> Ops(I->operand_values()); |
2761 | BasicBlock *SamePHIBlock = nullptr; |
2762 | PHINode *OpPHI = nullptr; |
2763 | if (!DebugCounter::shouldExecute(CounterName: PHIOfOpsCounter)) |
2764 | return nullptr; |
2765 | for (auto *Op : Ops) { |
2766 | if (!isa<PHINode>(Val: Op)) { |
2767 | auto *ValuePHI = RealToTemp.lookup(Val: Op); |
2768 | if (!ValuePHI) |
2769 | continue; |
2770 | LLVM_DEBUG(dbgs() << "Found possible dependent phi of ops\n" ); |
2771 | Op = ValuePHI; |
2772 | } |
2773 | OpPHI = cast<PHINode>(Val: Op); |
2774 | if (!SamePHIBlock) { |
2775 | SamePHIBlock = getBlockForValue(V: OpPHI); |
2776 | } else if (SamePHIBlock != getBlockForValue(V: OpPHI)) { |
2777 | LLVM_DEBUG( |
2778 | dbgs() |
2779 | << "PHIs for operands are not all in the same block, aborting\n" ); |
2780 | return nullptr; |
2781 | } |
2782 | // No point in doing this for one-operand phis. |
2783 | // Since all PHIs for operands must be in the same block, then they must |
2784 | // have the same number of operands so we can just abort. |
2785 | if (OpPHI->getNumOperands() == 1) |
2786 | return nullptr; |
2787 | } |
2788 | |
2789 | if (!OpPHI) |
2790 | return nullptr; |
2791 | |
2792 | SmallVector<ValPair, 4> PHIOps; |
2793 | SmallPtrSet<Value *, 4> Deps; |
2794 | auto *PHIBlock = getBlockForValue(V: OpPHI); |
2795 | RevisitOnReachabilityChange[PHIBlock].reset(Idx: InstrToDFSNum(V: I)); |
2796 | for (unsigned PredNum = 0; PredNum < OpPHI->getNumOperands(); ++PredNum) { |
2797 | auto *PredBB = OpPHI->getIncomingBlock(i: PredNum); |
2798 | Value *FoundVal = nullptr; |
2799 | SmallPtrSet<Value *, 4> CurrentDeps; |
2800 | // We could just skip unreachable edges entirely but it's tricky to do |
2801 | // with rewriting existing phi nodes. |
2802 | if (ReachableEdges.count(V: {PredBB, PHIBlock})) { |
2803 | // Clone the instruction, create an expression from it that is |
2804 | // translated back into the predecessor, and see if we have a leader. |
2805 | Instruction *ValueOp = I->clone(); |
2806 | // Emit the temporal instruction in the predecessor basic block where the |
2807 | // corresponding value is defined. |
2808 | ValueOp->insertBefore(InsertPos: PredBB->getTerminator()->getIterator()); |
2809 | if (MemAccess) |
2810 | TempToMemory.insert(KV: {ValueOp, MemAccess}); |
2811 | bool SafeForPHIOfOps = true; |
2812 | VisitedOps.clear(); |
2813 | for (auto &Op : ValueOp->operands()) { |
2814 | auto *OrigOp = &*Op; |
2815 | // When these operand changes, it could change whether there is a |
2816 | // leader for us or not, so we have to add additional users. |
2817 | if (isa<PHINode>(Val: Op)) { |
2818 | Op = Op->DoPHITranslation(CurBB: PHIBlock, PredBB); |
2819 | if (Op != OrigOp && Op != I) |
2820 | CurrentDeps.insert(Ptr: Op); |
2821 | } else if (auto *ValuePHI = RealToTemp.lookup(Val: Op)) { |
2822 | if (getBlockForValue(V: ValuePHI) == PHIBlock) |
2823 | Op = ValuePHI->getIncomingValueForBlock(BB: PredBB); |
2824 | } |
2825 | // If we phi-translated the op, it must be safe. |
2826 | SafeForPHIOfOps = |
2827 | SafeForPHIOfOps && |
2828 | (Op != OrigOp || OpIsSafeForPHIOfOps(V: Op, PHIBlock, Visited&: VisitedOps)); |
2829 | } |
2830 | // FIXME: For those things that are not safe we could generate |
2831 | // expressions all the way down, and see if this comes out to a |
2832 | // constant. For anything where that is true, and unsafe, we should |
2833 | // have made a phi-of-ops (or value numbered it equivalent to something) |
2834 | // for the pieces already. |
2835 | FoundVal = !SafeForPHIOfOps ? nullptr |
2836 | : findLeaderForInst(TransInst: ValueOp, Visited, |
2837 | MemAccess, OrigInst: I, PredBB); |
2838 | ValueOp->eraseFromParent(); |
2839 | if (!FoundVal) { |
2840 | // We failed to find a leader for the current ValueOp, but this might |
2841 | // change in case of the translated operands change. |
2842 | if (SafeForPHIOfOps) |
2843 | for (auto *Dep : CurrentDeps) |
2844 | addAdditionalUsers(To: Dep, User: I); |
2845 | |
2846 | return nullptr; |
2847 | } |
2848 | Deps.insert_range(R&: CurrentDeps); |
2849 | } else { |
2850 | LLVM_DEBUG(dbgs() << "Skipping phi of ops operand for incoming block " |
2851 | << getBlockName(PredBB) |
2852 | << " because the block is unreachable\n" ); |
2853 | FoundVal = PoisonValue::get(T: I->getType()); |
2854 | RevisitOnReachabilityChange[PHIBlock].set(InstrToDFSNum(V: I)); |
2855 | } |
2856 | |
2857 | PHIOps.push_back(Elt: {FoundVal, PredBB}); |
2858 | LLVM_DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in " |
2859 | << getBlockName(PredBB) << "\n" ); |
2860 | } |
2861 | for (auto *Dep : Deps) |
2862 | addAdditionalUsers(To: Dep, User: I); |
2863 | sortPHIOps(Ops: PHIOps); |
2864 | auto *E = performSymbolicPHIEvaluation(PHIOps, I, PHIBlock); |
2865 | if (isa<ConstantExpression>(Val: E) || isa<VariableExpression>(Val: E)) { |
2866 | LLVM_DEBUG( |
2867 | dbgs() |
2868 | << "Not creating real PHI of ops because it simplified to existing " |
2869 | "value or constant\n" ); |
2870 | // We have leaders for all operands, but do not create a real PHI node with |
2871 | // those leaders as operands, so the link between the operands and the |
2872 | // PHI-of-ops is not materialized in the IR. If any of those leaders |
2873 | // changes, the PHI-of-op may change also, so we need to add the operands as |
2874 | // additional users. |
2875 | for (auto &O : PHIOps) |
2876 | addAdditionalUsers(To: O.first, User: I); |
2877 | |
2878 | return E; |
2879 | } |
2880 | auto *ValuePHI = RealToTemp.lookup(Val: I); |
2881 | bool NewPHI = false; |
2882 | if (!ValuePHI) { |
2883 | ValuePHI = |
2884 | PHINode::Create(Ty: I->getType(), NumReservedValues: OpPHI->getNumOperands(), NameStr: "phiofops" ); |
2885 | addPhiOfOps(Op: ValuePHI, BB: PHIBlock, ExistingValue: I); |
2886 | NewPHI = true; |
2887 | NumGVNPHIOfOpsCreated++; |
2888 | } |
2889 | if (NewPHI) { |
2890 | for (auto PHIOp : PHIOps) |
2891 | ValuePHI->addIncoming(V: PHIOp.first, BB: PHIOp.second); |
2892 | } else { |
2893 | TempToBlock[ValuePHI] = PHIBlock; |
2894 | unsigned int i = 0; |
2895 | for (auto PHIOp : PHIOps) { |
2896 | ValuePHI->setIncomingValue(i, V: PHIOp.first); |
2897 | ValuePHI->setIncomingBlock(i, BB: PHIOp.second); |
2898 | ++i; |
2899 | } |
2900 | } |
2901 | RevisitOnReachabilityChange[PHIBlock].set(InstrToDFSNum(V: I)); |
2902 | LLVM_DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I |
2903 | << "\n" ); |
2904 | |
2905 | return E; |
2906 | } |
2907 | |
2908 | // The algorithm initially places the values of the routine in the TOP |
2909 | // congruence class. The leader of TOP is the undetermined value `poison`. |
2910 | // When the algorithm has finished, values still in TOP are unreachable. |
2911 | void NewGVN::initializeCongruenceClasses(Function &F) { |
2912 | NextCongruenceNum = 0; |
2913 | |
2914 | // Note that even though we use the live on entry def as a representative |
2915 | // MemoryAccess, it is *not* the same as the actual live on entry def. We |
2916 | // have no real equivalent to poison for MemoryAccesses, and so we really |
2917 | // should be checking whether the MemoryAccess is top if we want to know if it |
2918 | // is equivalent to everything. Otherwise, what this really signifies is that |
2919 | // the access "it reaches all the way back to the beginning of the function" |
2920 | |
2921 | // Initialize all other instructions to be in TOP class. |
2922 | TOPClass = createCongruenceClass(Leader: nullptr, E: nullptr); |
2923 | TOPClass->setMemoryLeader(MSSA->getLiveOnEntryDef()); |
2924 | // The live on entry def gets put into it's own class |
2925 | MemoryAccessToClass[MSSA->getLiveOnEntryDef()] = |
2926 | createMemoryClass(MA: MSSA->getLiveOnEntryDef()); |
2927 | |
2928 | for (auto *DTN : nodes(G: DT)) { |
2929 | BasicBlock *BB = DTN->getBlock(); |
2930 | // All MemoryAccesses are equivalent to live on entry to start. They must |
2931 | // be initialized to something so that initial changes are noticed. For |
2932 | // the maximal answer, we initialize them all to be the same as |
2933 | // liveOnEntry. |
2934 | auto *MemoryBlockDefs = MSSA->getBlockDefs(BB); |
2935 | if (MemoryBlockDefs) |
2936 | for (const auto &Def : *MemoryBlockDefs) { |
2937 | MemoryAccessToClass[&Def] = TOPClass; |
2938 | auto *MD = dyn_cast<MemoryDef>(Val: &Def); |
2939 | // Insert the memory phis into the member list. |
2940 | if (!MD) { |
2941 | const MemoryPhi *MP = cast<MemoryPhi>(Val: &Def); |
2942 | TOPClass->memory_insert(M: MP); |
2943 | MemoryPhiState.insert(KV: {MP, MPS_TOP}); |
2944 | } |
2945 | |
2946 | if (MD && isa<StoreInst>(Val: MD->getMemoryInst())) |
2947 | TOPClass->incStoreCount(); |
2948 | } |
2949 | |
2950 | // FIXME: This is trying to discover which instructions are uses of phi |
2951 | // nodes. We should move this into one of the myriad of places that walk |
2952 | // all the operands already. |
2953 | for (auto &I : *BB) { |
2954 | if (isa<PHINode>(Val: &I)) |
2955 | for (auto *U : I.users()) |
2956 | if (auto *UInst = dyn_cast<Instruction>(Val: U)) |
2957 | if (InstrToDFSNum(V: UInst) != 0 && okayForPHIOfOps(I: UInst)) |
2958 | PHINodeUses.insert(Ptr: UInst); |
2959 | // Don't insert void terminators into the class. We don't value number |
2960 | // them, and they just end up sitting in TOP. |
2961 | if (I.isTerminator() && I.getType()->isVoidTy()) |
2962 | continue; |
2963 | TOPClass->insert(M: &I); |
2964 | ValueToClass[&I] = TOPClass; |
2965 | } |
2966 | } |
2967 | |
2968 | // Initialize arguments to be in their own unique congruence classes |
2969 | for (auto &FA : F.args()) |
2970 | createSingletonCongruenceClass(Member: &FA); |
2971 | } |
2972 | |
2973 | void NewGVN::cleanupTables() { |
2974 | for (CongruenceClass *&CC : CongruenceClasses) { |
2975 | LLVM_DEBUG(dbgs() << "Congruence class " << CC->getID() << " has " |
2976 | << CC->size() << " members\n" ); |
2977 | // Make sure we delete the congruence class (probably worth switching to |
2978 | // a unique_ptr at some point. |
2979 | delete CC; |
2980 | CC = nullptr; |
2981 | } |
2982 | |
2983 | // Destroy the value expressions |
2984 | SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(), |
2985 | AllTempInstructions.end()); |
2986 | AllTempInstructions.clear(); |
2987 | |
2988 | // We have to drop all references for everything first, so there are no uses |
2989 | // left as we delete them. |
2990 | for (auto *I : TempInst) { |
2991 | I->dropAllReferences(); |
2992 | } |
2993 | |
2994 | while (!TempInst.empty()) { |
2995 | auto *I = TempInst.pop_back_val(); |
2996 | I->deleteValue(); |
2997 | } |
2998 | |
2999 | ValueToClass.clear(); |
3000 | ArgRecycler.clear(ExpressionAllocator); |
3001 | ExpressionAllocator.Reset(); |
3002 | CongruenceClasses.clear(); |
3003 | ExpressionToClass.clear(); |
3004 | ValueToExpression.clear(); |
3005 | RealToTemp.clear(); |
3006 | AdditionalUsers.clear(); |
3007 | ExpressionToPhiOfOps.clear(); |
3008 | TempToBlock.clear(); |
3009 | TempToMemory.clear(); |
3010 | PHINodeUses.clear(); |
3011 | OpSafeForPHIOfOps.clear(); |
3012 | ReachableBlocks.clear(); |
3013 | ReachableEdges.clear(); |
3014 | #ifndef NDEBUG |
3015 | ProcessedCount.clear(); |
3016 | #endif |
3017 | InstrDFS.clear(); |
3018 | InstructionsToErase.clear(); |
3019 | DFSToInstr.clear(); |
3020 | BlockInstRange.clear(); |
3021 | TouchedInstructions.clear(); |
3022 | MemoryAccessToClass.clear(); |
3023 | PredicateToUsers.clear(); |
3024 | MemoryToUsers.clear(); |
3025 | RevisitOnReachabilityChange.clear(); |
3026 | IntrinsicInstPred.clear(); |
3027 | } |
3028 | |
3029 | // Assign local DFS number mapping to instructions, and leave space for Value |
3030 | // PHI's. |
3031 | std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, |
3032 | unsigned Start) { |
3033 | unsigned End = Start; |
3034 | if (MemoryAccess *MemPhi = getMemoryAccess(BB: B)) { |
3035 | InstrDFS[MemPhi] = End++; |
3036 | DFSToInstr.emplace_back(Args&: MemPhi); |
3037 | } |
3038 | |
3039 | // Then the real block goes next. |
3040 | for (auto &I : *B) { |
3041 | // There's no need to call isInstructionTriviallyDead more than once on |
3042 | // an instruction. Therefore, once we know that an instruction is dead |
3043 | // we change its DFS number so that it doesn't get value numbered. |
3044 | if (isInstructionTriviallyDead(I: &I, TLI)) { |
3045 | InstrDFS[&I] = 0; |
3046 | LLVM_DEBUG(dbgs() << "Skipping trivially dead instruction " << I << "\n" ); |
3047 | markInstructionForDeletion(&I); |
3048 | continue; |
3049 | } |
3050 | if (isa<PHINode>(Val: &I)) |
3051 | RevisitOnReachabilityChange[B].set(End); |
3052 | InstrDFS[&I] = End++; |
3053 | DFSToInstr.emplace_back(Args: &I); |
3054 | } |
3055 | |
3056 | // All of the range functions taken half-open ranges (open on the end side). |
3057 | // So we do not subtract one from count, because at this point it is one |
3058 | // greater than the last instruction. |
3059 | return std::make_pair(x&: Start, y&: End); |
3060 | } |
3061 | |
3062 | void NewGVN::updateProcessedCount(const Value *V) { |
3063 | #ifndef NDEBUG |
3064 | assert(++ProcessedCount[V] < 100 && |
3065 | "Seem to have processed the same Value a lot" ); |
3066 | #endif |
3067 | } |
3068 | |
3069 | // Evaluate MemoryPhi nodes symbolically, just like PHI nodes |
3070 | void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { |
3071 | // If all the arguments are the same, the MemoryPhi has the same value as the |
3072 | // argument. Filter out unreachable blocks and self phis from our operands. |
3073 | // TODO: We could do cycle-checking on the memory phis to allow valueizing for |
3074 | // self-phi checking. |
3075 | const BasicBlock *PHIBlock = MP->getBlock(); |
3076 | auto Filtered = make_filter_range(Range: MP->operands(), Pred: [&](const Use &U) { |
3077 | return cast<MemoryAccess>(Val: U) != MP && |
3078 | !isMemoryAccessTOP(MA: cast<MemoryAccess>(Val: U)) && |
3079 | ReachableEdges.count(V: {MP->getIncomingBlock(U), PHIBlock}); |
3080 | }); |
3081 | // If all that is left is nothing, our memoryphi is poison. We keep it as |
3082 | // InitialClass. Note: The only case this should happen is if we have at |
3083 | // least one self-argument. |
3084 | if (Filtered.begin() == Filtered.end()) { |
3085 | if (setMemoryClass(From: MP, NewClass: TOPClass)) |
3086 | markMemoryUsersTouched(MA: MP); |
3087 | return; |
3088 | } |
3089 | |
3090 | // Transform the remaining operands into operand leaders. |
3091 | // FIXME: mapped_iterator should have a range version. |
3092 | auto LookupFunc = [&](const Use &U) { |
3093 | return lookupMemoryLeader(MA: cast<MemoryAccess>(Val: U)); |
3094 | }; |
3095 | auto MappedBegin = map_iterator(I: Filtered.begin(), F: LookupFunc); |
3096 | auto MappedEnd = map_iterator(I: Filtered.end(), F: LookupFunc); |
3097 | |
3098 | // and now check if all the elements are equal. |
3099 | // Sadly, we can't use std::equals since these are random access iterators. |
3100 | const auto *AllSameValue = *MappedBegin; |
3101 | ++MappedBegin; |
3102 | bool AllEqual = std::all_of( |
3103 | first: MappedBegin, last: MappedEnd, |
3104 | pred: [&AllSameValue](const MemoryAccess *V) { return V == AllSameValue; }); |
3105 | |
3106 | if (AllEqual) |
3107 | LLVM_DEBUG(dbgs() << "Memory Phi value numbered to " << *AllSameValue |
3108 | << "\n" ); |
3109 | else |
3110 | LLVM_DEBUG(dbgs() << "Memory Phi value numbered to itself\n" ); |
3111 | // If it's equal to something, it's in that class. Otherwise, it has to be in |
3112 | // a class where it is the leader (other things may be equivalent to it, but |
3113 | // it needs to start off in its own class, which means it must have been the |
3114 | // leader, and it can't have stopped being the leader because it was never |
3115 | // removed). |
3116 | CongruenceClass *CC = |
3117 | AllEqual ? getMemoryClass(MA: AllSameValue) : ensureLeaderOfMemoryClass(MA: MP); |
3118 | auto OldState = MemoryPhiState.lookup(Val: MP); |
3119 | assert(OldState != MPS_Invalid && "Invalid memory phi state" ); |
3120 | auto NewState = AllEqual ? MPS_Equivalent : MPS_Unique; |
3121 | MemoryPhiState[MP] = NewState; |
3122 | if (setMemoryClass(From: MP, NewClass: CC) || OldState != NewState) |
3123 | markMemoryUsersTouched(MA: MP); |
3124 | } |
3125 | |
3126 | // Value number a single instruction, symbolically evaluating, performing |
3127 | // congruence finding, and updating mappings. |
3128 | void NewGVN::valueNumberInstruction(Instruction *I) { |
3129 | LLVM_DEBUG(dbgs() << "Processing instruction " << *I << "\n" ); |
3130 | if (!I->isTerminator()) { |
3131 | const Expression *Symbolized = nullptr; |
3132 | SmallPtrSet<Value *, 2> Visited; |
3133 | if (DebugCounter::shouldExecute(CounterName: VNCounter)) { |
3134 | auto Res = performSymbolicEvaluation(I, Visited); |
3135 | Symbolized = Res.Expr; |
3136 | addAdditionalUsers(Res, User: I); |
3137 | |
3138 | // Make a phi of ops if necessary |
3139 | if (Symbolized && !isa<ConstantExpression>(Val: Symbolized) && |
3140 | !isa<VariableExpression>(Val: Symbolized) && PHINodeUses.count(Ptr: I)) { |
3141 | auto *PHIE = makePossiblePHIOfOps(I, Visited); |
3142 | // If we created a phi of ops, use it. |
3143 | // If we couldn't create one, make sure we don't leave one lying around |
3144 | if (PHIE) { |
3145 | Symbolized = PHIE; |
3146 | } else if (auto *Op = RealToTemp.lookup(Val: I)) { |
3147 | removePhiOfOps(I, PHITemp: Op); |
3148 | } |
3149 | } |
3150 | } else { |
3151 | // Mark the instruction as unused so we don't value number it again. |
3152 | InstrDFS[I] = 0; |
3153 | } |
3154 | // If we couldn't come up with a symbolic expression, use the unknown |
3155 | // expression |
3156 | if (Symbolized == nullptr) |
3157 | Symbolized = createUnknownExpression(I); |
3158 | performCongruenceFinding(I, E: Symbolized); |
3159 | } else { |
3160 | // Handle terminators that return values. All of them produce values we |
3161 | // don't currently understand. We don't place non-value producing |
3162 | // terminators in a class. |
3163 | if (!I->getType()->isVoidTy()) { |
3164 | auto *Symbolized = createUnknownExpression(I); |
3165 | performCongruenceFinding(I, E: Symbolized); |
3166 | } |
3167 | processOutgoingEdges(TI: I, B: I->getParent()); |
3168 | } |
3169 | } |
3170 | |
3171 | // Check if there is a path, using single or equal argument phi nodes, from |
3172 | // First to Second. |
3173 | bool NewGVN::singleReachablePHIPath( |
3174 | SmallPtrSet<const MemoryAccess *, 8> &Visited, const MemoryAccess *First, |
3175 | const MemoryAccess *Second) const { |
3176 | if (First == Second) |
3177 | return true; |
3178 | if (MSSA->isLiveOnEntryDef(MA: First)) |
3179 | return false; |
3180 | |
3181 | // This is not perfect, but as we're just verifying here, we can live with |
3182 | // the loss of precision. The real solution would be that of doing strongly |
3183 | // connected component finding in this routine, and it's probably not worth |
3184 | // the complexity for the time being. So, we just keep a set of visited |
3185 | // MemoryAccess and return true when we hit a cycle. |
3186 | if (!Visited.insert(Ptr: First).second) |
3187 | return true; |
3188 | |
3189 | const auto *EndDef = First; |
3190 | for (const auto *ChainDef : optimized_def_chain(MA: First)) { |
3191 | if (ChainDef == Second) |
3192 | return true; |
3193 | if (MSSA->isLiveOnEntryDef(MA: ChainDef)) |
3194 | return false; |
3195 | EndDef = ChainDef; |
3196 | } |
3197 | auto *MP = cast<MemoryPhi>(Val: EndDef); |
3198 | auto ReachableOperandPred = [&](const Use &U) { |
3199 | return ReachableEdges.count(V: {MP->getIncomingBlock(U), MP->getBlock()}); |
3200 | }; |
3201 | auto FilteredPhiArgs = |
3202 | make_filter_range(Range: MP->operands(), Pred: ReachableOperandPred); |
3203 | SmallVector<const Value *, 32> OperandList(FilteredPhiArgs); |
3204 | bool Okay = all_equal(Range&: OperandList); |
3205 | if (Okay) |
3206 | return singleReachablePHIPath(Visited, First: cast<MemoryAccess>(Val: OperandList[0]), |
3207 | Second); |
3208 | return false; |
3209 | } |
3210 | |
3211 | // Verify the that the memory equivalence table makes sense relative to the |
3212 | // congruence classes. Note that this checking is not perfect, and is currently |
3213 | // subject to very rare false negatives. It is only useful for |
3214 | // testing/debugging. |
3215 | void NewGVN::verifyMemoryCongruency() const { |
3216 | #ifndef NDEBUG |
3217 | // Verify that the memory table equivalence and memory member set match |
3218 | for (const auto *CC : CongruenceClasses) { |
3219 | if (CC == TOPClass || CC->isDead()) |
3220 | continue; |
3221 | if (CC->getStoreCount() != 0) { |
3222 | assert((CC->getStoredValue() || !isa<StoreInst>(CC->getLeader())) && |
3223 | "Any class with a store as a leader should have a " |
3224 | "representative stored value" ); |
3225 | assert(CC->getMemoryLeader() && |
3226 | "Any congruence class with a store should have a " |
3227 | "representative access" ); |
3228 | } |
3229 | |
3230 | if (CC->getMemoryLeader()) |
3231 | assert(MemoryAccessToClass.lookup(CC->getMemoryLeader()) == CC && |
3232 | "Representative MemoryAccess does not appear to be reverse " |
3233 | "mapped properly" ); |
3234 | for (const auto *M : CC->memory()) |
3235 | assert(MemoryAccessToClass.lookup(M) == CC && |
3236 | "Memory member does not appear to be reverse mapped properly" ); |
3237 | } |
3238 | |
3239 | // Anything equivalent in the MemoryAccess table should be in the same |
3240 | // congruence class. |
3241 | |
3242 | // Filter out the unreachable and trivially dead entries, because they may |
3243 | // never have been updated if the instructions were not processed. |
3244 | auto ReachableAccessPred = |
3245 | [&](const std::pair<const MemoryAccess *, CongruenceClass *> Pair) { |
3246 | bool Result = ReachableBlocks.count(Pair.first->getBlock()); |
3247 | if (!Result || MSSA->isLiveOnEntryDef(Pair.first) || |
3248 | MemoryToDFSNum(Pair.first) == 0) |
3249 | return false; |
3250 | if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first)) |
3251 | return !isInstructionTriviallyDead(MemDef->getMemoryInst()); |
3252 | |
3253 | // We could have phi nodes which operands are all trivially dead, |
3254 | // so we don't process them. |
3255 | if (auto *MemPHI = dyn_cast<MemoryPhi>(Pair.first)) { |
3256 | for (const auto &U : MemPHI->incoming_values()) { |
3257 | if (auto *I = dyn_cast<Instruction>(&*U)) { |
3258 | if (!isInstructionTriviallyDead(I)) |
3259 | return true; |
3260 | } |
3261 | } |
3262 | return false; |
3263 | } |
3264 | |
3265 | return true; |
3266 | }; |
3267 | |
3268 | auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred); |
3269 | for (auto KV : Filtered) { |
3270 | if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) { |
3271 | auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader()); |
3272 | if (FirstMUD && SecondMUD) { |
3273 | SmallPtrSet<const MemoryAccess *, 8> VisitedMAS; |
3274 | assert((singleReachablePHIPath(VisitedMAS, FirstMUD, SecondMUD) || |
3275 | ValueToClass.lookup(FirstMUD->getMemoryInst()) == |
3276 | ValueToClass.lookup(SecondMUD->getMemoryInst())) && |
3277 | "The instructions for these memory operations should have " |
3278 | "been in the same congruence class or reachable through" |
3279 | "a single argument phi" ); |
3280 | } |
3281 | } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) { |
3282 | // We can only sanely verify that MemoryDefs in the operand list all have |
3283 | // the same class. |
3284 | auto ReachableOperandPred = [&](const Use &U) { |
3285 | return ReachableEdges.count( |
3286 | {FirstMP->getIncomingBlock(U), FirstMP->getBlock()}) && |
3287 | isa<MemoryDef>(U); |
3288 | }; |
3289 | // All arguments should in the same class, ignoring unreachable arguments |
3290 | auto FilteredPhiArgs = |
3291 | make_filter_range(FirstMP->operands(), ReachableOperandPred); |
3292 | SmallVector<const CongruenceClass *, 16> PhiOpClasses; |
3293 | std::transform(FilteredPhiArgs.begin(), FilteredPhiArgs.end(), |
3294 | std::back_inserter(PhiOpClasses), [&](const Use &U) { |
3295 | const MemoryDef *MD = cast<MemoryDef>(U); |
3296 | return ValueToClass.lookup(MD->getMemoryInst()); |
3297 | }); |
3298 | assert(all_equal(PhiOpClasses) && |
3299 | "All MemoryPhi arguments should be in the same class" ); |
3300 | } |
3301 | } |
3302 | #endif |
3303 | } |
3304 | |
3305 | // Verify that the sparse propagation we did actually found the maximal fixpoint |
3306 | // We do this by storing the value to class mapping, touching all instructions, |
3307 | // and redoing the iteration to see if anything changed. |
3308 | void NewGVN::verifyIterationSettled(Function &F) { |
3309 | #ifndef NDEBUG |
3310 | LLVM_DEBUG(dbgs() << "Beginning iteration verification\n" ); |
3311 | if (DebugCounter::isCounterSet(VNCounter)) |
3312 | DebugCounter::setCounterState(VNCounter, StartingVNCounter); |
3313 | |
3314 | // Note that we have to store the actual classes, as we may change existing |
3315 | // classes during iteration. This is because our memory iteration propagation |
3316 | // is not perfect, and so may waste a little work. But it should generate |
3317 | // exactly the same congruence classes we have now, with different IDs. |
3318 | std::map<const Value *, CongruenceClass> BeforeIteration; |
3319 | |
3320 | for (auto &KV : ValueToClass) { |
3321 | if (auto *I = dyn_cast<Instruction>(KV.first)) |
3322 | // Skip unused/dead instructions. |
3323 | if (InstrToDFSNum(I) == 0) |
3324 | continue; |
3325 | BeforeIteration.insert({KV.first, *KV.second}); |
3326 | } |
3327 | |
3328 | TouchedInstructions.set(); |
3329 | TouchedInstructions.reset(0); |
3330 | OpSafeForPHIOfOps.clear(); |
3331 | CacheIdx = 0; |
3332 | iterateTouchedInstructions(); |
3333 | DenseSet<std::pair<const CongruenceClass *, const CongruenceClass *>> |
3334 | EqualClasses; |
3335 | for (const auto &KV : ValueToClass) { |
3336 | if (auto *I = dyn_cast<Instruction>(KV.first)) |
3337 | // Skip unused/dead instructions. |
3338 | if (InstrToDFSNum(I) == 0) |
3339 | continue; |
3340 | // We could sink these uses, but i think this adds a bit of clarity here as |
3341 | // to what we are comparing. |
3342 | auto *BeforeCC = &BeforeIteration.find(KV.first)->second; |
3343 | auto *AfterCC = KV.second; |
3344 | // Note that the classes can't change at this point, so we memoize the set |
3345 | // that are equal. |
3346 | if (!EqualClasses.count({BeforeCC, AfterCC})) { |
3347 | assert(BeforeCC->isEquivalentTo(AfterCC) && |
3348 | "Value number changed after main loop completed!" ); |
3349 | EqualClasses.insert({BeforeCC, AfterCC}); |
3350 | } |
3351 | } |
3352 | #endif |
3353 | } |
3354 | |
3355 | // Verify that for each store expression in the expression to class mapping, |
3356 | // only the latest appears, and multiple ones do not appear. |
3357 | // Because loads do not use the stored value when doing equality with stores, |
3358 | // if we don't erase the old store expressions from the table, a load can find |
3359 | // a no-longer valid StoreExpression. |
3360 | void NewGVN::verifyStoreExpressions() const { |
3361 | #ifndef NDEBUG |
3362 | // This is the only use of this, and it's not worth defining a complicated |
3363 | // densemapinfo hash/equality function for it. |
3364 | std::set< |
3365 | std::pair<const Value *, |
3366 | std::tuple<const Value *, const CongruenceClass *, Value *>>> |
3367 | StoreExpressionSet; |
3368 | for (const auto &KV : ExpressionToClass) { |
3369 | if (auto *SE = dyn_cast<StoreExpression>(KV.first)) { |
3370 | // Make sure a version that will conflict with loads is not already there |
3371 | auto Res = StoreExpressionSet.insert( |
3372 | {SE->getOperand(0), std::make_tuple(SE->getMemoryLeader(), KV.second, |
3373 | SE->getStoredValue())}); |
3374 | bool Okay = Res.second; |
3375 | // It's okay to have the same expression already in there if it is |
3376 | // identical in nature. |
3377 | // This can happen when the leader of the stored value changes over time. |
3378 | if (!Okay) |
3379 | Okay = (std::get<1>(Res.first->second) == KV.second) && |
3380 | (lookupOperandLeader(std::get<2>(Res.first->second)) == |
3381 | lookupOperandLeader(SE->getStoredValue())); |
3382 | assert(Okay && "Stored expression conflict exists in expression table" ); |
3383 | auto *ValueExpr = ValueToExpression.lookup(SE->getStoreInst()); |
3384 | assert(ValueExpr && ValueExpr->equals(*SE) && |
3385 | "StoreExpression in ExpressionToClass is not latest " |
3386 | "StoreExpression for value" ); |
3387 | } |
3388 | } |
3389 | #endif |
3390 | } |
3391 | |
3392 | // This is the main value numbering loop, it iterates over the initial touched |
3393 | // instruction set, propagating value numbers, marking things touched, etc, |
3394 | // until the set of touched instructions is completely empty. |
3395 | void NewGVN::iterateTouchedInstructions() { |
3396 | uint64_t Iterations = 0; |
3397 | // Figure out where touchedinstructions starts |
3398 | int FirstInstr = TouchedInstructions.find_first(); |
3399 | // Nothing set, nothing to iterate, just return. |
3400 | if (FirstInstr == -1) |
3401 | return; |
3402 | const BasicBlock *LastBlock = getBlockForValue(V: InstrFromDFSNum(DFSNum: FirstInstr)); |
3403 | while (TouchedInstructions.any()) { |
3404 | ++Iterations; |
3405 | // Walk through all the instructions in all the blocks in RPO. |
3406 | // TODO: As we hit a new block, we should push and pop equalities into a |
3407 | // table lookupOperandLeader can use, to catch things PredicateInfo |
3408 | // might miss, like edge-only equivalences. |
3409 | for (unsigned InstrNum : TouchedInstructions.set_bits()) { |
3410 | |
3411 | // This instruction was found to be dead. We don't bother looking |
3412 | // at it again. |
3413 | if (InstrNum == 0) { |
3414 | TouchedInstructions.reset(Idx: InstrNum); |
3415 | continue; |
3416 | } |
3417 | |
3418 | Value *V = InstrFromDFSNum(DFSNum: InstrNum); |
3419 | const BasicBlock *CurrBlock = getBlockForValue(V); |
3420 | |
3421 | // If we hit a new block, do reachability processing. |
3422 | if (CurrBlock != LastBlock) { |
3423 | LastBlock = CurrBlock; |
3424 | bool BlockReachable = ReachableBlocks.count(Ptr: CurrBlock); |
3425 | const auto &CurrInstRange = BlockInstRange.lookup(Val: CurrBlock); |
3426 | |
3427 | // If it's not reachable, erase any touched instructions and move on. |
3428 | if (!BlockReachable) { |
3429 | TouchedInstructions.reset(I: CurrInstRange.first, E: CurrInstRange.second); |
3430 | LLVM_DEBUG(dbgs() << "Skipping instructions in block " |
3431 | << getBlockName(CurrBlock) |
3432 | << " because it is unreachable\n" ); |
3433 | continue; |
3434 | } |
3435 | // Use the appropriate cache for "OpIsSafeForPHIOfOps". |
3436 | CacheIdx = RPOOrdering.lookup(Val: DT->getNode(BB: CurrBlock)) - 1; |
3437 | updateProcessedCount(V: CurrBlock); |
3438 | } |
3439 | // Reset after processing (because we may mark ourselves as touched when |
3440 | // we propagate equalities). |
3441 | TouchedInstructions.reset(Idx: InstrNum); |
3442 | |
3443 | if (auto *MP = dyn_cast<MemoryPhi>(Val: V)) { |
3444 | LLVM_DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n" ); |
3445 | valueNumberMemoryPhi(MP); |
3446 | } else if (auto *I = dyn_cast<Instruction>(Val: V)) { |
3447 | valueNumberInstruction(I); |
3448 | } else { |
3449 | llvm_unreachable("Should have been a MemoryPhi or Instruction" ); |
3450 | } |
3451 | updateProcessedCount(V); |
3452 | } |
3453 | } |
3454 | NumGVNMaxIterations = std::max(a: NumGVNMaxIterations.getValue(), b: Iterations); |
3455 | } |
3456 | |
3457 | // This is the main transformation entry point. |
3458 | bool NewGVN::runGVN() { |
3459 | if (DebugCounter::isCounterSet(ID: VNCounter)) |
3460 | StartingVNCounter = DebugCounter::getCounterState(ID: VNCounter); |
3461 | bool Changed = false; |
3462 | NumFuncArgs = F.arg_size(); |
3463 | MSSAWalker = MSSA->getWalker(); |
3464 | SingletonDeadExpression = new (ExpressionAllocator) DeadExpression(); |
3465 | |
3466 | // Count number of instructions for sizing of hash tables, and come |
3467 | // up with a global dfs numbering for instructions. |
3468 | unsigned ICount = 1; |
3469 | // Add an empty instruction to account for the fact that we start at 1 |
3470 | DFSToInstr.emplace_back(Args: nullptr); |
3471 | // Note: We want ideal RPO traversal of the blocks, which is not quite the |
3472 | // same as dominator tree order, particularly with regard whether backedges |
3473 | // get visited first or second, given a block with multiple successors. |
3474 | // If we visit in the wrong order, we will end up performing N times as many |
3475 | // iterations. |
3476 | // The dominator tree does guarantee that, for a given dom tree node, it's |
3477 | // parent must occur before it in the RPO ordering. Thus, we only need to sort |
3478 | // the siblings. |
3479 | ReversePostOrderTraversal<Function *> RPOT(&F); |
3480 | unsigned Counter = 0; |
3481 | for (auto &B : RPOT) { |
3482 | auto *Node = DT->getNode(BB: B); |
3483 | assert(Node && "RPO and Dominator tree should have same reachability" ); |
3484 | RPOOrdering[Node] = ++Counter; |
3485 | } |
3486 | // Sort dominator tree children arrays into RPO. |
3487 | for (auto &B : RPOT) { |
3488 | auto *Node = DT->getNode(BB: B); |
3489 | if (Node->getNumChildren() > 1) |
3490 | llvm::sort(C&: *Node, Comp: [&](const DomTreeNode *A, const DomTreeNode *B) { |
3491 | return RPOOrdering[A] < RPOOrdering[B]; |
3492 | }); |
3493 | } |
3494 | |
3495 | // Now a standard depth first ordering of the domtree is equivalent to RPO. |
3496 | for (auto *DTN : depth_first(G: DT->getRootNode())) { |
3497 | BasicBlock *B = DTN->getBlock(); |
3498 | const auto &BlockRange = assignDFSNumbers(B, Start: ICount); |
3499 | BlockInstRange.insert(KV: {B, BlockRange}); |
3500 | ICount += BlockRange.second - BlockRange.first; |
3501 | } |
3502 | initializeCongruenceClasses(F); |
3503 | |
3504 | TouchedInstructions.resize(N: ICount); |
3505 | // Ensure we don't end up resizing the expressionToClass map, as |
3506 | // that can be quite expensive. At most, we have one expression per |
3507 | // instruction. |
3508 | ExpressionToClass.reserve(NumEntries: ICount); |
3509 | |
3510 | // Initialize the touched instructions to include the entry block. |
3511 | const auto &InstRange = BlockInstRange.lookup(Val: &F.getEntryBlock()); |
3512 | TouchedInstructions.set(I: InstRange.first, E: InstRange.second); |
3513 | LLVM_DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock()) |
3514 | << " marked reachable\n" ); |
3515 | ReachableBlocks.insert(Ptr: &F.getEntryBlock()); |
3516 | // Use index corresponding to entry block. |
3517 | CacheIdx = 0; |
3518 | |
3519 | iterateTouchedInstructions(); |
3520 | verifyMemoryCongruency(); |
3521 | verifyIterationSettled(F); |
3522 | verifyStoreExpressions(); |
3523 | |
3524 | Changed |= eliminateInstructions(F); |
3525 | |
3526 | // Delete all instructions marked for deletion. |
3527 | for (Instruction *ToErase : InstructionsToErase) { |
3528 | if (!ToErase->use_empty()) |
3529 | ToErase->replaceAllUsesWith(V: PoisonValue::get(T: ToErase->getType())); |
3530 | |
3531 | assert(ToErase->getParent() && |
3532 | "BB containing ToErase deleted unexpectedly!" ); |
3533 | ToErase->eraseFromParent(); |
3534 | } |
3535 | Changed |= !InstructionsToErase.empty(); |
3536 | |
3537 | // Delete all unreachable blocks. |
3538 | auto UnreachableBlockPred = [&](const BasicBlock &BB) { |
3539 | return !ReachableBlocks.count(Ptr: &BB); |
3540 | }; |
3541 | |
3542 | for (auto &BB : make_filter_range(Range&: F, Pred: UnreachableBlockPred)) { |
3543 | LLVM_DEBUG(dbgs() << "We believe block " << getBlockName(&BB) |
3544 | << " is unreachable\n" ); |
3545 | deleteInstructionsInBlock(&BB); |
3546 | Changed = true; |
3547 | } |
3548 | |
3549 | cleanupTables(); |
3550 | return Changed; |
3551 | } |
3552 | |
3553 | struct NewGVN::ValueDFS { |
3554 | int DFSIn = 0; |
3555 | int DFSOut = 0; |
3556 | int LocalNum = 0; |
3557 | |
3558 | // Only one of Def and U will be set. |
3559 | // The bool in the Def tells us whether the Def is the stored value of a |
3560 | // store. |
3561 | PointerIntPair<Value *, 1, bool> Def; |
3562 | Use *U = nullptr; |
3563 | |
3564 | bool operator<(const ValueDFS &Other) const { |
3565 | // It's not enough that any given field be less than - we have sets |
3566 | // of fields that need to be evaluated together to give a proper ordering. |
3567 | // For example, if you have; |
3568 | // DFS (1, 3) |
3569 | // Val 0 |
3570 | // DFS (1, 2) |
3571 | // Val 50 |
3572 | // We want the second to be less than the first, but if we just go field |
3573 | // by field, we will get to Val 0 < Val 50 and say the first is less than |
3574 | // the second. We only want it to be less than if the DFS orders are equal. |
3575 | // |
3576 | // Each LLVM instruction only produces one value, and thus the lowest-level |
3577 | // differentiator that really matters for the stack (and what we use as a |
3578 | // replacement) is the local dfs number. |
3579 | // Everything else in the structure is instruction level, and only affects |
3580 | // the order in which we will replace operands of a given instruction. |
3581 | // |
3582 | // For a given instruction (IE things with equal dfsin, dfsout, localnum), |
3583 | // the order of replacement of uses does not matter. |
3584 | // IE given, |
3585 | // a = 5 |
3586 | // b = a + a |
3587 | // When you hit b, you will have two valuedfs with the same dfsin, out, and |
3588 | // localnum. |
3589 | // The .val will be the same as well. |
3590 | // The .u's will be different. |
3591 | // You will replace both, and it does not matter what order you replace them |
3592 | // in (IE whether you replace operand 2, then operand 1, or operand 1, then |
3593 | // operand 2). |
3594 | // Similarly for the case of same dfsin, dfsout, localnum, but different |
3595 | // .val's |
3596 | // a = 5 |
3597 | // b = 6 |
3598 | // c = a + b |
3599 | // in c, we will a valuedfs for a, and one for b,with everything the same |
3600 | // but .val and .u. |
3601 | // It does not matter what order we replace these operands in. |
3602 | // You will always end up with the same IR, and this is guaranteed. |
3603 | return std::tie(args: DFSIn, args: DFSOut, args: LocalNum, args: Def, args: U) < |
3604 | std::tie(args: Other.DFSIn, args: Other.DFSOut, args: Other.LocalNum, args: Other.Def, |
3605 | args: Other.U); |
3606 | } |
3607 | }; |
3608 | |
3609 | // This function converts the set of members for a congruence class from values, |
3610 | // to sets of defs and uses with associated DFS info. The total number of |
3611 | // reachable uses for each value is stored in UseCount, and instructions that |
3612 | // seem |
3613 | // dead (have no non-dead uses) are stored in ProbablyDead. |
3614 | void NewGVN::convertClassToDFSOrdered( |
3615 | const CongruenceClass &Dense, SmallVectorImpl<ValueDFS> &DFSOrderedSet, |
3616 | DenseMap<const Value *, unsigned int> &UseCounts, |
3617 | SmallPtrSetImpl<Instruction *> &ProbablyDead) const { |
3618 | for (auto *D : Dense) { |
3619 | // First add the value. |
3620 | BasicBlock *BB = getBlockForValue(V: D); |
3621 | // Constants are handled prior to ever calling this function, so |
3622 | // we should only be left with instructions as members. |
3623 | assert(BB && "Should have figured out a basic block for value" ); |
3624 | ValueDFS VDDef; |
3625 | DomTreeNode *DomNode = DT->getNode(BB); |
3626 | VDDef.DFSIn = DomNode->getDFSNumIn(); |
3627 | VDDef.DFSOut = DomNode->getDFSNumOut(); |
3628 | // If it's a store, use the leader of the value operand, if it's always |
3629 | // available, or the value operand. TODO: We could do dominance checks to |
3630 | // find a dominating leader, but not worth it ATM. |
3631 | if (auto *SI = dyn_cast<StoreInst>(Val: D)) { |
3632 | auto Leader = lookupOperandLeader(V: SI->getValueOperand()); |
3633 | if (alwaysAvailable(V: Leader)) { |
3634 | VDDef.Def.setPointer(Leader); |
3635 | } else { |
3636 | VDDef.Def.setPointer(SI->getValueOperand()); |
3637 | VDDef.Def.setInt(true); |
3638 | } |
3639 | } else { |
3640 | VDDef.Def.setPointer(D); |
3641 | } |
3642 | assert(isa<Instruction>(D) && |
3643 | "The dense set member should always be an instruction" ); |
3644 | Instruction *Def = cast<Instruction>(Val: D); |
3645 | VDDef.LocalNum = InstrToDFSNum(V: D); |
3646 | DFSOrderedSet.push_back(Elt: VDDef); |
3647 | // If there is a phi node equivalent, add it |
3648 | if (auto *PN = RealToTemp.lookup(Val: Def)) { |
3649 | auto *PHIE = |
3650 | dyn_cast_or_null<PHIExpression>(Val: ValueToExpression.lookup(Val: Def)); |
3651 | if (PHIE) { |
3652 | VDDef.Def.setInt(false); |
3653 | VDDef.Def.setPointer(PN); |
3654 | VDDef.LocalNum = 0; |
3655 | DFSOrderedSet.push_back(Elt: VDDef); |
3656 | } |
3657 | } |
3658 | |
3659 | unsigned int UseCount = 0; |
3660 | // Now add the uses. |
3661 | for (auto &U : Def->uses()) { |
3662 | if (auto *I = dyn_cast<Instruction>(Val: U.getUser())) { |
3663 | // Don't try to replace into dead uses |
3664 | if (InstructionsToErase.count(Ptr: I)) |
3665 | continue; |
3666 | ValueDFS VDUse; |
3667 | // Put the phi node uses in the incoming block. |
3668 | BasicBlock *IBlock; |
3669 | if (auto *P = dyn_cast<PHINode>(Val: I)) { |
3670 | IBlock = P->getIncomingBlock(U); |
3671 | // Make phi node users appear last in the incoming block |
3672 | // they are from. |
3673 | VDUse.LocalNum = InstrDFS.size() + 1; |
3674 | } else { |
3675 | IBlock = getBlockForValue(V: I); |
3676 | VDUse.LocalNum = InstrToDFSNum(V: I); |
3677 | } |
3678 | |
3679 | // Skip uses in unreachable blocks, as we're going |
3680 | // to delete them. |
3681 | if (!ReachableBlocks.contains(Ptr: IBlock)) |
3682 | continue; |
3683 | |
3684 | DomTreeNode *DomNode = DT->getNode(BB: IBlock); |
3685 | VDUse.DFSIn = DomNode->getDFSNumIn(); |
3686 | VDUse.DFSOut = DomNode->getDFSNumOut(); |
3687 | VDUse.U = &U; |
3688 | ++UseCount; |
3689 | DFSOrderedSet.emplace_back(Args&: VDUse); |
3690 | } |
3691 | } |
3692 | |
3693 | // If there are no uses, it's probably dead (but it may have side-effects, |
3694 | // so not definitely dead. Otherwise, store the number of uses so we can |
3695 | // track if it becomes dead later). |
3696 | if (UseCount == 0) |
3697 | ProbablyDead.insert(Ptr: Def); |
3698 | else |
3699 | UseCounts[Def] = UseCount; |
3700 | } |
3701 | } |
3702 | |
3703 | // This function converts the set of members for a congruence class from values, |
3704 | // to the set of defs for loads and stores, with associated DFS info. |
3705 | void NewGVN::convertClassToLoadsAndStores( |
3706 | const CongruenceClass &Dense, |
3707 | SmallVectorImpl<ValueDFS> &LoadsAndStores) const { |
3708 | for (auto *D : Dense) { |
3709 | if (!isa<LoadInst>(Val: D) && !isa<StoreInst>(Val: D)) |
3710 | continue; |
3711 | |
3712 | BasicBlock *BB = getBlockForValue(V: D); |
3713 | ValueDFS VD; |
3714 | DomTreeNode *DomNode = DT->getNode(BB); |
3715 | VD.DFSIn = DomNode->getDFSNumIn(); |
3716 | VD.DFSOut = DomNode->getDFSNumOut(); |
3717 | VD.Def.setPointer(D); |
3718 | |
3719 | // If it's an instruction, use the real local dfs number. |
3720 | if (auto *I = dyn_cast<Instruction>(Val: D)) |
3721 | VD.LocalNum = InstrToDFSNum(V: I); |
3722 | else |
3723 | llvm_unreachable("Should have been an instruction" ); |
3724 | |
3725 | LoadsAndStores.emplace_back(Args&: VD); |
3726 | } |
3727 | } |
3728 | |
3729 | static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { |
3730 | patchReplacementInstruction(I, Repl); |
3731 | I->replaceAllUsesWith(V: Repl); |
3732 | } |
3733 | |
3734 | void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) { |
3735 | LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << *BB); |
3736 | ++NumGVNBlocksDeleted; |
3737 | |
3738 | // Delete the instructions backwards, as it has a reduced likelihood of having |
3739 | // to update as many def-use and use-def chains. Start after the terminator. |
3740 | auto StartPoint = BB->rbegin(); |
3741 | ++StartPoint; |
3742 | // Note that we explicitly recalculate BB->rend() on each iteration, |
3743 | // as it may change when we remove the first instruction. |
3744 | for (BasicBlock::reverse_iterator I(StartPoint); I != BB->rend();) { |
3745 | Instruction &Inst = *I++; |
3746 | if (!Inst.use_empty()) |
3747 | Inst.replaceAllUsesWith(V: PoisonValue::get(T: Inst.getType())); |
3748 | if (isa<LandingPadInst>(Val: Inst)) |
3749 | continue; |
3750 | salvageKnowledge(I: &Inst, AC); |
3751 | |
3752 | Inst.eraseFromParent(); |
3753 | ++NumGVNInstrDeleted; |
3754 | } |
3755 | // Now insert something that simplifycfg will turn into an unreachable. |
3756 | Type *Int8Ty = Type::getInt8Ty(C&: BB->getContext()); |
3757 | new StoreInst( |
3758 | PoisonValue::get(T: Int8Ty), |
3759 | Constant::getNullValue(Ty: PointerType::getUnqual(C&: BB->getContext())), |
3760 | BB->getTerminator()->getIterator()); |
3761 | } |
3762 | |
3763 | void NewGVN::markInstructionForDeletion(Instruction *I) { |
3764 | LLVM_DEBUG(dbgs() << "Marking " << *I << " for deletion\n" ); |
3765 | InstructionsToErase.insert(Ptr: I); |
3766 | } |
3767 | |
3768 | void NewGVN::replaceInstruction(Instruction *I, Value *V) { |
3769 | LLVM_DEBUG(dbgs() << "Replacing " << *I << " with " << *V << "\n" ); |
3770 | patchAndReplaceAllUsesWith(I, Repl: V); |
3771 | // We save the actual erasing to avoid invalidating memory |
3772 | // dependencies until we are done with everything. |
3773 | markInstructionForDeletion(I); |
3774 | } |
3775 | |
3776 | namespace { |
3777 | |
3778 | // This is a stack that contains both the value and dfs info of where |
3779 | // that value is valid. |
3780 | class ValueDFSStack { |
3781 | public: |
3782 | Value *back() const { return ValueStack.back(); } |
3783 | std::pair<int, int> dfs_back() const { return DFSStack.back(); } |
3784 | |
3785 | void push_back(Value *V, int DFSIn, int DFSOut) { |
3786 | ValueStack.emplace_back(Args&: V); |
3787 | DFSStack.emplace_back(Args&: DFSIn, Args&: DFSOut); |
3788 | } |
3789 | |
3790 | bool empty() const { return DFSStack.empty(); } |
3791 | |
3792 | bool isInScope(int DFSIn, int DFSOut) const { |
3793 | if (empty()) |
3794 | return false; |
3795 | return DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second; |
3796 | } |
3797 | |
3798 | void popUntilDFSScope(int DFSIn, int DFSOut) { |
3799 | |
3800 | // These two should always be in sync at this point. |
3801 | assert(ValueStack.size() == DFSStack.size() && |
3802 | "Mismatch between ValueStack and DFSStack" ); |
3803 | while ( |
3804 | !DFSStack.empty() && |
3805 | !(DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second)) { |
3806 | DFSStack.pop_back(); |
3807 | ValueStack.pop_back(); |
3808 | } |
3809 | } |
3810 | |
3811 | private: |
3812 | SmallVector<Value *, 8> ValueStack; |
3813 | SmallVector<std::pair<int, int>, 8> DFSStack; |
3814 | }; |
3815 | |
3816 | } // end anonymous namespace |
3817 | |
3818 | // Given an expression, get the congruence class for it. |
3819 | CongruenceClass *NewGVN::getClassForExpression(const Expression *E) const { |
3820 | if (auto *VE = dyn_cast<VariableExpression>(Val: E)) |
3821 | return ValueToClass.lookup(Val: VE->getVariableValue()); |
3822 | else if (isa<DeadExpression>(Val: E)) |
3823 | return TOPClass; |
3824 | return ExpressionToClass.lookup(Val: E); |
3825 | } |
3826 | |
3827 | // Given a value and a basic block we are trying to see if it is available in, |
3828 | // see if the value has a leader available in that block. |
3829 | Value *NewGVN::findPHIOfOpsLeader(const Expression *E, |
3830 | const Instruction *OrigInst, |
3831 | const BasicBlock *BB) const { |
3832 | // It would already be constant if we could make it constant |
3833 | if (auto *CE = dyn_cast<ConstantExpression>(Val: E)) |
3834 | return CE->getConstantValue(); |
3835 | if (auto *VE = dyn_cast<VariableExpression>(Val: E)) { |
3836 | auto *V = VE->getVariableValue(); |
3837 | if (alwaysAvailable(V) || DT->dominates(A: getBlockForValue(V), B: BB)) |
3838 | return VE->getVariableValue(); |
3839 | } |
3840 | |
3841 | auto *CC = getClassForExpression(E); |
3842 | if (!CC) |
3843 | return nullptr; |
3844 | if (alwaysAvailable(V: CC->getLeader())) |
3845 | return CC->getLeader(); |
3846 | |
3847 | for (auto *Member : *CC) { |
3848 | auto *MemberInst = dyn_cast<Instruction>(Val: Member); |
3849 | if (MemberInst == OrigInst) |
3850 | continue; |
3851 | // Anything that isn't an instruction is always available. |
3852 | if (!MemberInst) |
3853 | return Member; |
3854 | if (DT->dominates(A: getBlockForValue(V: MemberInst), B: BB)) |
3855 | return Member; |
3856 | } |
3857 | return nullptr; |
3858 | } |
3859 | |
3860 | bool NewGVN::eliminateInstructions(Function &F) { |
3861 | // This is a non-standard eliminator. The normal way to eliminate is |
3862 | // to walk the dominator tree in order, keeping track of available |
3863 | // values, and eliminating them. However, this is mildly |
3864 | // pointless. It requires doing lookups on every instruction, |
3865 | // regardless of whether we will ever eliminate it. For |
3866 | // instructions part of most singleton congruence classes, we know we |
3867 | // will never eliminate them. |
3868 | |
3869 | // Instead, this eliminator looks at the congruence classes directly, sorts |
3870 | // them into a DFS ordering of the dominator tree, and then we just |
3871 | // perform elimination straight on the sets by walking the congruence |
3872 | // class member uses in order, and eliminate the ones dominated by the |
3873 | // last member. This is worst case O(E log E) where E = number of |
3874 | // instructions in a single congruence class. In theory, this is all |
3875 | // instructions. In practice, it is much faster, as most instructions are |
3876 | // either in singleton congruence classes or can't possibly be eliminated |
3877 | // anyway (if there are no overlapping DFS ranges in class). |
3878 | // When we find something not dominated, it becomes the new leader |
3879 | // for elimination purposes. |
3880 | // TODO: If we wanted to be faster, We could remove any members with no |
3881 | // overlapping ranges while sorting, as we will never eliminate anything |
3882 | // with those members, as they don't dominate anything else in our set. |
3883 | |
3884 | bool AnythingReplaced = false; |
3885 | |
3886 | // Since we are going to walk the domtree anyway, and we can't guarantee the |
3887 | // DFS numbers are updated, we compute some ourselves. |
3888 | DT->updateDFSNumbers(); |
3889 | |
3890 | // Go through all of our phi nodes, and kill the arguments associated with |
3891 | // unreachable edges. |
3892 | auto ReplaceUnreachablePHIArgs = [&](PHINode *PHI, BasicBlock *BB) { |
3893 | for (auto &Operand : PHI->incoming_values()) |
3894 | if (!ReachableEdges.count(V: {PHI->getIncomingBlock(U: Operand), BB})) { |
3895 | LLVM_DEBUG(dbgs() << "Replacing incoming value of " << PHI |
3896 | << " for block " |
3897 | << getBlockName(PHI->getIncomingBlock(Operand)) |
3898 | << " with poison due to it being unreachable\n" ); |
3899 | Operand.set(PoisonValue::get(T: PHI->getType())); |
3900 | } |
3901 | }; |
3902 | // Replace unreachable phi arguments. |
3903 | // At this point, RevisitOnReachabilityChange only contains: |
3904 | // |
3905 | // 1. PHIs |
3906 | // 2. Temporaries that will convert to PHIs |
3907 | // 3. Operations that are affected by an unreachable edge but do not fit into |
3908 | // 1 or 2 (rare). |
3909 | // So it is a slight overshoot of what we want. We could make it exact by |
3910 | // using two SparseBitVectors per block. |
3911 | DenseMap<const BasicBlock *, unsigned> ReachablePredCount; |
3912 | for (auto &KV : ReachableEdges) |
3913 | ReachablePredCount[KV.getEnd()]++; |
3914 | for (auto &BBPair : RevisitOnReachabilityChange) { |
3915 | for (auto InstNum : BBPair.second) { |
3916 | auto *Inst = InstrFromDFSNum(DFSNum: InstNum); |
3917 | auto *PHI = dyn_cast<PHINode>(Val: Inst); |
3918 | PHI = PHI ? PHI : dyn_cast_or_null<PHINode>(Val: RealToTemp.lookup(Val: Inst)); |
3919 | if (!PHI) |
3920 | continue; |
3921 | auto *BB = BBPair.first; |
3922 | if (ReachablePredCount.lookup(Val: BB) != PHI->getNumIncomingValues()) |
3923 | ReplaceUnreachablePHIArgs(PHI, BB); |
3924 | } |
3925 | } |
3926 | |
3927 | // Map to store the use counts |
3928 | DenseMap<const Value *, unsigned int> UseCounts; |
3929 | for (auto *CC : reverse(C&: CongruenceClasses)) { |
3930 | LLVM_DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() |
3931 | << "\n" ); |
3932 | // Track the equivalent store info so we can decide whether to try |
3933 | // dead store elimination. |
3934 | SmallVector<ValueDFS, 8> PossibleDeadStores; |
3935 | SmallPtrSet<Instruction *, 8> ProbablyDead; |
3936 | if (CC->isDead() || CC->empty()) |
3937 | continue; |
3938 | // Everything still in the TOP class is unreachable or dead. |
3939 | if (CC == TOPClass) { |
3940 | for (auto *M : *CC) { |
3941 | auto *VTE = ValueToExpression.lookup(Val: M); |
3942 | if (VTE && isa<DeadExpression>(Val: VTE)) |
3943 | markInstructionForDeletion(I: cast<Instruction>(Val: M)); |
3944 | assert((!ReachableBlocks.count(cast<Instruction>(M)->getParent()) || |
3945 | InstructionsToErase.count(cast<Instruction>(M))) && |
3946 | "Everything in TOP should be unreachable or dead at this " |
3947 | "point" ); |
3948 | } |
3949 | continue; |
3950 | } |
3951 | |
3952 | assert(CC->getLeader() && "We should have had a leader" ); |
3953 | // If this is a leader that is always available, and it's a |
3954 | // constant or has no equivalences, just replace everything with |
3955 | // it. We then update the congruence class with whatever members |
3956 | // are left. |
3957 | Value *Leader = |
3958 | CC->getStoredValue() ? CC->getStoredValue() : CC->getLeader(); |
3959 | if (alwaysAvailable(V: Leader)) { |
3960 | CongruenceClass::MemberSet MembersLeft; |
3961 | for (auto *M : *CC) { |
3962 | Value *Member = M; |
3963 | // Void things have no uses we can replace. |
3964 | if (Member == Leader || !isa<Instruction>(Val: Member) || |
3965 | Member->getType()->isVoidTy()) { |
3966 | MembersLeft.insert(Ptr: Member); |
3967 | continue; |
3968 | } |
3969 | |
3970 | LLVM_DEBUG(dbgs() << "Found replacement " << *(Leader) << " for " |
3971 | << *Member << "\n" ); |
3972 | auto *I = cast<Instruction>(Val: Member); |
3973 | assert(Leader != I && "About to accidentally remove our leader" ); |
3974 | replaceInstruction(I, V: Leader); |
3975 | AnythingReplaced = true; |
3976 | } |
3977 | CC->swap(Other&: MembersLeft); |
3978 | } else { |
3979 | // If this is a singleton, we can skip it. |
3980 | if (CC->size() != 1 || RealToTemp.count(Val: Leader)) { |
3981 | // This is a stack because equality replacement/etc may place |
3982 | // constants in the middle of the member list, and we want to use |
3983 | // those constant values in preference to the current leader, over |
3984 | // the scope of those constants. |
3985 | ValueDFSStack EliminationStack; |
3986 | |
3987 | // Convert the members to DFS ordered sets and then merge them. |
3988 | SmallVector<ValueDFS, 8> DFSOrderedSet; |
3989 | convertClassToDFSOrdered(Dense: *CC, DFSOrderedSet, UseCounts, ProbablyDead); |
3990 | |
3991 | // Sort the whole thing. |
3992 | llvm::sort(C&: DFSOrderedSet); |
3993 | for (auto &VD : DFSOrderedSet) { |
3994 | int MemberDFSIn = VD.DFSIn; |
3995 | int MemberDFSOut = VD.DFSOut; |
3996 | Value *Def = VD.Def.getPointer(); |
3997 | bool FromStore = VD.Def.getInt(); |
3998 | Use *U = VD.U; |
3999 | // We ignore void things because we can't get a value from them. |
4000 | if (Def && Def->getType()->isVoidTy()) |
4001 | continue; |
4002 | auto *DefInst = dyn_cast_or_null<Instruction>(Val: Def); |
4003 | if (DefInst && AllTempInstructions.count(V: DefInst)) { |
4004 | auto *PN = cast<PHINode>(Val: DefInst); |
4005 | |
4006 | // If this is a value phi and that's the expression we used, insert |
4007 | // it into the program |
4008 | // remove from temp instruction list. |
4009 | AllTempInstructions.erase(V: PN); |
4010 | auto *DefBlock = getBlockForValue(V: Def); |
4011 | LLVM_DEBUG(dbgs() << "Inserting fully real phi of ops" << *Def |
4012 | << " into block " |
4013 | << getBlockName(getBlockForValue(Def)) << "\n" ); |
4014 | PN->insertBefore(InsertPos: DefBlock->begin()); |
4015 | Def = PN; |
4016 | NumGVNPHIOfOpsEliminations++; |
4017 | } |
4018 | |
4019 | if (EliminationStack.empty()) { |
4020 | LLVM_DEBUG(dbgs() << "Elimination Stack is empty\n" ); |
4021 | } else { |
4022 | LLVM_DEBUG(dbgs() << "Elimination Stack Top DFS numbers are (" |
4023 | << EliminationStack.dfs_back().first << "," |
4024 | << EliminationStack.dfs_back().second << ")\n" ); |
4025 | } |
4026 | |
4027 | LLVM_DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << "," |
4028 | << MemberDFSOut << ")\n" ); |
4029 | // First, we see if we are out of scope or empty. If so, |
4030 | // and there equivalences, we try to replace the top of |
4031 | // stack with equivalences (if it's on the stack, it must |
4032 | // not have been eliminated yet). |
4033 | // Then we synchronize to our current scope, by |
4034 | // popping until we are back within a DFS scope that |
4035 | // dominates the current member. |
4036 | // Then, what happens depends on a few factors |
4037 | // If the stack is now empty, we need to push |
4038 | // If we have a constant or a local equivalence we want to |
4039 | // start using, we also push. |
4040 | // Otherwise, we walk along, processing members who are |
4041 | // dominated by this scope, and eliminate them. |
4042 | bool ShouldPush = Def && EliminationStack.empty(); |
4043 | bool OutOfScope = |
4044 | !EliminationStack.isInScope(DFSIn: MemberDFSIn, DFSOut: MemberDFSOut); |
4045 | |
4046 | if (OutOfScope || ShouldPush) { |
4047 | // Sync to our current scope. |
4048 | EliminationStack.popUntilDFSScope(DFSIn: MemberDFSIn, DFSOut: MemberDFSOut); |
4049 | bool ShouldPush = Def && EliminationStack.empty(); |
4050 | if (ShouldPush) { |
4051 | EliminationStack.push_back(V: Def, DFSIn: MemberDFSIn, DFSOut: MemberDFSOut); |
4052 | } |
4053 | } |
4054 | |
4055 | // Skip the Def's, we only want to eliminate on their uses. But mark |
4056 | // dominated defs as dead. |
4057 | if (Def) { |
4058 | // For anything in this case, what and how we value number |
4059 | // guarantees that any side-effects that would have occurred (ie |
4060 | // throwing, etc) can be proven to either still occur (because it's |
4061 | // dominated by something that has the same side-effects), or never |
4062 | // occur. Otherwise, we would not have been able to prove it value |
4063 | // equivalent to something else. For these things, we can just mark |
4064 | // it all dead. Note that this is different from the "ProbablyDead" |
4065 | // set, which may not be dominated by anything, and thus, are only |
4066 | // easy to prove dead if they are also side-effect free. Note that |
4067 | // because stores are put in terms of the stored value, we skip |
4068 | // stored values here. If the stored value is really dead, it will |
4069 | // still be marked for deletion when we process it in its own class. |
4070 | auto *DefI = dyn_cast<Instruction>(Val: Def); |
4071 | if (!EliminationStack.empty() && DefI && !FromStore) { |
4072 | Value *DominatingLeader = EliminationStack.back(); |
4073 | if (DominatingLeader != Def) { |
4074 | // Even if the instruction is removed, we still need to update |
4075 | // flags/metadata due to downstreams users of the leader. |
4076 | if (!match(V: DefI, P: m_Intrinsic<Intrinsic::ssa_copy>())) |
4077 | patchReplacementInstruction(I: DefI, Repl: DominatingLeader); |
4078 | |
4079 | markInstructionForDeletion(I: DefI); |
4080 | } |
4081 | } |
4082 | continue; |
4083 | } |
4084 | // At this point, we know it is a Use we are trying to possibly |
4085 | // replace. |
4086 | |
4087 | assert(isa<Instruction>(U->get()) && |
4088 | "Current def should have been an instruction" ); |
4089 | assert(isa<Instruction>(U->getUser()) && |
4090 | "Current user should have been an instruction" ); |
4091 | |
4092 | // If the thing we are replacing into is already marked to be dead, |
4093 | // this use is dead. Note that this is true regardless of whether |
4094 | // we have anything dominating the use or not. We do this here |
4095 | // because we are already walking all the uses anyway. |
4096 | Instruction *InstUse = cast<Instruction>(Val: U->getUser()); |
4097 | if (InstructionsToErase.count(Ptr: InstUse)) { |
4098 | auto &UseCount = UseCounts[U->get()]; |
4099 | if (--UseCount == 0) { |
4100 | ProbablyDead.insert(Ptr: cast<Instruction>(Val: U->get())); |
4101 | } |
4102 | } |
4103 | |
4104 | // If we get to this point, and the stack is empty we must have a use |
4105 | // with nothing we can use to eliminate this use, so just skip it. |
4106 | if (EliminationStack.empty()) |
4107 | continue; |
4108 | |
4109 | Value *DominatingLeader = EliminationStack.back(); |
4110 | |
4111 | auto *II = dyn_cast<IntrinsicInst>(Val: DominatingLeader); |
4112 | bool isSSACopy = II && II->getIntrinsicID() == Intrinsic::ssa_copy; |
4113 | if (isSSACopy) |
4114 | DominatingLeader = II->getOperand(i_nocapture: 0); |
4115 | |
4116 | // Don't replace our existing users with ourselves. |
4117 | if (U->get() == DominatingLeader) |
4118 | continue; |
4119 | |
4120 | // If we replaced something in an instruction, handle the patching of |
4121 | // metadata. Skip this if we are replacing predicateinfo with its |
4122 | // original operand, as we already know we can just drop it. |
4123 | auto *ReplacedInst = cast<Instruction>(Val: U->get()); |
4124 | auto *PI = PredInfo->getPredicateInfoFor(V: ReplacedInst); |
4125 | if (!PI || DominatingLeader != PI->OriginalOp) |
4126 | patchReplacementInstruction(I: ReplacedInst, Repl: DominatingLeader); |
4127 | |
4128 | LLVM_DEBUG(dbgs() |
4129 | << "Found replacement " << *DominatingLeader << " for " |
4130 | << *U->get() << " in " << *(U->getUser()) << "\n" ); |
4131 | U->set(DominatingLeader); |
4132 | // This is now a use of the dominating leader, which means if the |
4133 | // dominating leader was dead, it's now live! |
4134 | auto &LeaderUseCount = UseCounts[DominatingLeader]; |
4135 | // It's about to be alive again. |
4136 | if (LeaderUseCount == 0 && isa<Instruction>(Val: DominatingLeader)) |
4137 | ProbablyDead.erase(Ptr: cast<Instruction>(Val: DominatingLeader)); |
4138 | // For copy instructions, we use their operand as a leader, |
4139 | // which means we remove a user of the copy and it may become dead. |
4140 | if (isSSACopy) { |
4141 | auto It = UseCounts.find(Val: II); |
4142 | if (It != UseCounts.end()) { |
4143 | unsigned &IIUseCount = It->second; |
4144 | if (--IIUseCount == 0) |
4145 | ProbablyDead.insert(Ptr: II); |
4146 | } |
4147 | } |
4148 | ++LeaderUseCount; |
4149 | AnythingReplaced = true; |
4150 | } |
4151 | } |
4152 | } |
4153 | |
4154 | // At this point, anything still in the ProbablyDead set is actually dead if |
4155 | // would be trivially dead. |
4156 | for (auto *I : ProbablyDead) |
4157 | if (wouldInstructionBeTriviallyDead(I)) |
4158 | markInstructionForDeletion(I); |
4159 | |
4160 | // Cleanup the congruence class. |
4161 | CongruenceClass::MemberSet MembersLeft; |
4162 | for (auto *Member : *CC) |
4163 | if (!isa<Instruction>(Val: Member) || |
4164 | !InstructionsToErase.count(Ptr: cast<Instruction>(Val: Member))) |
4165 | MembersLeft.insert(Ptr: Member); |
4166 | CC->swap(Other&: MembersLeft); |
4167 | |
4168 | // If we have possible dead stores to look at, try to eliminate them. |
4169 | if (CC->getStoreCount() > 0) { |
4170 | convertClassToLoadsAndStores(Dense: *CC, LoadsAndStores&: PossibleDeadStores); |
4171 | llvm::sort(C&: PossibleDeadStores); |
4172 | ValueDFSStack EliminationStack; |
4173 | for (auto &VD : PossibleDeadStores) { |
4174 | int MemberDFSIn = VD.DFSIn; |
4175 | int MemberDFSOut = VD.DFSOut; |
4176 | Instruction *Member = cast<Instruction>(Val: VD.Def.getPointer()); |
4177 | if (EliminationStack.empty() || |
4178 | !EliminationStack.isInScope(DFSIn: MemberDFSIn, DFSOut: MemberDFSOut)) { |
4179 | // Sync to our current scope. |
4180 | EliminationStack.popUntilDFSScope(DFSIn: MemberDFSIn, DFSOut: MemberDFSOut); |
4181 | if (EliminationStack.empty()) { |
4182 | EliminationStack.push_back(V: Member, DFSIn: MemberDFSIn, DFSOut: MemberDFSOut); |
4183 | continue; |
4184 | } |
4185 | } |
4186 | // We already did load elimination, so nothing to do here. |
4187 | if (isa<LoadInst>(Val: Member)) |
4188 | continue; |
4189 | assert(!EliminationStack.empty()); |
4190 | Instruction *Leader = cast<Instruction>(Val: EliminationStack.back()); |
4191 | (void)Leader; |
4192 | assert(DT->dominates(Leader->getParent(), Member->getParent())); |
4193 | // Member is dominater by Leader, and thus dead |
4194 | LLVM_DEBUG(dbgs() << "Marking dead store " << *Member |
4195 | << " that is dominated by " << *Leader << "\n" ); |
4196 | markInstructionForDeletion(I: Member); |
4197 | CC->erase(M: Member); |
4198 | ++NumGVNDeadStores; |
4199 | } |
4200 | } |
4201 | } |
4202 | return AnythingReplaced; |
4203 | } |
4204 | |
4205 | // This function provides global ranking of operations so that we can place them |
4206 | // in a canonical order. Note that rank alone is not necessarily enough for a |
4207 | // complete ordering, as constants all have the same rank. However, generally, |
4208 | // we will simplify an operation with all constants so that it doesn't matter |
4209 | // what order they appear in. |
4210 | unsigned int NewGVN::getRank(const Value *V) const { |
4211 | // Prefer constants to undef to anything else |
4212 | // Undef is a constant, have to check it first. |
4213 | // Prefer poison to undef as it's less defined. |
4214 | // Prefer smaller constants to constantexprs |
4215 | // Note that the order here matters because of class inheritance |
4216 | if (isa<ConstantExpr>(Val: V)) |
4217 | return 3; |
4218 | if (isa<PoisonValue>(Val: V)) |
4219 | return 1; |
4220 | if (isa<UndefValue>(Val: V)) |
4221 | return 2; |
4222 | if (isa<Constant>(Val: V)) |
4223 | return 0; |
4224 | if (auto *A = dyn_cast<Argument>(Val: V)) |
4225 | return 4 + A->getArgNo(); |
4226 | |
4227 | // Need to shift the instruction DFS by number of arguments + 5 to account for |
4228 | // the constant and argument ranking above. |
4229 | unsigned Result = InstrToDFSNum(V); |
4230 | if (Result > 0) |
4231 | return 5 + NumFuncArgs + Result; |
4232 | // Unreachable or something else, just return a really large number. |
4233 | return ~0; |
4234 | } |
4235 | |
4236 | // This is a function that says whether two commutative operations should |
4237 | // have their order swapped when canonicalizing. |
4238 | bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const { |
4239 | // Because we only care about a total ordering, and don't rewrite expressions |
4240 | // in this order, we order by rank, which will give a strict weak ordering to |
4241 | // everything but constants, and then we order by pointer address. |
4242 | return std::make_pair(x: getRank(V: A), y&: A) > std::make_pair(x: getRank(V: B), y&: B); |
4243 | } |
4244 | |
4245 | bool NewGVN::shouldSwapOperandsForIntrinsic(const Value *A, const Value *B, |
4246 | const IntrinsicInst *I) const { |
4247 | auto LookupResult = IntrinsicInstPred.find(Val: I); |
4248 | if (shouldSwapOperands(A, B)) { |
4249 | if (LookupResult == IntrinsicInstPred.end()) |
4250 | IntrinsicInstPred.insert(KV: {I, B}); |
4251 | else |
4252 | LookupResult->second = B; |
4253 | return true; |
4254 | } |
4255 | |
4256 | if (LookupResult != IntrinsicInstPred.end()) { |
4257 | auto *SeenPredicate = LookupResult->second; |
4258 | if (SeenPredicate) { |
4259 | if (SeenPredicate == B) |
4260 | return true; |
4261 | else |
4262 | LookupResult->second = nullptr; |
4263 | } |
4264 | } |
4265 | return false; |
4266 | } |
4267 | |
4268 | PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) { |
4269 | // Apparently the order in which we get these results matter for |
4270 | // the old GVN (see Chandler's comment in GVN.cpp). I'll keep |
4271 | // the same order here, just in case. |
4272 | auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F); |
4273 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
4274 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
4275 | auto &AA = AM.getResult<AAManager>(IR&: F); |
4276 | auto &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA(); |
4277 | bool Changed = |
4278 | NewGVN(F, &DT, &AC, &TLI, &AA, &MSSA, F.getDataLayout()) |
4279 | .runGVN(); |
4280 | if (!Changed) |
4281 | return PreservedAnalyses::all(); |
4282 | PreservedAnalyses PA; |
4283 | PA.preserve<DominatorTreeAnalysis>(); |
4284 | return PA; |
4285 | } |
4286 | |