//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
///   [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
///   [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///                    \             /
///              [ %e = phi i32 %a2, %c2 ]
///              [ add i32 %e, 4         ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking however is for a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
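///
/// For example (a sketch of the intuition rather than an exact trace): in the
/// diagram above, %a1 and %c1 receive the same use-based number because each
/// is used only as the operand of an xor-with-1 whose result feeds the same
/// PHI, so replacing one with the other leaves every later instruction
/// equivalent.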
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");

namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // end namespace GVNExpression
} // end namespace llvm

namespace {

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}

/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!Blocks.contains((*II)->getParent())) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNonDebugInstruction());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};
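
// A minimal usage sketch (mirroring how sinkBB below drives the iterator):
// walk all candidate blocks in lockstep from their last non-terminator
// instruction until any block runs out:
//
//   LockstepReverseIterator LRI(Preds);
//   while (LRI.isValid()) {
//     ArrayRef<Instruction *> Insts = *LRI; // One instruction per block.
//     // ... analyze Insts ...
//     --LRI;
//   }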

//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;

  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }
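
  // Worked example (a sketch): sinking 3 instructions from both of 2
  // predecessors while requiring 1 PHI beyond the original ones, with no
  // edge split, gives Cost = 3 * (2 - 1) - 1 * 1 - 0 = 2.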

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
#endif

//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that would need to be created if we sunk a sequence of instructions.
/// The class provides a hash function for efficient equality comparisons.
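///
/// For example (a sketch): sinking two adds whose left operands differ (%x in
/// %bb1, %y in %bb2) requires the PHI modelled as Values = {%x, %y} with
/// Blocks = {%bb1, %bb2}.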
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  ModelledPHI(const PHINode *PN,
              const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order. No need to call `verifyModelledPHI`
    // as the Values and Blocks are populated in a deterministic order.
    using OpsType = std::pair<BasicBlock *, Value *>;
    SmallVector<OpsType, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});

    auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
      return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
    };
    // Sort in a deterministic order.
    llvm::sort(Ops, ComesBefore);

    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }

  void
  verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    assert(Values.size() > 1 && Blocks.size() > 1 &&
           "Modelling PHI with less than 2 values");
    auto ComesBefore = [BlockOrder](const BasicBlock *BB1,
                                    const BasicBlock *BB2) {
      return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2);
    };
    assert(llvm::is_sorted(Blocks, ComesBefore));
    int C = 0;
    for (const Value *V : Values) {
      if (!isa<UndefValue>(V)) {
        assert(cast<Instruction>(V)->getParent() == Blocks[C]);
        (void)C;
      }
      C++;
    }
  }

  /// Create a PHI from an array of incoming values and incoming blocks.
  ModelledPHI(SmallVectorImpl<Instruction *> &V,
              SmallSetVector<BasicBlock *, 4> &B,
              const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    // The Values and Blocks are already ordered by the caller.
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
    verifyModelledPHI(BlockOrder);
  }

  /// Create a PHI from [I[OpNum] for I in Insts].
  /// TODO: Figure out a way to verifyModelledPHI in this constructor.
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
              SmallSetVector<BasicBlock *, 4> &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }

  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!NewBlocks.contains(*BI)) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }

  bool areAllIncomingValuesSame() const { return llvm::all_equal(Values); }

  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  // Hash functor
  unsigned hash() const {
    // Deterministic because Values are saved in a specific order.
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};

template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;

//===----------------------------------------------------------------------===//
//                             ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//

/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

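  /// Hash the expression with each use first mapped through \c MapFn. In this
  /// pass the map function is the value-number lookup (see
  /// ValueTable::lookupOrAdd below), so the resulting hash depends on how the
  /// instruction is used rather than on use-list pointer identity.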
  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};

using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;

class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  BasicBlocksSet ReachableBBs;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
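      // Fold the predicate into the opcode so that, for example, "icmp eq"
      // and "icmp ne" produce distinct expressions even when their uses
      // would otherwise hash identically.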
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Set basic blocks reachable from entry block.
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge: we only ever check for equivalence between blocks sharing a
  /// single common immediate successor, and when determining if I1 == I2 we
  /// will have already determined that next(I1) == next(I2). This inductive
  /// property allows us to simply return the value number of the next
  /// instruction that defines memory.
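  ///
  /// For example (a sketch): if the first memory-defining instruction after
  /// \c Inst in its block is a store, this returns the store's value number;
  /// plain loads and read-only calls in between are skipped, and 0 is
  /// returned if no memory-defining instruction follows.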
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return 0;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() = default;

  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
    // Populate a reverse post-order so that basic blocks are visited in a
    // deterministic order. Any ordering works here as long as it is
    // deterministic. The ordering of newly created basic blocks is
    // irrelevant because the RPOT (used for computing sinkable candidates)
    // is also obtained ahead of time; only the order of the original blocks
    // matters to this pass.
    unsigned NodeOrdering = 0;
    RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
    for (auto *BB : RPOT)
      if (!pred_empty(BB))
        RPOTOrder[BB] = ++NodeOrdering;
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;
  DenseMap<const BasicBlock *, unsigned> RPOTOrder;

  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be
  /// sunk, or std::nullopt otherwise.
  std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN, RPOTOrder);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try to sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
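  ///
  /// For example (a sketch): sinking "%a = add i32 %x, 1" from %bb1 and
  /// "%b = add i32 %y, 1" from %bb2 into their shared successor yields a
  /// PHI of the differing operands plus one shared instruction:
  ///   %p = phi i32 [ %x, %bb1 ], [ %y, %bb2 ]
  ///   %a = add i32 %p, 1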
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
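  /// For example, "%p = phi i32 [ %v, %bb1 ], [ %v, %bb2 ]" is replaced by
  /// %v and erased (or by poison if the only incoming value is %p itself).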
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};

std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
                                      unsigned &InstNum,
                                      unsigned &MemoryInstNum,
                                      ModelledPHISet &NeededPHIs,
                                      SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n";
             for (auto *I : Insts) { I->dump(); }
             dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return std::nullopt;
    VNums[N]++;
  }
  unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return std::nullopt;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return std::nullopt;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return std::nullopt;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  auto isNotSameOperation = [&I0](Instruction *I) {
    return !I0->isSameOperationAs(I);
  };

  if (any_of(NewInsts, isNotSameOperation))
    return std::nullopt;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return std::nullopt;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return std::nullopt;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return std::nullopt;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}

unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    // Bail out on basic blocks without a predecessor (PR42346).
    if (!RPOTOrder.count(B))
      return 0;
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
    return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
  };
  // Sort in a deterministic order.
  llvm::sort(Preds, ComesBefore);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });

  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n";
             for (auto &C : Candidates) dbgs() << "  " << C << "\n";);

  // Pick the top candidate, as long as it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}

void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0) {
      I->replaceAllUsesWith(I0);
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
    }
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}

} // end anonymous namespace

PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}