//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
///   [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
///   [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///                  \                   /
///              [ %e = phi i32 %a2, %c2 ]
///              [ add i32 %e, 4         ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking, however, is a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The ValueTable class in this file provides that
/// mapping.
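///
/// In the example above, a use-based numbering gives %a1 and %c1 the same
/// number: each is used in the same way by users (%a2 and %c2) that are
/// themselves use-equivalent, both feeding the same PHI. The xors and adds
/// can therefore be sunk into the common successor, with a new PHI of %b and
/// %d created to feed the sunk add.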
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");

namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // end namespace GVNExpression
} // end namespace llvm

namespace {

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}

/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assuming all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
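///
/// Blocks that contain only a terminator are dropped from the active set as
/// soon as iteration begins.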
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will later be copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!Blocks.contains((*II)->getParent())) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNonDebugInstruction());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};

//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;

  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }
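  // As an illustrative calculation (an example, not taken from the source):
  // sinking 3 instructions from 2 blocks while needing 1 PHI beyond those
  // that already existed, without splitting an edge, gives
  //   Cost = 3 * (2 - 1) - 1 * 1 - 0 = 2,
  // so the candidate is considered profitable.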

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
#endif

//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we sunk a sequence of instructions. It provides
/// a hash function for efficient equality comparisons.
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  ModelledPHI(const PHINode *PN,
              const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order. No need to call `verifyModelledPHI`
    // as the Values and Blocks are populated in a deterministic order.
    using OpsType = std::pair<BasicBlock *, Value *>;
    SmallVector<OpsType, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});

    auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
      return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
    };
    // Sort in a deterministic order.
    llvm::sort(Ops, ComesBefore);

    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }

  void
  verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    assert(Values.size() > 1 && Blocks.size() > 1 &&
           "Modelling PHI with less than 2 values");
    auto ComesBefore = [BlockOrder](const BasicBlock *BB1,
                                    const BasicBlock *BB2) {
      return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2);
    };
    assert(llvm::is_sorted(Blocks, ComesBefore));
    int C = 0;
    for (const Value *V : Values) {
      if (!isa<UndefValue>(V)) {
        assert(cast<Instruction>(V)->getParent() == Blocks[C]);
        (void)C;
      }
      C++;
    }
  }

  /// Create a PHI from an array of incoming values and incoming blocks.
  ModelledPHI(SmallVectorImpl<Instruction *> &V,
              SmallSetVector<BasicBlock *, 4> &B,
              const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    // Values and Blocks are already sorted by the caller.
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
    verifyModelledPHI(BlockOrder);
  }

  /// Create a PHI from [I[OpNum] for I in Insts].
  /// TODO: Figure out a way to verifyModelledPHI in this constructor.
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
              SmallSetVector<BasicBlock *, 4> &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }

  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!NewBlocks.contains(*BI)) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }

  bool areAllIncomingValuesSame() const { return llvm::all_equal(Values); }

  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  // Hash functor.
  unsigned hash() const {
    // This is deterministic because Values are saved in a specific order.
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};

template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;

//===----------------------------------------------------------------------===//
//                             ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//

/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};

using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;

class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  BasicBlocksSet ReachableBBs;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

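    // Compares are discriminated by folding the predicate into the opcode,
    // so that, e.g., "icmp eq" and "icmp ne" never receive the same
    // expression number.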
    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
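  /// Returns nullptr for atomic or ordered accesses; the caller then assigns
  /// them a fresh, unique value number, so they are never considered
  /// equivalent to any other instruction.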
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Set basic blocks reachable from entry block.
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

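    // Each expression object is freshly allocated, so the pointer-keyed
    // lookup below normally misses; equivalence is instead established by
    // hashing the opcode together with the value numbers of the uses.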
    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge that we only ever check for equivalence between blocks sharing a
  /// single immediate successor that is common, and when determining if I1 ==
  /// I2 we will have already determined that next(I1) == next(I2). This
  /// inductive property allows us to simply return the value number of the next
  /// instruction that defines memory.
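  ///
  /// For example, two stores in sibling predecessors that are each the last
  /// memory-defining instruction before their block's terminator both get a
  /// memory use order of 0, so this discriminator does not prevent them from
  /// being merged.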
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return 0;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() {}

  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
    // Populate the reverse post-order numbering so that basic blocks can be
    // compared in a deterministic order. Any arbitrary ordering works here as
    // long as it is deterministic. The numbering of newly created basic
    // blocks is irrelevant because the RPOT used for computing sinkable
    // candidates is obtained ahead of time, and only the order of the
    // original blocks matters for this pass.
    unsigned NodeOrdering = 0;
    RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
    for (auto *BB : RPOT)
      if (!pred_empty(BB))
        RPOTOrder[BB] = ++NodeOrdering;
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;
  DenseMap<const BasicBlock *, unsigned> RPOTOrder;

  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be
  /// sunk, or std::nullopt otherwise.
  std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN, RPOTOrder);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try to sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
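      // All incoming values are identical. A PHI whose only incoming value
      // is itself can never take a meaningful value, so it is replaced with
      // poison instead.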
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};

std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
                                      unsigned &InstNum,
                                      unsigned &MemoryInstNum,
                                      ModelledPHISet &NeededPHIs,
                                      SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n";
             for (auto *I : Insts) { I->dump(); }
             dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return std::nullopt;
    VNums[N]++;
  }
  unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return std::nullopt;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return std::nullopt;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return std::nullopt;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  auto isNotSameOperation = [&I0](Instruction *I) {
    return !I0->isSameOperationAs(I);
  };

  if (any_of(NewInsts, isNotSameOperation))
    return std::nullopt;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return std::nullopt;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return std::nullopt;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return std::nullopt;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

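  // InstNum is shared with the caller's loop in sinkBB, so each candidate
  // records the cumulative number of instruction rounds analyzed so far.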
  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}

unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    // Bail out on basic blocks without a predecessor (PR42346).
    if (!RPOTOrder.count(B))
      return 0;
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
    return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
  };
  // Sort in a deterministic order.
  llvm::sort(Preds, ComesBefore);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });

  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n";
             for (auto &C : Candidates) dbgs() << "  " << C << "\n";);

  // Pick the top candidate, as long as its cost is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

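  // Sink one instruction per round: each call moves the last remaining
  // non-debug instruction of every block in C.Blocks into InsertBB.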
  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}

void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0) {
      I->replaceAllUsesWith(I0);
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
    }
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}

} // end anonymous namespace

PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}
947