//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
/// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
/// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///                  \           /
///            [ %e = phi i32 %a2, %c2 ]
///            [ add i32 %e, 4         ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking however is for a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
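///
/// For example, in the diagram above a use-based numbering can assign %a1
/// and %c1 the same value number, because each is used in the same way: by
/// an "xor ... 1" whose result in turn feeds the common phi %e. That makes
/// them candidates for sinking into the common successor.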
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;
using namespace llvm::GVNExpression;

#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}

//===----------------------------------------------------------------------===//

namespace {

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;

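  /// Compute the benefit of this candidate. As a worked example of the
  /// formula below: sinking 3 instructions from 3 blocks that requires 1
  /// more PHI than the original block had, with no edge splitting, gives
  /// Cost = 3 * (3 - 1) - 1 * 1 - 0 = 5.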
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};

//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we sunk a sequence of instructions. It provides
/// a hash function for efficient equality comparisons.
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  ModelledPHI(const PHINode *PN,
              const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    // BasicBlock comes first so we sort by basic block order, then by value.
    // There is no need to call `verifyModelledPHI` because the Values and
    // Blocks are populated in a deterministic order.
    using OpsType = std::pair<BasicBlock *, Value *>;
    SmallVector<OpsType, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});

    auto ComesBefore = [&](OpsType O1, OpsType O2) {
      return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
    };
    // Sort in a deterministic order.
    llvm::sort(Ops, ComesBefore);

    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }

  void
  verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    assert(Values.size() > 1 && Blocks.size() > 1 &&
           "Modelling PHI with less than 2 values");
    [[maybe_unused]] auto ComesBefore = [&](const BasicBlock *BB1,
                                            const BasicBlock *BB2) {
      return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2);
    };
    assert(llvm::is_sorted(Blocks, ComesBefore));
    int C = 0;
    for (const Value *V : Values) {
      if (!isa<UndefValue>(V)) {
        assert(cast<Instruction>(V)->getParent() == Blocks[C]);
        (void)C;
      }
      C++;
    }
  }

  /// Create a PHI from an array of incoming values and incoming blocks.
  ModelledPHI(SmallVectorImpl<Instruction *> &V,
              SmallSetVector<BasicBlock *, 4> &B,
              const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    // The Values and Blocks are already ordered by the caller.
    llvm::append_range(Values, V);
    llvm::append_range(Blocks, B);
    verifyModelledPHI(BlockOrder);
  }

  /// Create a PHI from [I[OpNum] for I in Insts].
  /// TODO: Figure out a way to verifyModelledPHI in this constructor.
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
              SmallSetVector<BasicBlock *, 4> &B) {
    llvm::append_range(Blocks, B);
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }

  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!NewBlocks.contains(*BI)) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }

  bool areAllIncomingValuesSame() const { return llvm::all_equal(Values); }

  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  // Hash functor.
  unsigned hash() const {
    // Deterministic because Values are saved in a specific order.
    return (unsigned)hash_combine_range(Values);
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};
} // namespace

#ifndef NDEBUG
static raw_ostream &operator<<(raw_ostream &OS,
                               const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
#endif

template <> struct llvm::DenseMapInfo<ModelledPHI> {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI>;

namespace {

//===----------------------------------------------------------------------===//
// ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//

/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
class InstructionUseExpr : public BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(operands());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(BasicExpression::getHashValue(), MemoryUseOrder,
                        Volatile, ShuffleMask);
  }

  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};

using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;

class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  BasicBlocksSet ReachableBBs;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

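    // Fold the predicate into the opcode so that, for example, two icmps
    // with different predicates never compare equal.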
    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
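    // Ordered or atomic accesses get no expression, and therefore a unique
    // value number, so they are never considered equal and never sunk.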
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Set basic blocks reachable from entry block.
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::PtrToAddr:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto [I, Inserted] = HashNumbering.try_emplace(H, nextValueNumber);
      e = I->second;
      if (Inserted)
        ExpressionNumbering[exp] = nextValueNumber++;
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge: we only ever check for equivalence between blocks sharing a
  /// single common immediate successor, and when determining if I1 == I2 we
  /// will have already determined that next(I1) == next(I2). This inductive
  /// property allows us to simply return the value number of the next
  /// instruction that defines memory.
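  ///
  /// For example, if I1 and I2 are loads in two predecessors, each followed
  /// by stores that have already been numbered equal (the lockstep walk is
  /// bottom-up), both loads see the same next memory def and therefore get
  /// the same memory use order.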
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return 0;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() = default;

  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    VN.setReachableBBs(BasicBlocksSet(llvm::from_range, RPOT));
    // Populate the reverse post-order numbering so that basic blocks can be
    // ordered deterministically. Any ordering works as long as it is
    // deterministic. The numbering of newly created basic blocks is
    // irrelevant because the RPOT used for computing sinkable candidates is
    // also obtained ahead of time; only the order of the original blocks
    // matters for this pass.
    unsigned NodeOrdering = 0;
    RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
    for (auto *BB : RPOT)
      if (!pred_empty(BB))
        RPOTOrder[BB] = ++NodeOrdering;
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;
  DenseMap<const BasicBlock *, unsigned> RPOTOrder;

  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be
  /// sunk, or std::nullopt otherwise.
  std::optional<SinkingInstructionCandidate>
  analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
                               unsigned &InstNum, unsigned &MemoryInstNum,
                               ModelledPHISet &NeededPHIs,
                               SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN, RPOTOrder);
      PHIs.insert(MPHI);
      PHIContents.insert_range(MPHI.getValues());
    }
  }

  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
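  /// For example, %p = phi i32 [ %x, %bb0 ], [ %x, %bb1 ] is replaced
  /// everywhere by %x.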
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};
} // namespace

std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
                                      unsigned &InstNum,
                                      unsigned &MemoryInstNum,
                                      ModelledPHISet &NeededPHIs,
                                      SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
                                                                  : Insts) {
    I->dump();
  } dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return std::nullopt;
    VNums[N]++;
  }
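  // Pick the value number shared by the largest number of instructions (one
  // per predecessor); those instructions form the group we try to sink.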
  unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return std::nullopt;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return std::nullopt;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert_range(PHI.getValues());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return std::nullopt;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  auto isNotSameOperation = [&I0](Instruction *I) {
    return !I0->isSameOperationAs(I);
  };

  if (any_of(NewInsts, isNotSameOperation))
    return std::nullopt;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return std::nullopt;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return std::nullopt;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return std::nullopt;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert_range(PHI.getValues());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}

unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    // Bail out on basic blocks without a predecessor (PR42346).
    if (!RPOTOrder.count(B))
      return 0;
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
    return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
  };
  // Sort in a deterministic order.
  llvm::sort(Preds, ComesBefore);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });

  LockstepReverseIterator<false> LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
                                                         : Candidates) dbgs()
                                                    << "  " << C << "\n";);

  // Pick the top candidate, as long as its cost is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}

void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0) {
      I->replaceAllUsesWith(I0);
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
    }
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}

PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}