//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
/// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
/// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///                  \           /
///            [ %e = phi i32 %a2, %c2 ]
///            [ add i32 %e, 4         ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking, however, is a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");

namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // end namespace GVNExpression
} // end namespace llvm

namespace {

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}

//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;

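  // The cost is the number of instruction copies saved by sinking, penalized
  // by the square of any extra PHIs created and by any required edge split.
  // For example, sinking 3 instructions from 2 predecessors with no extra
  // PHIs and no split gives Cost = 3 * (2 - 1) - 0 - 0 = 3.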
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS,
                        const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
#endif

//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we sink a sequence of instructions. It provides
/// a hash function for efficient equality comparisons.
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  ModelledPHI(const PHINode *PN,
              const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order. No need to call `verifyModelledPHI`
    // as the Values and Blocks are populated in a deterministic order.
    using OpsType = std::pair<BasicBlock *, Value *>;
    SmallVector<OpsType, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});

    auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
      return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
    };
    // Sort in a deterministic order.
    llvm::sort(Ops, ComesBefore);

    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }

178
179 void
180 verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
181 assert(Values.size() > 1 && Blocks.size() > 1 &&
182 "Modelling PHI with less than 2 values");
183 auto ComesBefore = [BlockOrder](const BasicBlock *BB1,
184 const BasicBlock *BB2) {
185 return BlockOrder.lookup(Val: BB1) < BlockOrder.lookup(Val: BB2);
186 };
187 assert(llvm::is_sorted(Blocks, ComesBefore));
188 int C = 0;
189 for (const Value *V : Values) {
190 if (!isa<UndefValue>(Val: V)) {
191 assert(cast<Instruction>(V)->getParent() == Blocks[C]);
192 (void)C;
193 }
194 C++;
195 }
196 }
197 /// Create a PHI from an array of incoming values and incoming blocks.
198 ModelledPHI(SmallVectorImpl<Instruction *> &V,
199 SmallSetVector<BasicBlock *, 4> &B,
200 const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
201 // The order of Values and Blocks are already ordered by the caller.
202 llvm::append_range(C&: Values, R&: V);
203 llvm::append_range(C&: Blocks, R&: B);
204 verifyModelledPHI(BlockOrder);
205 }
206
207 /// Create a PHI from [I[OpNum] for I in Insts].
208 /// TODO: Figure out a way to verifyModelledPHI in this constructor.
209 ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
210 SmallSetVector<BasicBlock *, 4> &B) {
211 llvm::append_range(C&: Blocks, R&: B);
212 for (auto *I : Insts)
213 Values.push_back(Elt: I->getOperand(i: OpNum));
214 }
215
216 /// Restrict the PHI's contents down to only \c NewBlocks.
217 /// \c NewBlocks must be a subset of \c this->Blocks.
218 void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
219 auto BI = Blocks.begin();
220 auto VI = Values.begin();
221 while (BI != Blocks.end()) {
222 assert(VI != Values.end());
223 if (!NewBlocks.contains(key: *BI)) {
224 BI = Blocks.erase(CI: BI);
225 VI = Values.erase(CI: VI);
226 } else {
227 ++BI;
228 ++VI;
229 }
230 }
231 assert(Blocks.size() == NewBlocks.size());
232 }
233
234 ArrayRef<Value *> getValues() const { return Values; }
235
236 bool areAllIncomingValuesSame() const {
237 return llvm::all_equal(Range: Values);
238 }
239
240 bool areAllIncomingValuesSameType() const {
241 return llvm::all_of(
242 Range: Values, P: [&](Value *V) { return V->getType() == Values[0]->getType(); });
243 }
244
245 bool areAnyIncomingValuesConstant() const {
246 return llvm::any_of(Range: Values, P: [&](Value *V) { return isa<Constant>(Val: V); });
247 }
248
249 // Hash functor
250 unsigned hash() const {
251 // Is deterministic because Values are saved in a specific order.
252 return (unsigned)hash_combine_range(R: Values);
253 }
254
255 bool operator==(const ModelledPHI &Other) const {
256 return Values == Other.Values && Blocks == Other.Blocks;
257 }
258};
259
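// DenseMap traits so a ModelledPHI can key a DenseSet; the dummy PHIs created
// above stand in for the empty and tombstone keys.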
template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;

//===----------------------------------------------------------------------===//
// ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//

/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
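    // Canonicalize the stored uses so that expression equality and hashing
    // do not depend on use-list order.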
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};

using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;

class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  BasicBlocksSet ReachableBBs;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

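    // Fold the predicate into the opcode so that, for example, "icmp eq" and
    // "icmp ne" receive distinct value numbers.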
    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Set basic blocks reachable from entry block.
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

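    // Expressions are unified by their use-based hash: two freshly created
    // expression objects that hash identically share a value number.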
    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto [I, Inserted] = HashNumbering.try_emplace(H, nextValueNumber);
      e = I->second;
      if (Inserted)
        ExpressionNumbering[exp] = nextValueNumber++;
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge that we only ever check for equivalence between blocks sharing a
  /// single immediate successor that is common, and when determining if I1 ==
  /// I2 we will have already determined that next(I1) == next(I2). This
  /// inductive property allows us to simply return the value number of the next
  /// instruction that defines memory.
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
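    // No memory-defining instruction follows Inst before the terminator, so
    // the trailing memory state is simply numbered 0.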
    return 0;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() = default;

  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    VN.setReachableBBs(BasicBlocksSet(llvm::from_range, RPOT));
    // Populate the reverse post-order so basic blocks get a deterministic
    // ordering. Any ordering works here as long as it is deterministic. The
    // ordering of newly created basic blocks is irrelevant because the RPOT
    // used for computing sinkable candidates is also obtained ahead of time;
    // only the order of the original blocks matters to this pass.
    unsigned NodeOrdering = 0;
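    // The entry block needs an explicit number because the loop below only
    // numbers blocks that have predecessors; unreachable blocks never appear
    // in the RPOT and so stay unnumbered, which is what lets sinkBB skip them.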
    RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
    for (auto *BB : RPOT)
      if (!pred_empty(BB))
        RPOTOrder[BB] = ++NodeOrdering;
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;
  DenseMap<const BasicBlock *, unsigned> RPOTOrder;

  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to by
  /// LRI and return a candidate solution if these instructions can be sunk, or
  /// std::nullopt otherwise.
  std::optional<SinkingInstructionCandidate>
  analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
                               unsigned &InstNum, unsigned &MemoryInstNum,
                               ModelledPHISet &NeededPHIs,
                               SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN, RPOTOrder);
      PHIs.insert(MPHI);
      PHIContents.insert_range(MPHI.getValues());
    }
  }

  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
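      // All incoming values are identical; forward them to the PHI's users.
      // A PHI whose only incoming value is itself has no meaningful value,
      // so replace it with poison instead.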
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};

std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
                                      unsigned &InstNum,
                                      unsigned &MemoryInstNum,
                                      ModelledPHISet &NeededPHIs,
                                      SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
                                                                  : Insts) {
    I->dump();
  } dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return std::nullopt;
    VNums[N]++;
  }
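  // Sink the value number that occurs in the largest number of predecessors.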
  unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return std::nullopt;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return std::nullopt;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert_range(PHI.getValues());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return std::nullopt;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  auto isNotSameOperation = [&I0](Instruction *I) {
    return !I0->isSameOperationAs(I);
  };

  if (any_of(NewInsts, isNotSameOperation))
    return std::nullopt;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return std::nullopt;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return std::nullopt;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return std::nullopt;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert_range(PHI.getValues());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}

unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    // Bail out on basic blocks without a predecessor (PR42346).
    if (!RPOTOrder.count(B))
      return 0;
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
    return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
  };
  // Sort in a deterministic order.
  llvm::sort(Preds, ComesBefore);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });

  LockstepReverseIterator<false> LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

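  // Sort so the most profitable candidate (highest Cost) comes first.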
  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
                                                         : Candidates) dbgs()
                                                    << " " << C << "\n";);

  // Pick the top candidate, as long as it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}

void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0) {
      I->replaceAllUsesWith(I0);
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
    }
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}

} // end anonymous namespace

PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}
863