1//===- GVNSink.cpp - sink expressions into successors ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file GVNSink.cpp
10/// This pass attempts to sink instructions into successors, reducing static
11/// instruction count and enabling if-conversion.
12///
13/// We use a variant of global value numbering to decide what can be sunk.
14/// Consider:
15///
16/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
17/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
18/// \ /
19/// [ %e = phi i32 %a2, %c2 ]
20/// [ add i32 %e, 4 ]
21///
22///
23/// GVN would number %a1 and %c1 differently because they compute different
24/// results - the VN of an instruction is a function of its opcode and the
25/// transitive closure of its operands. This is the key property for hoisting
26/// and CSE.
27///
28/// What we want when sinking however is for a numbering that is a function of
29/// the *uses* of an instruction, which allows us to answer the question "if I
30/// replace %a1 with %c1, will it contribute in an equivalent way to all
31/// successive instructions?". The PostValueTable class in GVN provides this
32/// mapping.
33//
34//===----------------------------------------------------------------------===//
35
36#include "llvm/ADT/ArrayRef.h"
37#include "llvm/ADT/DenseMap.h"
38#include "llvm/ADT/Hashing.h"
39#include "llvm/ADT/PostOrderIterator.h"
40#include "llvm/ADT/STLExtras.h"
41#include "llvm/ADT/SetVector.h"
42#include "llvm/ADT/SmallPtrSet.h"
43#include "llvm/ADT/SmallVector.h"
44#include "llvm/ADT/Statistic.h"
45#include "llvm/Analysis/GlobalsModRef.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/CFG.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/InstrTypes.h"
51#include "llvm/IR/Instruction.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/PassManager.h"
54#include "llvm/IR/Type.h"
55#include "llvm/IR/Use.h"
56#include "llvm/IR/Value.h"
57#include "llvm/Support/Allocator.h"
58#include "llvm/Support/ArrayRecycler.h"
59#include "llvm/Support/AtomicOrdering.h"
60#include "llvm/Support/Casting.h"
61#include "llvm/Support/Compiler.h"
62#include "llvm/Support/Debug.h"
63#include "llvm/Support/raw_ostream.h"
64#include "llvm/Transforms/Scalar/GVN.h"
65#include "llvm/Transforms/Scalar/GVNExpression.h"
66#include "llvm/Transforms/Utils/BasicBlockUtils.h"
67#include "llvm/Transforms/Utils/Local.h"
68#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
69#include <cassert>
70#include <cstddef>
71#include <cstdint>
72#include <iterator>
73#include <utility>
74
75using namespace llvm;
76using namespace llvm::GVNExpression;
77
78#define DEBUG_TYPE "gvn-sink"
79
80STATISTIC(NumRemoved, "Number of instructions removed");
81
82LLVM_DUMP_METHOD void Expression::dump() const {
83 print(OS&: dbgs());
84 dbgs() << "\n";
85}
86
87static bool isMemoryInst(const Instruction *I) {
88 return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I) ||
89 (isa<InvokeInst>(Val: I) && !cast<InvokeInst>(Val: I)->doesNotAccessMemory()) ||
90 (isa<CallInst>(Val: I) && !cast<CallInst>(Val: I)->doesNotAccessMemory());
91}
92
93//===----------------------------------------------------------------------===//
94
95namespace {
96
97/// Candidate solution for sinking. There may be different ways to
98/// sink instructions, differing in the number of instructions sunk,
99/// the number of predecessors sunk from and the number of PHIs
100/// required.
101struct SinkingInstructionCandidate {
102 unsigned NumBlocks;
103 unsigned NumInstructions;
104 unsigned NumPHIs;
105 unsigned NumMemoryInsts;
106 int Cost = -1;
107 SmallVector<BasicBlock *, 4> Blocks;
108
109 void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
110 unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
111 unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
112 Cost = (NumInstructions * (NumBlocks - 1)) -
113 (NumExtraPHIs *
114 NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
115 - SplitEdgeCost;
116 }
117
118 bool operator>(const SinkingInstructionCandidate &Other) const {
119 return Cost > Other.Cost;
120 }
121};
122
123//===----------------------------------------------------------------------===//
124
125/// Describes a PHI node that may or may not exist. These track the PHIs
126/// that must be created if we sunk a sequence of instructions. It provides
127/// a hash function for efficient equality comparisons.
128class ModelledPHI {
129 SmallVector<Value *, 4> Values;
130 SmallVector<BasicBlock *, 4> Blocks;
131
132public:
133 ModelledPHI() = default;
134
135 ModelledPHI(const PHINode *PN,
136 const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
137 // BasicBlock comes first so we sort by basic block pointer order,
138 // then by value pointer order. No need to call `verifyModelledPHI`
139 // As the Values and Blocks are populated in a deterministic order.
140 using OpsType = std::pair<BasicBlock *, Value *>;
141 SmallVector<OpsType, 4> Ops;
142 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
143 Ops.push_back(Elt: {PN->getIncomingBlock(i: I), PN->getIncomingValue(i: I)});
144
145 auto ComesBefore = [&](OpsType O1, OpsType O2) {
146 return BlockOrder.lookup(Val: O1.first) < BlockOrder.lookup(Val: O2.first);
147 };
148 // Sort in a deterministic order.
149 llvm::sort(C&: Ops, Comp: ComesBefore);
150
151 for (auto &P : Ops) {
152 Blocks.push_back(Elt: P.first);
153 Values.push_back(Elt: P.second);
154 }
155 }
156
157 /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
158 /// without the same ID.
159 /// \note This is specifically for DenseMapInfo - do not use this!
160 static ModelledPHI createDummy(size_t ID) {
161 ModelledPHI M;
162 M.Values.push_back(Elt: reinterpret_cast<Value*>(ID));
163 return M;
164 }
165
166 void
167 verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
168 assert(Values.size() > 1 && Blocks.size() > 1 &&
169 "Modelling PHI with less than 2 values");
170 [[maybe_unused]] auto ComesBefore = [&](const BasicBlock *BB1,
171 const BasicBlock *BB2) {
172 return BlockOrder.lookup(Val: BB1) < BlockOrder.lookup(Val: BB2);
173 };
174 assert(llvm::is_sorted(Blocks, ComesBefore));
175 int C = 0;
176 for (const Value *V : Values) {
177 if (!isa<UndefValue>(Val: V)) {
178 assert(cast<Instruction>(V)->getParent() == Blocks[C]);
179 (void)C;
180 }
181 C++;
182 }
183 }
184 /// Create a PHI from an array of incoming values and incoming blocks.
185 ModelledPHI(SmallVectorImpl<Instruction *> &V,
186 SmallSetVector<BasicBlock *, 4> &B,
187 const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
188 // The order of Values and Blocks are already ordered by the caller.
189 llvm::append_range(C&: Values, R&: V);
190 llvm::append_range(C&: Blocks, R&: B);
191 verifyModelledPHI(BlockOrder);
192 }
193
194 /// Create a PHI from [I[OpNum] for I in Insts].
195 /// TODO: Figure out a way to verifyModelledPHI in this constructor.
196 ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
197 SmallSetVector<BasicBlock *, 4> &B) {
198 llvm::append_range(C&: Blocks, R&: B);
199 for (auto *I : Insts)
200 Values.push_back(Elt: I->getOperand(i: OpNum));
201 }
202
203 /// Restrict the PHI's contents down to only \c NewBlocks.
204 /// \c NewBlocks must be a subset of \c this->Blocks.
205 void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
206 auto BI = Blocks.begin();
207 auto VI = Values.begin();
208 while (BI != Blocks.end()) {
209 assert(VI != Values.end());
210 if (!NewBlocks.contains(key: *BI)) {
211 BI = Blocks.erase(CI: BI);
212 VI = Values.erase(CI: VI);
213 } else {
214 ++BI;
215 ++VI;
216 }
217 }
218 assert(Blocks.size() == NewBlocks.size());
219 }
220
221 ArrayRef<Value *> getValues() const { return Values; }
222
223 bool areAllIncomingValuesSame() const {
224 return llvm::all_equal(Range: Values);
225 }
226
227 bool areAllIncomingValuesSameType() const {
228 return llvm::all_of(
229 Range: Values, P: [&](Value *V) { return V->getType() == Values[0]->getType(); });
230 }
231
232 bool areAnyIncomingValuesConstant() const {
233 return llvm::any_of(Range: Values, P: [&](Value *V) { return isa<Constant>(Val: V); });
234 }
235
236 // Hash functor
237 unsigned hash() const {
238 // Is deterministic because Values are saved in a specific order.
239 return (unsigned)hash_combine_range(R: Values);
240 }
241
242 bool operator==(const ModelledPHI &Other) const {
243 return Values == Other.Values && Blocks == Other.Blocks;
244 }
245};
246} // namespace
247
248#ifndef NDEBUG
249static raw_ostream &operator<<(raw_ostream &OS,
250 const SinkingInstructionCandidate &C) {
251 OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
252 << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
253 return OS;
254}
255#endif
256
257template <> struct llvm::DenseMapInfo<ModelledPHI> {
258 static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
259
260 static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
261 return LHS == RHS;
262 }
263};
264
265using ModelledPHISet = SetVector<ModelledPHI>;
266
267namespace {
268
269//===----------------------------------------------------------------------===//
270// ValueTable
271//===----------------------------------------------------------------------===//
272// This is a value number table where the value number is a function of the
273// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
274// that the program would be equivalent if we replaced A with PHI(A, B).
275//===----------------------------------------------------------------------===//
276
277/// A GVN expression describing how an instruction is used. The operands
278/// field of BasicExpression is used to store uses, not operands.
279///
280/// This class also contains fields for discriminators used when determining
281/// equivalence of instructions with sideeffects.
282class InstructionUseExpr : public BasicExpression {
283 unsigned MemoryUseOrder = -1;
284 bool Volatile = false;
285 ArrayRef<int> ShuffleMask;
286
287public:
288 InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
289 BumpPtrAllocator &A)
290 : BasicExpression(I->getNumUses()) {
291 allocateOperands(Recycler&: R, Allocator&: A);
292 setOpcode(I->getOpcode());
293 setType(I->getType());
294
295 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Val: I))
296 ShuffleMask = SVI->getShuffleMask().copy(A);
297
298 for (auto &U : I->uses())
299 op_push_back(Arg: U.getUser());
300 llvm::sort(C: operands());
301 }
302
303 void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
304 void setVolatile(bool V) { Volatile = V; }
305
306 hash_code getHashValue() const override {
307 return hash_combine(args: BasicExpression::getHashValue(), args: MemoryUseOrder,
308 args: Volatile, args: ShuffleMask);
309 }
310
311 template <typename Function> hash_code getHashValue(Function MapFn) {
312 hash_code H = hash_combine(args: getOpcode(), args: getType(), args: MemoryUseOrder, args: Volatile,
313 args: ShuffleMask);
314 for (auto *V : operands())
315 H = hash_combine(H, MapFn(V));
316 return H;
317 }
318};
319
320using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
321
322class ValueTable {
323 DenseMap<Value *, uint32_t> ValueNumbering;
324 DenseMap<Expression *, uint32_t> ExpressionNumbering;
325 DenseMap<size_t, uint32_t> HashNumbering;
326 BumpPtrAllocator Allocator;
327 ArrayRecycler<Value *> Recycler;
328 uint32_t nextValueNumber = 1;
329 BasicBlocksSet ReachableBBs;
330
331 /// Create an expression for I based on its opcode and its uses. If I
332 /// touches or reads memory, the expression is also based upon its memory
333 /// order - see \c getMemoryUseOrder().
334 InstructionUseExpr *createExpr(Instruction *I) {
335 InstructionUseExpr *E =
336 new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
337 if (isMemoryInst(I))
338 E->setMemoryUseOrder(getMemoryUseOrder(Inst: I));
339
340 if (CmpInst *C = dyn_cast<CmpInst>(Val: I)) {
341 CmpInst::Predicate Predicate = C->getPredicate();
342 E->setOpcode((C->getOpcode() << 8) | Predicate);
343 }
344 return E;
345 }
346
347 /// Helper to compute the value number for a memory instruction
348 /// (LoadInst/StoreInst), including checking the memory ordering and
349 /// volatility.
350 template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
351 if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
352 return nullptr;
353 InstructionUseExpr *E = createExpr(I);
354 E->setVolatile(I->isVolatile());
355 return E;
356 }
357
358public:
359 ValueTable() = default;
360
361 /// Set basic blocks reachable from entry block.
362 void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
363 this->ReachableBBs = ReachableBBs;
364 }
365
366 /// Returns the value number for the specified value, assigning
367 /// it a new number if it did not have one before.
368 uint32_t lookupOrAdd(Value *V) {
369 auto VI = ValueNumbering.find(Val: V);
370 if (VI != ValueNumbering.end())
371 return VI->second;
372
373 if (!isa<Instruction>(Val: V)) {
374 ValueNumbering[V] = nextValueNumber;
375 return nextValueNumber++;
376 }
377
378 Instruction *I = cast<Instruction>(Val: V);
379 if (!ReachableBBs.contains(Ptr: I->getParent()))
380 return ~0U;
381
382 InstructionUseExpr *exp = nullptr;
383 switch (I->getOpcode()) {
384 case Instruction::Load:
385 exp = createMemoryExpr(I: cast<LoadInst>(Val: I));
386 break;
387 case Instruction::Store:
388 exp = createMemoryExpr(I: cast<StoreInst>(Val: I));
389 break;
390 case Instruction::Call:
391 case Instruction::Invoke:
392 case Instruction::FNeg:
393 case Instruction::Add:
394 case Instruction::FAdd:
395 case Instruction::Sub:
396 case Instruction::FSub:
397 case Instruction::Mul:
398 case Instruction::FMul:
399 case Instruction::UDiv:
400 case Instruction::SDiv:
401 case Instruction::FDiv:
402 case Instruction::URem:
403 case Instruction::SRem:
404 case Instruction::FRem:
405 case Instruction::Shl:
406 case Instruction::LShr:
407 case Instruction::AShr:
408 case Instruction::And:
409 case Instruction::Or:
410 case Instruction::Xor:
411 case Instruction::ICmp:
412 case Instruction::FCmp:
413 case Instruction::Trunc:
414 case Instruction::ZExt:
415 case Instruction::SExt:
416 case Instruction::FPToUI:
417 case Instruction::FPToSI:
418 case Instruction::UIToFP:
419 case Instruction::SIToFP:
420 case Instruction::FPTrunc:
421 case Instruction::FPExt:
422 case Instruction::PtrToInt:
423 case Instruction::PtrToAddr:
424 case Instruction::IntToPtr:
425 case Instruction::BitCast:
426 case Instruction::AddrSpaceCast:
427 case Instruction::Select:
428 case Instruction::ExtractElement:
429 case Instruction::InsertElement:
430 case Instruction::ShuffleVector:
431 case Instruction::InsertValue:
432 case Instruction::GetElementPtr:
433 exp = createExpr(I);
434 break;
435 default:
436 break;
437 }
438
439 if (!exp) {
440 ValueNumbering[V] = nextValueNumber;
441 return nextValueNumber++;
442 }
443
444 uint32_t e = ExpressionNumbering[exp];
445 if (!e) {
446 hash_code H = exp->getHashValue(MapFn: [=](Value *V) { return lookupOrAdd(V); });
447 auto [I, Inserted] = HashNumbering.try_emplace(Key: H, Args&: nextValueNumber);
448 e = I->second;
449 if (Inserted)
450 ExpressionNumbering[exp] = nextValueNumber++;
451 }
452 ValueNumbering[V] = e;
453 return e;
454 }
455
456 /// Returns the value number of the specified value. Fails if the value has
457 /// not yet been numbered.
458 uint32_t lookup(Value *V) const {
459 auto VI = ValueNumbering.find(Val: V);
460 assert(VI != ValueNumbering.end() && "Value not numbered?");
461 return VI->second;
462 }
463
464 /// Removes all value numberings and resets the value table.
465 void clear() {
466 ValueNumbering.clear();
467 ExpressionNumbering.clear();
468 HashNumbering.clear();
469 Recycler.clear(Allocator);
470 nextValueNumber = 1;
471 }
472
473 /// \c Inst uses or touches memory. Return an ID describing the memory state
474 /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
475 /// the exact same memory operations happen after I1 and I2.
476 ///
477 /// This is a very hard problem in general, so we use domain-specific
478 /// knowledge that we only ever check for equivalence between blocks sharing a
479 /// single immediate successor that is common, and when determining if I1 ==
480 /// I2 we will have already determined that next(I1) == next(I2). This
481 /// inductive property allows us to simply return the value number of the next
482 /// instruction that defines memory.
483 uint32_t getMemoryUseOrder(Instruction *Inst) {
484 auto *BB = Inst->getParent();
485 for (auto I = std::next(x: Inst->getIterator()), E = BB->end();
486 I != E && !I->isTerminator(); ++I) {
487 if (!isMemoryInst(I: &*I))
488 continue;
489 if (isa<LoadInst>(Val: &*I))
490 continue;
491 CallInst *CI = dyn_cast<CallInst>(Val: &*I);
492 if (CI && CI->onlyReadsMemory())
493 continue;
494 InvokeInst *II = dyn_cast<InvokeInst>(Val: &*I);
495 if (II && II->onlyReadsMemory())
496 continue;
497 return lookupOrAdd(V: &*I);
498 }
499 return 0;
500 }
501};
502
503//===----------------------------------------------------------------------===//
504
505class GVNSink {
506public:
507 GVNSink() = default;
508
509 bool run(Function &F) {
510 LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
511 << "\n");
512
513 unsigned NumSunk = 0;
514 ReversePostOrderTraversal<Function*> RPOT(&F);
515 VN.setReachableBBs(BasicBlocksSet(llvm::from_range, RPOT));
516 // Populate reverse post-order to order basic blocks in deterministic
517 // order. Any arbitrary ordering will work in this case as long as they are
518 // deterministic. The node ordering of newly created basic blocks
519 // are irrelevant because RPOT(for computing sinkable candidates) is also
520 // obtained ahead of time and only their order are relevant for this pass.
521 unsigned NodeOrdering = 0;
522 RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
523 for (auto *BB : RPOT)
524 if (!pred_empty(BB))
525 RPOTOrder[BB] = ++NodeOrdering;
526 for (auto *N : RPOT)
527 NumSunk += sinkBB(BBEnd: N);
528
529 return NumSunk > 0;
530 }
531
532private:
533 ValueTable VN;
534 DenseMap<const BasicBlock *, unsigned> RPOTOrder;
535
536 bool shouldAvoidSinkingInstruction(Instruction *I) {
537 // These instructions may change or break semantics if moved.
538 if (isa<PHINode>(Val: I) || I->isEHPad() || isa<AllocaInst>(Val: I) ||
539 I->getType()->isTokenTy())
540 return true;
541 return false;
542 }
543
544 /// The main heuristic function. Analyze the set of instructions pointed to by
545 /// LRI and return a candidate solution if these instructions can be sunk, or
546 /// std::nullopt otherwise.
547 std::optional<SinkingInstructionCandidate>
548 analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
549 unsigned &InstNum, unsigned &MemoryInstNum,
550 ModelledPHISet &NeededPHIs,
551 SmallPtrSetImpl<Value *> &PHIContents);
552
553 /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
554 void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
555 SmallPtrSetImpl<Value *> &PHIContents) {
556 for (PHINode &PN : BB->phis()) {
557 auto MPHI = ModelledPHI(&PN, RPOTOrder);
558 PHIs.insert(X: MPHI);
559 PHIContents.insert_range(R: MPHI.getValues());
560 }
561 }
562
563 /// The main instruction sinking driver. Set up state and try and sink
564 /// instructions into BBEnd from its predecessors.
565 unsigned sinkBB(BasicBlock *BBEnd);
566
567 /// Perform the actual mechanics of sinking an instruction from Blocks into
568 /// BBEnd, which is their only successor.
569 void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
570
571 /// Remove PHIs that all have the same incoming value.
572 void foldPointlessPHINodes(BasicBlock *BB) {
573 auto I = BB->begin();
574 while (PHINode *PN = dyn_cast<PHINode>(Val: I++)) {
575 if (!llvm::all_of(Range: PN->incoming_values(), P: [&](const Value *V) {
576 return V == PN->getIncomingValue(i: 0);
577 }))
578 continue;
579 if (PN->getIncomingValue(i: 0) != PN)
580 PN->replaceAllUsesWith(V: PN->getIncomingValue(i: 0));
581 else
582 PN->replaceAllUsesWith(V: PoisonValue::get(T: PN->getType()));
583 PN->eraseFromParent();
584 }
585 }
586};
587} // namespace
588
589std::optional<SinkingInstructionCandidate>
590GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
591 unsigned &InstNum,
592 unsigned &MemoryInstNum,
593 ModelledPHISet &NeededPHIs,
594 SmallPtrSetImpl<Value *> &PHIContents) {
595 auto Insts = *LRI;
596 LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
597 : Insts) {
598 I->dump();
599 } dbgs() << " ]\n";);
600
601 DenseMap<uint32_t, unsigned> VNums;
602 for (auto *I : Insts) {
603 uint32_t N = VN.lookupOrAdd(V: I);
604 LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
605 if (N == ~0U)
606 return std::nullopt;
607 VNums[N]++;
608 }
609 unsigned VNumToSink = llvm::max_element(Range&: VNums, C: llvm::less_second())->first;
610
611 if (VNums[VNumToSink] == 1)
612 // Can't sink anything!
613 return std::nullopt;
614
615 // Now restrict the number of incoming blocks down to only those with
616 // VNumToSink.
617 auto &ActivePreds = LRI.getActiveBlocks();
618 unsigned InitialActivePredSize = ActivePreds.size();
619 SmallVector<Instruction *, 4> NewInsts;
620 for (auto *I : Insts) {
621 if (VN.lookup(V: I) != VNumToSink)
622 ActivePreds.remove(X: I->getParent());
623 else
624 NewInsts.push_back(Elt: I);
625 }
626 for (auto *I : NewInsts)
627 if (shouldAvoidSinkingInstruction(I))
628 return std::nullopt;
629
630 // If we've restricted the incoming blocks, restrict all needed PHIs also
631 // to that set.
632 bool RecomputePHIContents = false;
633 if (ActivePreds.size() != InitialActivePredSize) {
634 ModelledPHISet NewNeededPHIs;
635 for (auto P : NeededPHIs) {
636 P.restrictToBlocks(NewBlocks: ActivePreds);
637 NewNeededPHIs.insert(X: P);
638 }
639 NeededPHIs = NewNeededPHIs;
640 LRI.restrictToBlocks(Blocks&: ActivePreds);
641 RecomputePHIContents = true;
642 }
643
644 // The sunk instruction's results.
645 ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);
646
647 // Does sinking this instruction render previous PHIs redundant?
648 if (NeededPHIs.remove(X: NewPHI))
649 RecomputePHIContents = true;
650
651 if (RecomputePHIContents) {
652 // The needed PHIs have changed, so recompute the set of all needed
653 // values.
654 PHIContents.clear();
655 for (auto &PHI : NeededPHIs)
656 PHIContents.insert_range(R: PHI.getValues());
657 }
658
659 // Is this instruction required by a later PHI that doesn't match this PHI?
660 // if so, we can't sink this instruction.
661 for (auto *V : NewPHI.getValues())
662 if (PHIContents.count(Ptr: V))
663 // V exists in this PHI, but the whole PHI is different to NewPHI
664 // (else it would have been removed earlier). We cannot continue
665 // because this isn't representable.
666 return std::nullopt;
667
668 // Which operands need PHIs?
669 // FIXME: If any of these fail, we should partition up the candidates to
670 // try and continue making progress.
671 Instruction *I0 = NewInsts[0];
672
673 auto isNotSameOperation = [&I0](Instruction *I) {
674 return !I0->isSameOperationAs(I);
675 };
676
677 if (any_of(Range&: NewInsts, P: isNotSameOperation))
678 return std::nullopt;
679
680 for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
681 ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
682 if (PHI.areAllIncomingValuesSame())
683 continue;
684 if (!canReplaceOperandWithVariable(I: I0, OpIdx: OpNum))
685 // We can 't create a PHI from this instruction!
686 return std::nullopt;
687 if (NeededPHIs.count(key: PHI))
688 continue;
689 if (!PHI.areAllIncomingValuesSameType())
690 return std::nullopt;
691 // Don't create indirect calls! The called value is the final operand.
692 if ((isa<CallInst>(Val: I0) || isa<InvokeInst>(Val: I0)) && OpNum == E - 1 &&
693 PHI.areAnyIncomingValuesConstant())
694 return std::nullopt;
695
696 NeededPHIs.insert(X: PHI);
697 PHIContents.insert_range(R: PHI.getValues());
698 }
699
700 if (isMemoryInst(I: NewInsts[0]))
701 ++MemoryInstNum;
702
703 SinkingInstructionCandidate Cand;
704 Cand.NumInstructions = ++InstNum;
705 Cand.NumMemoryInsts = MemoryInstNum;
706 Cand.NumBlocks = ActivePreds.size();
707 Cand.NumPHIs = NeededPHIs.size();
708 append_range(C&: Cand.Blocks, R&: ActivePreds);
709
710 return Cand;
711}
712
713unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
714 LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
715 BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
716 SmallVector<BasicBlock *, 4> Preds;
717 for (auto *B : predecessors(BB: BBEnd)) {
718 // Bailout on basic blocks without predecessor(PR42346).
719 if (!RPOTOrder.count(Val: B))
720 return 0;
721 auto *T = B->getTerminator();
722 if (isa<UncondBrInst, CondBrInst, SwitchInst>(Val: T))
723 Preds.push_back(Elt: B);
724 else
725 return 0;
726 }
727 if (Preds.size() < 2)
728 return 0;
729 auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
730 return RPOTOrder.lookup(Val: BB1) < RPOTOrder.lookup(Val: BB2);
731 };
732 // Sort in a deterministic order.
733 llvm::sort(C&: Preds, Comp: ComesBefore);
734
735 unsigned NumOrigPreds = Preds.size();
736 // We can only sink instructions through unconditional branches.
737 llvm::erase_if(C&: Preds, P: [](BasicBlock *BB) {
738 return BB->getTerminator()->getNumSuccessors() != 1;
739 });
740
741 LockstepReverseIterator<false> LRI(Preds);
742 SmallVector<SinkingInstructionCandidate, 4> Candidates;
743 unsigned InstNum = 0, MemoryInstNum = 0;
744 ModelledPHISet NeededPHIs;
745 SmallPtrSet<Value *, 4> PHIContents;
746 analyzeInitialPHIs(BB: BBEnd, PHIs&: NeededPHIs, PHIContents);
747 unsigned NumOrigPHIs = NeededPHIs.size();
748
749 while (LRI.isValid()) {
750 auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
751 NeededPHIs, PHIContents);
752 if (!Cand)
753 break;
754 Cand->calculateCost(NumOrigPHIs, NumOrigBlocks: Preds.size());
755 Candidates.emplace_back(Args&: *Cand);
756 --LRI;
757 }
758
759 llvm::stable_sort(Range&: Candidates, C: std::greater<SinkingInstructionCandidate>());
760 LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
761 : Candidates) dbgs()
762 << " " << C << "\n";);
763
764 // Pick the top candidate, as long it is positive!
765 if (Candidates.empty() || Candidates.front().Cost <= 0)
766 return 0;
767 auto C = Candidates.front();
768
769 LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
770 BasicBlock *InsertBB = BBEnd;
771 if (C.Blocks.size() < NumOrigPreds) {
772 LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
773 BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
774 InsertBB = SplitBlockPredecessors(BB: BBEnd, Preds: C.Blocks, Suffix: ".gvnsink.split");
775 if (!InsertBB) {
776 LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
777 // Edge couldn't be split.
778 return 0;
779 }
780 }
781
782 for (unsigned I = 0; I < C.NumInstructions; ++I)
783 sinkLastInstruction(Blocks: C.Blocks, BBEnd: InsertBB);
784
785 return C.NumInstructions;
786}
787
788void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
789 BasicBlock *BBEnd) {
790 SmallVector<Instruction *, 4> Insts;
791 for (BasicBlock *BB : Blocks)
792 Insts.push_back(Elt: BB->getTerminator()->getPrevNode());
793 Instruction *I0 = Insts.front();
794
795 SmallVector<Value *, 4> NewOperands;
796 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
797 bool NeedPHI = llvm::any_of(Range&: Insts, P: [&I0, O](const Instruction *I) {
798 return I->getOperand(i: O) != I0->getOperand(i: O);
799 });
800 if (!NeedPHI) {
801 NewOperands.push_back(Elt: I0->getOperand(i: O));
802 continue;
803 }
804
805 // Create a new PHI in the successor block and populate it.
806 auto *Op = I0->getOperand(i: O);
807 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
808 auto *PN =
809 PHINode::Create(Ty: Op->getType(), NumReservedValues: Insts.size(), NameStr: Op->getName() + ".sink");
810 PN->insertBefore(InsertPos: BBEnd->begin());
811 for (auto *I : Insts)
812 PN->addIncoming(V: I->getOperand(i: O), BB: I->getParent());
813 NewOperands.push_back(Elt: PN);
814 }
815
816 // Arbitrarily use I0 as the new "common" instruction; remap its operands
817 // and move it to the start of the successor block.
818 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
819 I0->getOperandUse(i: O).set(NewOperands[O]);
820 I0->moveBefore(InsertPos: BBEnd->getFirstInsertionPt());
821
822 // Update metadata and IR flags.
823 for (auto *I : Insts)
824 if (I != I0) {
825 combineMetadataForCSE(K: I0, J: I, DoesKMove: true);
826 I0->andIRFlags(V: I);
827 }
828
829 for (auto *I : Insts)
830 if (I != I0) {
831 I->replaceAllUsesWith(V: I0);
832 I0->applyMergedLocation(LocA: I0->getDebugLoc(), LocB: I->getDebugLoc());
833 }
834 foldPointlessPHINodes(BB: BBEnd);
835
836 // Finally nuke all instructions apart from the common instruction.
837 for (auto *I : Insts)
838 if (I != I0)
839 I->eraseFromParent();
840
841 NumRemoved += Insts.size() - 1;
842}
843
844PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
845 GVNSink G;
846 if (!G.run(F))
847 return PreservedAnalyses::all();
848
849 return PreservedAnalyses::none();
850}
851