1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global value numbering to eliminate fully redundant
10// instructions. It also performs simple dead load elimination.
11//
12// Note that this pass does the value numbering itself; it does not use the
13// ValueNumbering analysis passes.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Transforms/Scalar/GVN.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/DepthFirstIterator.h"
20#include "llvm/ADT/Hashing.h"
21#include "llvm/ADT/MapVector.h"
22#include "llvm/ADT/PostOrderIterator.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/ADT/SmallPtrSet.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/Analysis/AliasAnalysis.h"
29#include "llvm/Analysis/AssumeBundleQueries.h"
30#include "llvm/Analysis/AssumptionCache.h"
31#include "llvm/Analysis/CFG.h"
32#include "llvm/Analysis/DomTreeUpdater.h"
33#include "llvm/Analysis/GlobalsModRef.h"
34#include "llvm/Analysis/InstructionPrecedenceTracking.h"
35#include "llvm/Analysis/InstructionSimplify.h"
36#include "llvm/Analysis/Loads.h"
37#include "llvm/Analysis/LoopInfo.h"
38#include "llvm/Analysis/MemoryBuiltins.h"
39#include "llvm/Analysis/MemoryDependenceAnalysis.h"
40#include "llvm/Analysis/MemorySSA.h"
41#include "llvm/Analysis/MemorySSAUpdater.h"
42#include "llvm/Analysis/OptimizationRemarkEmitter.h"
43#include "llvm/Analysis/PHITransAddr.h"
44#include "llvm/Analysis/TargetLibraryInfo.h"
45#include "llvm/Analysis/ValueTracking.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
55#include "llvm/IR/Instructions.h"
56#include "llvm/IR/IntrinsicInst.h"
57#include "llvm/IR/LLVMContext.h"
58#include "llvm/IR/Metadata.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/PassManager.h"
61#include "llvm/IR/PatternMatch.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/Value.h"
65#include "llvm/InitializePasses.h"
66#include "llvm/Pass.h"
67#include "llvm/Support/Casting.h"
68#include "llvm/Support/CommandLine.h"
69#include "llvm/Support/Compiler.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/raw_ostream.h"
72#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
73#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74#include "llvm/Transforms/Utils/Local.h"
75#include "llvm/Transforms/Utils/SSAUpdater.h"
76#include "llvm/Transforms/Utils/VNCoercion.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::gvn;
85using namespace llvm::VNCoercion;
86using namespace PatternMatch;
87
88#define DEBUG_TYPE "gvn"
89
90STATISTIC(NumGVNInstr, "Number of instructions deleted");
91STATISTIC(NumGVNLoad, "Number of loads deleted");
92STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93STATISTIC(NumGVNBlocks, "Number of blocks merged");
94STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96STATISTIC(NumPRELoad, "Number of loads PRE'd");
97STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98STATISTIC(NumPRELoadMoved2CEPred,
99 "Number of loads moved to predecessor of a critical edge in PRE");
100
101STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102 "Number of blocks speculated as available in "
103 "IsValueFullyAvailableInBlock(), max");
104STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105 "Number of times we we reached gvn-max-block-speculations cut-off "
106 "preventing further exploration");
107
108static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(Val: true), cl::Hidden);
109static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(Val: true));
110static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
111 cl::init(Val: true));
112static cl::opt<bool>
113GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
114 cl::init(Val: false));
115static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(Val: true));
116static cl::opt<bool> GVNEnableMemorySSA("enable-gvn-memoryssa",
117 cl::init(Val: false));
118
119static cl::opt<uint32_t> MaxNumDeps(
120 "gvn-max-num-deps", cl::Hidden, cl::init(Val: 100),
121 cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
122
123// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
124static cl::opt<uint32_t> MaxBBSpeculations(
125 "gvn-max-block-speculations", cl::Hidden, cl::init(Val: 600),
126 cl::desc("Max number of blocks we're willing to speculate on (and recurse "
127 "into) when deducing if a value is fully available or not in GVN "
128 "(default = 600)"));
129
130static cl::opt<uint32_t> MaxNumVisitedInsts(
131 "gvn-max-num-visited-insts", cl::Hidden, cl::init(Val: 100),
132 cl::desc("Max number of visited instructions when trying to find "
133 "dominating value of select dependency (default = 100)"));
134
135static cl::opt<uint32_t> MaxNumInsnsPerBlock(
136 "gvn-max-num-insns", cl::Hidden, cl::init(Val: 100),
137 cl::desc("Max number of instructions to scan in each basic block in GVN "
138 "(default = 100)"));
139
140struct llvm::GVNPass::Expression {
141 uint32_t Opcode;
142 bool Commutative = false;
  // The type is not necessarily the result type of the expression; it may be
  // any additional type needed to disambiguate the expression.
145 Type *Ty = nullptr;
146 SmallVector<uint32_t, 4> VarArgs;
147
148 AttributeList Attrs;
149
150 Expression(uint32_t Op = ~2U) : Opcode(Op) {}
151
152 bool operator==(const Expression &Other) const {
153 if (Opcode != Other.Opcode)
154 return false;
155 if (Opcode == ~0U || Opcode == ~1U)
156 return true;
157 if (Ty != Other.Ty)
158 return false;
159 if (VarArgs != Other.VarArgs)
160 return false;
161 if ((!Attrs.isEmpty() || !Other.Attrs.isEmpty()) &&
162 !Attrs.intersectWith(C&: Ty->getContext(), Other: Other.Attrs).has_value())
163 return false;
164 return true;
165 }
166
167 friend hash_code hash_value(const Expression &Value) {
168 return hash_combine(args: Value.Opcode, args: Value.Ty,
169 args: hash_combine_range(R: Value.VarArgs));
170 }
171};
172
173template <> struct llvm::DenseMapInfo<GVNPass::Expression> {
174 static inline GVNPass::Expression getEmptyKey() { return ~0U; }
175 static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
176
177 static unsigned getHashValue(const GVNPass::Expression &E) {
178 using llvm::hash_value;
179
180 return static_cast<unsigned>(hash_value(Value: E));
181 }
182
183 static bool isEqual(const GVNPass::Expression &LHS,
184 const GVNPass::Expression &RHS) {
185 return LHS == RHS;
186 }
187};
188
189/// Represents a particular available value that we know how to materialize.
190/// Materialization of an AvailableValue never fails. An AvailableValue is
191/// implicitly associated with a rematerialization point which is the
192/// location of the instruction from which it was formed.
193struct llvm::gvn::AvailableValue {
194 enum class ValType {
195 SimpleVal, // A simple offsetted value that is accessed.
196 LoadVal, // A value produced by a load.
197 MemIntrin, // A memory intrinsic which is loaded from.
    UndefVal,  // An UndefValue representing a value from a dead block (which
               // is not yet physically removed from the CFG).
    SelectVal, // A pointer select which is loaded from and for which the load
               // can be replaced by a value select.
202 };
203
204 /// Val - The value that is live out of the block.
205 Value *Val;
206 /// Kind of the live-out value.
207 ValType Kind;
208
209 /// Offset - The byte offset in Val that is interesting for the load query.
210 unsigned Offset = 0;
211 /// V1, V2 - The dominating non-clobbered values of SelectVal.
212 Value *V1 = nullptr, *V2 = nullptr;
213
214 static AvailableValue get(Value *V, unsigned Offset = 0) {
215 AvailableValue Res;
216 Res.Val = V;
217 Res.Kind = ValType::SimpleVal;
218 Res.Offset = Offset;
219 return Res;
220 }
221
222 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
223 AvailableValue Res;
224 Res.Val = MI;
225 Res.Kind = ValType::MemIntrin;
226 Res.Offset = Offset;
227 return Res;
228 }
229
230 static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
231 AvailableValue Res;
232 Res.Val = Load;
233 Res.Kind = ValType::LoadVal;
234 Res.Offset = Offset;
235 return Res;
236 }
237
238 static AvailableValue getUndef() {
239 AvailableValue Res;
240 Res.Val = nullptr;
241 Res.Kind = ValType::UndefVal;
242 Res.Offset = 0;
243 return Res;
244 }
245
246 static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2) {
247 AvailableValue Res;
248 Res.Val = Sel;
249 Res.Kind = ValType::SelectVal;
250 Res.Offset = 0;
251 Res.V1 = V1;
252 Res.V2 = V2;
253 return Res;
254 }
255
256 bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
257 bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
258 bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
259 bool isUndefValue() const { return Kind == ValType::UndefVal; }
260 bool isSelectValue() const { return Kind == ValType::SelectVal; }
261
262 Value *getSimpleValue() const {
263 assert(isSimpleValue() && "Wrong accessor");
264 return Val;
265 }
266
267 LoadInst *getCoercedLoadValue() const {
268 assert(isCoercedLoadValue() && "Wrong accessor");
269 return cast<LoadInst>(Val);
270 }
271
272 MemIntrinsic *getMemIntrinValue() const {
273 assert(isMemIntrinValue() && "Wrong accessor");
274 return cast<MemIntrinsic>(Val);
275 }
276
277 SelectInst *getSelectValue() const {
278 assert(isSelectValue() && "Wrong accessor");
279 return cast<SelectInst>(Val);
280 }
281
282 /// Emit code at the specified insertion point to adjust the value defined
283 /// here to the specified type. This handles various coercion cases.
284 Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt) const;
285};
286
287/// Represents an AvailableValue which can be rematerialized at the end of
288/// the associated BasicBlock.
289struct llvm::gvn::AvailableValueInBlock {
290 /// BB - The basic block in question.
291 BasicBlock *BB = nullptr;
292
293 /// AV - The actual available value.
294 AvailableValue AV;
295
296 static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
297 AvailableValueInBlock Res;
298 Res.BB = BB;
299 Res.AV = std::move(AV);
300 return Res;
301 }
302
303 static AvailableValueInBlock get(BasicBlock *BB, Value *V,
304 unsigned Offset = 0) {
305 return get(BB, AV: AvailableValue::get(V, Offset));
306 }
307
308 static AvailableValueInBlock getUndef(BasicBlock *BB) {
309 return get(BB, AV: AvailableValue::getUndef());
310 }
311
312 static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel,
313 Value *V1, Value *V2) {
314 return get(BB, AV: AvailableValue::getSelect(Sel, V1, V2));
315 }
316
317 /// Emit code at the end of this block to adjust the value defined here to
318 /// the specified type. This handles various coercion cases.
319 Value *MaterializeAdjustedValue(LoadInst *Load) const {
320 return AV.MaterializeAdjustedValue(Load, InsertPt: BB->getTerminator());
321 }
322};
323
324//===----------------------------------------------------------------------===//
325// ValueTable Internal Functions
326//===----------------------------------------------------------------------===//
327
328GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
329 Expression E;
330 E.Ty = I->getType();
331 E.Opcode = I->getOpcode();
332 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Val: I)) {
    // gc.relocate is a 'special' call: its second and third operands are
    // not real values, but indices into the statepoint's argument list.
    // Use the referred-to values for purposes of identity.
336 E.VarArgs.push_back(Elt: lookupOrAdd(V: GCR->getOperand(i_nocapture: 0)));
337 E.VarArgs.push_back(Elt: lookupOrAdd(V: GCR->getBasePtr()));
338 E.VarArgs.push_back(Elt: lookupOrAdd(V: GCR->getDerivedPtr()));
339 } else {
340 for (Use &Op : I->operands())
341 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
342 }
343 if (I->isCommutative()) {
344 // Ensure that commutative instructions that only differ by a permutation
345 // of their operands get the same value number by sorting the operand value
346 // numbers. Since commutative operands are the 1st two operands it is more
347 // efficient to sort by hand rather than using, say, std::sort.
348 assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
349 if (E.VarArgs[0] > E.VarArgs[1])
350 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
351 E.Commutative = true;
352 }
353
354 if (auto *C = dyn_cast<CmpInst>(Val: I)) {
355 // Sort the operand value numbers so x<y and y>x get the same value number.
356 CmpInst::Predicate Predicate = C->getPredicate();
357 if (E.VarArgs[0] > E.VarArgs[1]) {
358 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
359 Predicate = CmpInst::getSwappedPredicate(pred: Predicate);
360 }
361 E.Opcode = (C->getOpcode() << 8) | Predicate;
362 E.Commutative = true;
363 } else if (auto *IVI = dyn_cast<InsertValueInst>(Val: I)) {
364 E.VarArgs.append(in_start: IVI->idx_begin(), in_end: IVI->idx_end());
365 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) {
366 ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
367 E.VarArgs.append(in_start: ShuffleMask.begin(), in_end: ShuffleMask.end());
368 } else if (auto *CB = dyn_cast<CallBase>(Val: I)) {
369 E.Attrs = CB->getAttributes();
370 }
371
372 return E;
373}
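
// For example, because the first two operand value numbers of a commutative
// instruction are sorted in createExpr above, the IR
//   %x = add i32 %a, %b
//   %y = add i32 %b, %a
// produces identical Expressions for %x and %y, so both receive the same
// value number.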
374
375GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
376 unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
377 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
378 "Not a comparison!");
379 Expression E;
380 E.Ty = CmpInst::makeCmpResultType(opnd_type: LHS->getType());
381 E.VarArgs.push_back(Elt: lookupOrAdd(V: LHS));
382 E.VarArgs.push_back(Elt: lookupOrAdd(V: RHS));
383
384 // Sort the operand value numbers so x<y and y>x get the same value number.
385 if (E.VarArgs[0] > E.VarArgs[1]) {
386 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
387 Predicate = CmpInst::getSwappedPredicate(pred: Predicate);
388 }
389 E.Opcode = (Opcode << 8) | Predicate;
390 E.Commutative = true;
391 return E;
392}
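
// createCmpExpr gives deduced comparisons the same canonical form as real
// ones: for instance, `icmp slt i32 %a, %b` and `icmp sgt i32 %b, %a` map to
// a single Expression, since the operand value numbers are sorted and the
// predicate is swapped to match.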
393
394GVNPass::Expression
395GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
396 assert(EI && "Not an ExtractValueInst?");
397 Expression E;
398 E.Ty = EI->getType();
399 E.Opcode = 0;
400
401 WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Val: EI->getAggregateOperand());
402 if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
403 // EI is an extract from one of our with.overflow intrinsics. Synthesize
404 // a semantically equivalent expression instead of an extract value
405 // expression.
406 E.Opcode = WO->getBinaryOp();
407 E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getLHS()));
408 E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getRHS()));
409 return E;
410 }
411
412 // Not a recognised intrinsic. Fall back to producing an extract value
413 // expression.
414 E.Opcode = EI->getOpcode();
415 for (Use &Op : EI->operands())
416 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
417
418 append_range(C&: E.VarArgs, R: EI->indices());
419
420 return E;
421}
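
// As an example, `extractvalue {i32, i1} %s, 0` where %s is produced by
// llvm.sadd.with.overflow(%a, %b) is numbered above via the underlying add of
// %a and %b rather than as an extractvalue, so it can share a value number
// with an equivalent plain `add i32 %a, %b`.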
422
423GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
424 Expression E;
425 Type *PtrTy = GEP->getType()->getScalarType();
426 const DataLayout &DL = GEP->getDataLayout();
427 unsigned BitWidth = DL.getIndexTypeSizeInBits(Ty: PtrTy);
428 SmallMapVector<Value *, APInt, 4> VariableOffsets;
429 APInt ConstantOffset(BitWidth, 0);
430 if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
431 // Convert into offset representation, to recognize equivalent address
432 // calculations that use different type encoding.
433 LLVMContext &Context = GEP->getContext();
434 E.Opcode = GEP->getOpcode();
435 E.Ty = nullptr;
436 E.VarArgs.push_back(Elt: lookupOrAdd(V: GEP->getPointerOperand()));
437 for (const auto &[V, Scale] : VariableOffsets) {
438 E.VarArgs.push_back(Elt: lookupOrAdd(V));
439 E.VarArgs.push_back(Elt: lookupOrAdd(V: ConstantInt::get(Context, V: Scale)));
440 }
441 if (!ConstantOffset.isZero())
442 E.VarArgs.push_back(
443 Elt: lookupOrAdd(V: ConstantInt::get(Context, V: ConstantOffset)));
444 } else {
445 // If converting to offset representation fails (for scalable vectors),
446 // fall back to type-based implementation.
447 E.Opcode = GEP->getOpcode();
448 E.Ty = GEP->getSourceElementType();
449 for (Use &Op : GEP->operands())
450 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
451 }
452 return E;
453}
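
// In the offset form produced by createGEPExpr, GEPs that compute the same
// address with different type encodings become equivalent. For example, with
// a data layout where i32 is four bytes, both
//   getelementptr i32, ptr %p, i64 1
//   getelementptr i8, ptr %p, i64 4
// reduce to (pointer, constant offset 4) and receive the same value number.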
454
455//===----------------------------------------------------------------------===//
456// ValueTable External Functions
457//===----------------------------------------------------------------------===//
458
459GVNPass::ValueTable::ValueTable() = default;
460GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
461GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
462GVNPass::ValueTable::~ValueTable() = default;
463GVNPass::ValueTable &
464GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
465
466/// add - Insert a value into the table with a specified value number.
467void GVNPass::ValueTable::add(Value *V, uint32_t Num) {
468 ValueNumbering.insert(KV: std::make_pair(x&: V, y&: Num));
469 if (PHINode *PN = dyn_cast<PHINode>(Val: V))
470 NumberingPhi[Num] = PN;
471}
472
473/// Include the incoming memory state into the hash of the expression for the
474/// given instruction. If the incoming memory state is:
475/// * LiveOnEntry, add the value number of the entry block,
476/// * a MemoryPhi, add the value number of the basic block corresponding to that
477/// MemoryPhi,
478/// * a MemoryDef, add the value number of the memory setting instruction.
479void GVNPass::ValueTable::addMemoryStateToExp(Instruction *I, Expression &Exp) {
480 assert(MSSA && "addMemoryStateToExp should not be called without MemorySSA");
481 assert(MSSA->getMemoryAccess(I) && "Instruction does not access memory");
482 MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I);
483 Exp.VarArgs.push_back(Elt: lookupOrAdd(MA));
484}
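
// Folding the memory state into the expression makes the numbering
// memory-sensitive: two loads of the same pointer only share a value number
// when MemorySSA reports the same (or equally numbered) clobbering access for
// both, i.e. no intervening definition separates them (see computeLoadStoreVN
// below).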
485
486uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
487 // FIXME: Currently the calls which may access the thread id may
488 // be considered as not accessing the memory. But this is
489 // problematic for coroutines, since coroutines may resume in a
  // different thread. So we disable the optimization here for
  // correctness. However, it may block many other correct
  // optimizations. Revert this once we detect the memory
  // accessing kind more precisely.
494 if (C->getFunction()->isPresplitCoroutine()) {
495 ValueNumbering[C] = NextValueNumber;
496 return NextValueNumber++;
497 }
498
499 // Do not combine convergent calls since they implicitly depend on the set of
500 // threads that is currently executing, and they might be in different basic
501 // blocks.
502 if (C->isConvergent()) {
503 ValueNumbering[C] = NextValueNumber;
504 return NextValueNumber++;
505 }
506
507 if (AA->doesNotAccessMemory(Call: C)) {
508 Expression Exp = createExpr(I: C);
509 uint32_t E = assignExpNewValueNum(Exp).first;
510 ValueNumbering[C] = E;
511 return E;
512 }
513
514 if (MD && AA->onlyReadsMemory(Call: C)) {
515 Expression Exp = createExpr(I: C);
516 auto [E, IsValNumNew] = assignExpNewValueNum(Exp);
517 if (IsValNumNew) {
518 ValueNumbering[C] = E;
519 return E;
520 }
521
522 MemDepResult LocalDep = MD->getDependency(QueryInst: C);
523
524 if (!LocalDep.isDef() && !LocalDep.isNonLocal()) {
525 ValueNumbering[C] = NextValueNumber;
526 return NextValueNumber++;
527 }
528
529 if (LocalDep.isDef()) {
530 // For masked load/store intrinsics, the local_dep may actually be
531 // a normal load or store instruction.
532 CallInst *LocalDepCall = dyn_cast<CallInst>(Val: LocalDep.getInst());
533
534 if (!LocalDepCall || LocalDepCall->arg_size() != C->arg_size()) {
535 ValueNumbering[C] = NextValueNumber;
536 return NextValueNumber++;
537 }
538
539 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
540 uint32_t CVN = lookupOrAdd(V: C->getArgOperand(i: I));
541 uint32_t LocalDepCallVN = lookupOrAdd(V: LocalDepCall->getArgOperand(i: I));
542 if (CVN != LocalDepCallVN) {
543 ValueNumbering[C] = NextValueNumber;
544 return NextValueNumber++;
545 }
546 }
547
548 uint32_t V = lookupOrAdd(V: LocalDepCall);
549 ValueNumbering[C] = V;
550 return V;
551 }
552
553 // Non-local case.
554 const MemoryDependenceResults::NonLocalDepInfo &Deps =
555 MD->getNonLocalCallDependency(QueryCall: C);
556 // FIXME: Move the checking logic to MemDep!
557 CallInst *CDep = nullptr;
558
559 // Check to see if we have a single dominating call instruction that is
560 // identical to C.
561 for (const NonLocalDepEntry &I : Deps) {
562 if (I.getResult().isNonLocal())
563 continue;
564
565 // We don't handle non-definitions. If we already have a call, reject
566 // instruction dependencies.
567 if (!I.getResult().isDef() || CDep != nullptr) {
568 CDep = nullptr;
569 break;
570 }
571
572 CallInst *NonLocalDepCall = dyn_cast<CallInst>(Val: I.getResult().getInst());
573 // FIXME: All duplicated with non-local case.
574 if (NonLocalDepCall && DT->properlyDominates(A: I.getBB(), B: C->getParent())) {
575 CDep = NonLocalDepCall;
576 continue;
577 }
578
579 CDep = nullptr;
580 break;
581 }
582
583 if (!CDep) {
584 ValueNumbering[C] = NextValueNumber;
585 return NextValueNumber++;
586 }
587
588 if (CDep->arg_size() != C->arg_size()) {
589 ValueNumbering[C] = NextValueNumber;
590 return NextValueNumber++;
591 }
592 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
593 uint32_t CVN = lookupOrAdd(V: C->getArgOperand(i: I));
594 uint32_t CDepVN = lookupOrAdd(V: CDep->getArgOperand(i: I));
595 if (CVN != CDepVN) {
596 ValueNumbering[C] = NextValueNumber;
597 return NextValueNumber++;
598 }
599 }
600
601 uint32_t V = lookupOrAdd(V: CDep);
602 ValueNumbering[C] = V;
603 return V;
604 }
605
606 if (MSSA && IsMSSAEnabled && AA->onlyReadsMemory(Call: C)) {
607 Expression Exp = createExpr(I: C);
608 addMemoryStateToExp(I: C, Exp);
609 auto [V, _] = assignExpNewValueNum(Exp);
610 ValueNumbering[C] = V;
611 return V;
612 }
613
614 ValueNumbering[C] = NextValueNumber;
615 return NextValueNumber++;
616}
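
// To summarize the cases above: calls in pre-split coroutines, convergent
// calls, and calls that may write memory always receive a fresh value number;
// readnone calls are numbered purely structurally; and readonly calls can only
// share a number with an identical call that has matching argument value
// numbers and either defines the local dependency, is the single dominating
// non-local dependency (MemDep), or has an equal memory state (MemorySSA).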
617
618/// Returns the value number for the specified load or store instruction.
619uint32_t GVNPass::ValueTable::computeLoadStoreVN(Instruction *I) {
620 if (!MSSA || !IsMSSAEnabled) {
621 ValueNumbering[I] = NextValueNumber;
622 return NextValueNumber++;
623 }
624
625 Expression Exp;
626 Exp.Ty = I->getType();
627 Exp.Opcode = I->getOpcode();
628 for (Use &Op : I->operands())
629 Exp.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
630 addMemoryStateToExp(I, Exp);
631
632 auto [V, _] = assignExpNewValueNum(Exp);
633 ValueNumbering[I] = V;
634 return V;
635}
636
637/// Returns true if a value number exists for the specified value.
638bool GVNPass::ValueTable::exists(Value *V) const {
639 return ValueNumbering.contains(Val: V);
640}
641
642uint32_t GVNPass::ValueTable::lookupOrAdd(MemoryAccess *MA) {
643 return MSSA->isLiveOnEntryDef(MA) || isa<MemoryPhi>(Val: MA)
644 ? lookupOrAdd(V: MA->getBlock())
645 : lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst());
646}
647
648/// lookupOrAdd - Returns the value number for the specified value, assigning
649/// it a new number if it did not have one before.
650uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
651 DenseMap<Value *, uint32_t>::iterator VI = ValueNumbering.find(Val: V);
652 if (VI != ValueNumbering.end())
653 return VI->second;
654
655 auto *I = dyn_cast<Instruction>(Val: V);
656 if (!I) {
657 ValueNumbering[V] = NextValueNumber;
658 if (isa<BasicBlock>(Val: V))
659 NumberingBB[NextValueNumber] = cast<BasicBlock>(Val: V);
660 return NextValueNumber++;
661 }
662
663 Expression Exp;
664 switch (I->getOpcode()) {
665 case Instruction::Call:
666 return lookupOrAddCall(C: cast<CallInst>(Val: I));
667 case Instruction::FNeg:
668 case Instruction::Add:
669 case Instruction::FAdd:
670 case Instruction::Sub:
671 case Instruction::FSub:
672 case Instruction::Mul:
673 case Instruction::FMul:
674 case Instruction::UDiv:
675 case Instruction::SDiv:
676 case Instruction::FDiv:
677 case Instruction::URem:
678 case Instruction::SRem:
679 case Instruction::FRem:
680 case Instruction::Shl:
681 case Instruction::LShr:
682 case Instruction::AShr:
683 case Instruction::And:
684 case Instruction::Or:
685 case Instruction::Xor:
686 case Instruction::ICmp:
687 case Instruction::FCmp:
688 case Instruction::Trunc:
689 case Instruction::ZExt:
690 case Instruction::SExt:
691 case Instruction::FPToUI:
692 case Instruction::FPToSI:
693 case Instruction::UIToFP:
694 case Instruction::SIToFP:
695 case Instruction::FPTrunc:
696 case Instruction::FPExt:
697 case Instruction::PtrToInt:
698 case Instruction::PtrToAddr:
699 case Instruction::IntToPtr:
700 case Instruction::AddrSpaceCast:
701 case Instruction::BitCast:
702 case Instruction::Select:
703 case Instruction::Freeze:
704 case Instruction::ExtractElement:
705 case Instruction::InsertElement:
706 case Instruction::ShuffleVector:
707 case Instruction::InsertValue:
708 Exp = createExpr(I);
709 break;
710 case Instruction::GetElementPtr:
711 Exp = createGEPExpr(GEP: cast<GetElementPtrInst>(Val: I));
712 break;
713 case Instruction::ExtractValue:
714 Exp = createExtractvalueExpr(EI: cast<ExtractValueInst>(Val: I));
715 break;
716 case Instruction::PHI:
717 ValueNumbering[V] = NextValueNumber;
718 NumberingPhi[NextValueNumber] = cast<PHINode>(Val: V);
719 return NextValueNumber++;
720 case Instruction::Load:
721 case Instruction::Store:
722 return computeLoadStoreVN(I);
723 default:
724 ValueNumbering[V] = NextValueNumber;
725 return NextValueNumber++;
726 }
727
728 uint32_t E = assignExpNewValueNum(Exp).first;
729 ValueNumbering[V] = E;
730 return E;
731}
732
733/// Returns the value number of the specified value. Fails if
734/// the value has not yet been numbered.
735uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
736 DenseMap<Value *, uint32_t>::const_iterator VI = ValueNumbering.find(Val: V);
737 if (Verify) {
738 assert(VI != ValueNumbering.end() && "Value not numbered?");
739 return VI->second;
740 }
741 return (VI != ValueNumbering.end()) ? VI->second : 0;
742}
743
744/// Returns the value number of the given comparison,
745/// assigning it a new number if it did not have one before. Useful when
746/// we deduced the result of a comparison, but don't immediately have an
747/// instruction realizing that comparison to hand.
748uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
749 CmpInst::Predicate Predicate,
750 Value *LHS, Value *RHS) {
751 Expression Exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
752 return assignExpNewValueNum(Exp).first;
753}
754
755/// Remove all entries from the ValueTable.
756void GVNPass::ValueTable::clear() {
757 ValueNumbering.clear();
758 ExpressionNumbering.clear();
759 NumberingPhi.clear();
760 NumberingBB.clear();
761 PhiTranslateTable.clear();
762 NextValueNumber = 1;
763 Expressions.clear();
764 ExprIdx.clear();
765 NextExprNumber = 0;
766}
767
768/// Remove a value from the value numbering.
769void GVNPass::ValueTable::erase(Value *V) {
770 uint32_t Num = ValueNumbering.lookup(Val: V);
771 ValueNumbering.erase(Val: V);
  // If V is a PHINode, V <--> value number is a one-to-one mapping.
773 if (isa<PHINode>(Val: V))
774 NumberingPhi.erase(Val: Num);
775 else if (isa<BasicBlock>(Val: V))
776 NumberingBB.erase(Val: Num);
777}
778
779/// verifyRemoved - Verify that the value is removed from all internal data
780/// structures.
781void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
782 assert(!ValueNumbering.contains(V) &&
783 "Inst still occurs in value numbering map!");
784}
785
786//===----------------------------------------------------------------------===//
787// LeaderMap External Functions
788//===----------------------------------------------------------------------===//
789
790/// Push a new Value to the LeaderTable onto the list for its value number.
791void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
792 LeaderListNode &Curr = NumToLeaders[N];
793 if (!Curr.Entry.Val) {
794 Curr.Entry.Val = V;
795 Curr.Entry.BB = BB;
796 return;
797 }
798
799 LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>();
800 Node->Entry.Val = V;
801 Node->Entry.BB = BB;
802 Node->Next = Curr.Next;
803 Curr.Next = Node;
804}
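
// Each value number's leader list is a singly linked list whose head node is
// stored directly inside NumToLeaders; extra (Value, BB) entries are chained
// in nodes obtained from TableAllocator. erase() below only unlinks or
// overwrites entries, so nodes are never returned to the allocator.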
805
806/// Scan the list of values corresponding to a given
807/// value number, and remove the given instruction if encountered.
808void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
809 const BasicBlock *BB) {
810 LeaderListNode *Prev = nullptr;
811 LeaderListNode *Curr = &NumToLeaders[N];
812
813 while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
814 Prev = Curr;
815 Curr = Curr->Next;
816 }
817
818 if (!Curr)
819 return;
820
821 if (Prev) {
822 Prev->Next = Curr->Next;
823 } else {
824 if (!Curr->Next) {
825 Curr->Entry.Val = nullptr;
826 Curr->Entry.BB = nullptr;
827 } else {
828 LeaderListNode *Next = Curr->Next;
829 Curr->Entry.Val = Next->Entry.Val;
830 Curr->Entry.BB = Next->Entry.BB;
831 Curr->Next = Next->Next;
832 }
833 }
834}
835
836void GVNPass::LeaderMap::verifyRemoved(const Value *V) const {
837 // Walk through the value number scope to make sure the instruction isn't
838 // ferreted away in it.
839 for (const auto &I : NumToLeaders) {
840 (void)I;
841 assert(I.second.Entry.Val != V && "Inst still in value numbering scope!");
842 assert(
843 std::none_of(leader_iterator(&I.second), leader_iterator(nullptr),
844 [=](const LeaderTableEntry &E) { return E.Val == V; }) &&
845 "Inst still in value numbering scope!");
846 }
847}
848
849//===----------------------------------------------------------------------===//
850// GVN Pass
851//===----------------------------------------------------------------------===//
852
853bool GVNPass::isPREEnabled() const {
854 return Options.AllowPRE.value_or(u&: GVNEnablePRE);
855}
856
857bool GVNPass::isLoadPREEnabled() const {
858 return Options.AllowLoadPRE.value_or(u&: GVNEnableLoadPRE);
859}
860
861bool GVNPass::isLoadInLoopPREEnabled() const {
862 return Options.AllowLoadInLoopPRE.value_or(u&: GVNEnableLoadInLoopPRE);
863}
864
865bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
866 return Options.AllowLoadPRESplitBackedge.value_or(
867 u&: GVNEnableSplitBackedgeInLoadPRE);
868}
869
870bool GVNPass::isMemDepEnabled() const {
871 return Options.AllowMemDep.value_or(u&: GVNEnableMemDep);
872}
873
874bool GVNPass::isMemorySSAEnabled() const {
875 return Options.AllowMemorySSA.value_or(u&: GVNEnableMemorySSA);
876}
877
878PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
879 // FIXME: The order of evaluation of these 'getResult' calls is very
880 // significant! Re-ordering these variables will cause GVN when run alone to
881 // be less effective! We should fix memdep and basic-aa to not exhibit this
882 // behavior, but until then don't change the order here.
883 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
884 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
885 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
886 auto &AA = AM.getResult<AAManager>(IR&: F);
887 auto *MemDep =
888 isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(IR&: F) : nullptr;
889 auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
890 auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(IR&: F);
891 if (isMemorySSAEnabled() && !MSSA) {
892 assert(!MemDep &&
893 "On-demand computation of MemSSA implies that MemDep is disabled!");
894 MSSA = &AM.getResult<MemorySSAAnalysis>(IR&: F);
895 }
896 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
897 bool Changed = runImpl(F, RunAC&: AC, RunDT&: DT, RunTLI: TLI, RunAA&: AA, RunMD: MemDep, LI, ORE: &ORE,
898 MSSA: MSSA ? &MSSA->getMSSA() : nullptr);
899 if (!Changed)
900 return PreservedAnalyses::all();
901 PreservedAnalyses PA;
902 PA.preserve<DominatorTreeAnalysis>();
903 PA.preserve<TargetLibraryAnalysis>();
904 if (MSSA)
905 PA.preserve<MemorySSAAnalysis>();
906 PA.preserve<LoopAnalysis>();
907 return PA;
908}
909
910void GVNPass::printPipeline(
911 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
912 static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
913 OS, MapClassName2PassName);
914
915 OS << '<';
916 if (Options.AllowPRE != std::nullopt)
917 OS << (*Options.AllowPRE ? "" : "no-") << "pre;";
918 if (Options.AllowLoadPRE != std::nullopt)
919 OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
920 if (Options.AllowLoadPRESplitBackedge != std::nullopt)
921 OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
922 << "split-backedge-load-pre;";
923 if (Options.AllowMemDep != std::nullopt)
924 OS << (*Options.AllowMemDep ? "" : "no-") << "memdep;";
925 if (Options.AllowMemorySSA != std::nullopt)
926 OS << (*Options.AllowMemorySSA ? "" : "no-") << "memoryssa";
927 OS << '>';
928}
929
930void GVNPass::salvageAndRemoveInstruction(Instruction *I) {
931 salvageKnowledge(I, AC);
932 salvageDebugInfo(I&: *I);
933 removeInstruction(I);
934}
935
936#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
937LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &Map) const {
938 errs() << "{\n";
939 for (const auto &[Num, Exp] : Map) {
940 errs() << Num << "\n";
941 Exp->dump();
942 }
943 errs() << "}\n";
944}
945#endif
946
947enum class AvailabilityState : char {
948 /// We know the block *is not* fully available. This is a fixpoint.
949 Unavailable = 0,
950 /// We know the block *is* fully available. This is a fixpoint.
951 Available = 1,
952 /// We do not know whether the block is fully available or not,
953 /// but we are currently speculating that it will be.
954 /// If it would have turned out that the block was, in fact, not fully
955 /// available, this would have been cleaned up into an Unavailable.
956 SpeculativelyAvailable = 2,
957};
958
959/// Return true if we can prove that the value
960/// we're analyzing is fully available in the specified block. As we go, keep
961/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
962/// map is actually a tri-state map with the following values:
963/// 0) we know the block *is not* fully available.
964/// 1) we know the block *is* fully available.
965/// 2) we do not know whether the block is fully available or not, but we are
966/// currently speculating that it will be.
967static bool IsValueFullyAvailableInBlock(
968 BasicBlock *BB,
969 DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
970 SmallVector<BasicBlock *, 32> Worklist;
971 std::optional<BasicBlock *> UnavailableBB;
972
  // The number of times we didn't find an entry for a block in the map and
  // optimistically inserted an entry marking the block as speculatively
  // available.
975 unsigned NumNewNewSpeculativelyAvailableBBs = 0;
976
977#ifndef NDEBUG
978 SmallPtrSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
979 SmallVector<BasicBlock *, 32> AvailableBBs;
980#endif
981
982 Worklist.emplace_back(Args&: BB);
983 while (!Worklist.empty()) {
    BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
985 // Optimistically assume that the block is Speculatively Available and check
986 // to see if we already know about this block in one lookup.
987 std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
988 FullyAvailableBlocks.try_emplace(
989 Key: CurrBB, Args: AvailabilityState::SpeculativelyAvailable);
990 AvailabilityState &State = IV.first->second;
991
992 // Did the entry already exist for this block?
993 if (!IV.second) {
994 if (State == AvailabilityState::Unavailable) {
995 UnavailableBB = CurrBB;
996 break; // Backpropagate unavailability info.
997 }
998
999#ifndef NDEBUG
1000 AvailableBBs.emplace_back(CurrBB);
1001#endif
1002 continue; // Don't recurse further, but continue processing worklist.
1003 }
1004
1005 // No entry found for block.
1006 ++NumNewNewSpeculativelyAvailableBBs;
1007 bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
1008
1009 // If we have exhausted our budget, mark this block as unavailable.
1010 // Also, if this block has no predecessors, the value isn't live-in here.
1011 if (OutOfBudget || pred_empty(BB: CurrBB)) {
1012 MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
1013 State = AvailabilityState::Unavailable;
1014 UnavailableBB = CurrBB;
1015 break; // Backpropagate unavailability info.
1016 }
1017
1018 // Tentatively consider this block as speculatively available.
1019#ifndef NDEBUG
1020 NewSpeculativelyAvailableBBs.insert(CurrBB);
1021#endif
1022 // And further recurse into block's predecessors, in depth-first order!
1023 Worklist.append(in_start: pred_begin(BB: CurrBB), in_end: pred_end(BB: CurrBB));
1024 }
1025
1026#if LLVM_ENABLE_STATS
1027 IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
1028 NumNewNewSpeculativelyAvailableBBs);
1029#endif
1030
1031 // If the block isn't marked as fixpoint yet
1032 // (the Unavailable and Available states are fixpoints).
1033 auto MarkAsFixpointAndEnqueueSuccessors =
1034 [&](BasicBlock *BB, AvailabilityState FixpointState) {
1035 auto It = FullyAvailableBlocks.find(Val: BB);
1036 if (It == FullyAvailableBlocks.end())
1037 return; // Never queried this block, leave as-is.
1038 switch (AvailabilityState &State = It->second) {
1039 case AvailabilityState::Unavailable:
1040 case AvailabilityState::Available:
1041 return; // Don't backpropagate further, continue processing worklist.
1042 case AvailabilityState::SpeculativelyAvailable: // Fix it!
1043 State = FixpointState;
1044#ifndef NDEBUG
1045 assert(NewSpeculativelyAvailableBBs.erase(BB) &&
1046 "Found a speculatively available successor leftover?");
1047#endif
1048 // Queue successors for further processing.
1049 Worklist.append(in_start: succ_begin(BB), in_end: succ_end(BB));
1050 return;
1051 }
1052 };
1053
1054 if (UnavailableBB) {
1055 // Okay, we have encountered an unavailable block.
1056 // Mark speculatively available blocks reachable from UnavailableBB as
1057 // unavailable as well. Paths are terminated when they reach blocks not in
1058 // FullyAvailableBlocks or they are not marked as speculatively available.
1059 Worklist.clear();
1060 Worklist.append(in_start: succ_begin(BB: *UnavailableBB), in_end: succ_end(BB: *UnavailableBB));
1061 while (!Worklist.empty())
1062 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1063 AvailabilityState::Unavailable);
1064 }
1065
1066#ifndef NDEBUG
1067 Worklist.clear();
1068 for (BasicBlock *AvailableBB : AvailableBBs)
1069 Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
1070 while (!Worklist.empty())
1071 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1072 AvailabilityState::Available);
1073
1074 assert(NewSpeculativelyAvailableBBs.empty() &&
1075 "Must have fixed all the new speculatively available blocks.");
1076#endif
1077
1078 return !UnavailableBB;
1079}
1080
1081/// If the specified OldValue exists in ValuesPerBlock, replace its value with
1082/// NewValue.
1083static void replaceValuesPerBlockEntry(
1084 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1085 Value *NewValue) {
1086 for (AvailableValueInBlock &V : ValuesPerBlock) {
1087 if (V.AV.Val == OldValue)
1088 V.AV.Val = NewValue;
1089 if (V.AV.isSelectValue()) {
1090 if (V.AV.V1 == OldValue)
1091 V.AV.V1 = NewValue;
1092 if (V.AV.V2 == OldValue)
1093 V.AV.V2 = NewValue;
1094 }
1095 }
1096}
1097
1098/// Given a set of loads specified by ValuesPerBlock,
1099/// construct SSA form, allowing us to eliminate Load. This returns the value
1100/// that should be used at Load's definition site.
1101static Value *
1102ConstructSSAForLoadSet(LoadInst *Load,
1103 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1104 GVNPass &GVN) {
1105 // Check for the fully redundant, dominating load case. In this case, we can
1106 // just use the dominating value directly.
1107 if (ValuesPerBlock.size() == 1 &&
1108 GVN.getDominatorTree().properlyDominates(A: ValuesPerBlock[0].BB,
1109 B: Load->getParent())) {
1110 assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1111 "Dead BB dominate this block");
1112 return ValuesPerBlock[0].MaterializeAdjustedValue(Load);
1113 }
1114
1115 // Otherwise, we have to construct SSA form.
1116 SmallVector<PHINode*, 8> NewPHIs;
1117 SSAUpdater SSAUpdate(&NewPHIs);
1118 SSAUpdate.Initialize(Ty: Load->getType(), Name: Load->getName());
1119
1120 for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1121 BasicBlock *BB = AV.BB;
1122
1123 if (AV.AV.isUndefValue())
1124 continue;
1125
1126 if (SSAUpdate.HasValueForBlock(BB))
1127 continue;
1128
1129 // If the value is the load that we will be eliminating, and the block it's
1130 // available in is the block that the load is in, then don't add it as
1131 // SSAUpdater will resolve the value to the relevant phi which may let it
1132 // avoid phi construction entirely if there's actually only one value.
1133 if (BB == Load->getParent() &&
1134 ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
1135 (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
1136 continue;
1137
1138 SSAUpdate.AddAvailableValue(BB, V: AV.MaterializeAdjustedValue(Load));
1139 }
1140
1141 // Perform PHI construction.
1142 return SSAUpdate.GetValueInMiddleOfBlock(BB: Load->getParent());
1143}
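
// Typical outcome: with a single available value that properly dominates the
// load, that value is returned directly; otherwise, e.g. when each of two
// predecessors recorded a different available value, SSAUpdater typically
// inserts a PHI in the load's block and returns it.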
1144
1145Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
1146 Instruction *InsertPt) const {
1147 Value *Res;
1148 Type *LoadTy = Load->getType();
1149 const DataLayout &DL = Load->getDataLayout();
1150 if (isSimpleValue()) {
1151 Res = getSimpleValue();
1152 if (Res->getType() != LoadTy) {
1153 Res = getValueForLoad(SrcVal: Res, Offset, LoadTy, InsertPt, F: Load->getFunction());
1154
1155 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1156 << " " << *getSimpleValue() << '\n'
1157 << *Res << '\n'
1158 << "\n\n\n");
1159 }
1160 } else if (isCoercedLoadValue()) {
1161 LoadInst *CoercedLoad = getCoercedLoadValue();
1162 if (CoercedLoad->getType() == LoadTy && Offset == 0) {
1163 Res = CoercedLoad;
1164 combineMetadataForCSE(K: CoercedLoad, J: Load, DoesKMove: false);
1165 } else {
1166 Res = getValueForLoad(SrcVal: CoercedLoad, Offset, LoadTy, InsertPt,
1167 F: Load->getFunction());
1168 // We are adding a new user for this load, for which the original
1169 // metadata may not hold. Additionally, the new load may have a different
1170 // size and type, so their metadata cannot be combined in any
1171 // straightforward way.
1172 // Drop all metadata that is not known to cause immediate UB on violation,
1173 // unless the load has !noundef, in which case all metadata violations
1174 // will be promoted to UB.
1175 // TODO: We can combine noalias/alias.scope metadata here, because it is
1176 // independent of the load type.
1177 if (!CoercedLoad->hasMetadata(KindID: LLVMContext::MD_noundef))
1178 CoercedLoad->dropUnknownNonDebugMetadata(
1179 KnownIDs: {LLVMContext::MD_dereferenceable,
1180 LLVMContext::MD_dereferenceable_or_null,
1181 LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group});
1182 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1183 << " " << *getCoercedLoadValue() << '\n'
1184 << *Res << '\n'
1185 << "\n\n\n");
1186 }
1187 } else if (isMemIntrinValue()) {
1188 Res = getMemInstValueForLoad(SrcInst: getMemIntrinValue(), Offset, LoadTy,
1189 InsertPt, DL);
1190 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1191 << " " << *getMemIntrinValue() << '\n'
1192 << *Res << '\n'
1193 << "\n\n\n");
1194 } else if (isSelectValue()) {
1195 // Introduce a new value select for a load from an eligible pointer select.
1196 SelectInst *Sel = getSelectValue();
1197 assert(V1 && V2 && "both value operands of the select must be present");
1198 Res =
1199 SelectInst::Create(C: Sel->getCondition(), S1: V1, S2: V2, NameStr: "", InsertBefore: Sel->getIterator());
1200 // We use the DebugLoc from the original load here, as this instruction
1201 // materializes the value that would previously have been loaded.
1202 cast<SelectInst>(Val: Res)->setDebugLoc(Load->getDebugLoc());
1203 } else {
1204 llvm_unreachable("Should not materialize value from dead block");
1205 }
1206 assert(Res && "failed to materialize?");
1207 return Res;
1208}
1209
1210static bool isLifetimeStart(const Instruction *Inst) {
1211 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Val: Inst))
1212 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1213 return false;
1214}
1215
1216/// Assuming To can be reached from both From and Between, does Between lie on
1217/// every path from From to To?
1218static bool liesBetween(const Instruction *From, Instruction *Between,
1219 const Instruction *To, const DominatorTree *DT) {
1220 if (From->getParent() == Between->getParent())
1221 return DT->dominates(Def: From, User: Between);
1222 SmallPtrSet<BasicBlock *, 1> Exclusion;
1223 Exclusion.insert(Ptr: Between->getParent());
1224 return !isPotentiallyReachable(From, To, ExclusionSet: &Exclusion, DT);
1225}
1226
1227static const Instruction *findMayClobberedPtrAccess(LoadInst *Load,
1228 const DominatorTree *DT) {
1229 Value *PtrOp = Load->getPointerOperand();
1230 if (!PtrOp->hasUseList())
1231 return nullptr;
1232
1233 Instruction *OtherAccess = nullptr;
1234
1235 for (auto *U : PtrOp->users()) {
1236 if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) {
1237 auto *I = cast<Instruction>(Val: U);
1238 if (I->getFunction() == Load->getFunction() && DT->dominates(Def: I, User: Load)) {
1239 // Use the most immediately dominating value.
1240 if (OtherAccess) {
1241 if (DT->dominates(Def: OtherAccess, User: I))
1242 OtherAccess = I;
1243 else
1244 assert(U == OtherAccess || DT->dominates(I, OtherAccess));
1245 } else
1246 OtherAccess = I;
1247 }
1248 }
1249 }
1250
1251 if (OtherAccess)
1252 return OtherAccess;
1253
1254 // There is no dominating use, check if we can find a closest non-dominating
1255 // use that lies between any other potentially available use and Load.
1256 for (auto *U : PtrOp->users()) {
1257 if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) {
1258 auto *I = cast<Instruction>(Val: U);
1259 if (I->getFunction() == Load->getFunction() &&
1260 isPotentiallyReachable(From: I, To: Load, ExclusionSet: nullptr, DT)) {
1261 if (OtherAccess) {
1262 if (liesBetween(From: OtherAccess, Between: I, To: Load, DT)) {
1263 OtherAccess = I;
1264 } else if (!liesBetween(From: I, Between: OtherAccess, To: Load, DT)) {
1265 // These uses are both partially available at Load were it not for
1266 // the clobber, but neither lies strictly after the other.
1267 OtherAccess = nullptr;
1268 break;
1269 } // else: keep current OtherAccess since it lies between U and
1270 // Load.
1271 } else {
1272 OtherAccess = I;
1273 }
1274 }
1275 }
1276 }
1277
1278 return OtherAccess;
1279}
1280
/// Try to locate the three instructions involved in a missed
1282/// load-elimination case that is due to an intervening store.
1283static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
1284 const DominatorTree *DT,
1285 OptimizationRemarkEmitter *ORE) {
1286 using namespace ore;
1287
1288 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
1289 R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
1290 << setExtraArgs();
1291
1292 const Instruction *OtherAccess = findMayClobberedPtrAccess(Load, DT);
1293 if (OtherAccess)
1294 R << " in favor of " << NV("OtherAccess", OtherAccess);
1295
1296 R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
1297
1298 ORE->emit(OptDiag&: R);
1299}
1300
// Find a non-clobbered value for the Loc memory location in the extended basic
// block (chain of basic blocks with single predecessors) starting from the
// From instruction.
1303static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy,
1304 Instruction *From, AAResults *AA) {
1305 uint32_t NumVisitedInsts = 0;
1306 BasicBlock *FromBB = From->getParent();
1307 BatchAAResults BatchAA(*AA);
1308 for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
1309 for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
1310 Inst != nullptr; Inst = Inst->getPrevNode()) {
1311 // Stop the search if limit is reached.
1312 if (++NumVisitedInsts > MaxNumVisitedInsts)
1313 return nullptr;
1314 if (isModSet(MRI: BatchAA.getModRefInfo(I: Inst, OptLoc: Loc)))
1315 return nullptr;
1316 if (auto *LI = dyn_cast<LoadInst>(Val: Inst))
1317 if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
1318 return LI;
1319 }
1320 return nullptr;
1321}
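
// Example use (see AnalyzeLoadAvailability below): for a load whose address is
// `select i1 %c, ptr %p, ptr %q`, findDominatingValue searches upwards from
// the select for unclobbered `load i32, ptr %p` / `load i32, ptr %q` values of
// the right type, so that the load can later be rewritten as a value select.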
1322
1323std::optional<AvailableValue>
1324GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
1325 Value *Address) {
1326 assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1327 assert(DepInfo.isLocal() && "expected a local dependence");
1328
1329 Instruction *DepInst = DepInfo.getInst();
1330
1331 const DataLayout &DL = Load->getDataLayout();
1332 if (DepInfo.isClobber()) {
1333 // If the dependence is to a store that writes to a superset of the bits
1334 // read by the load, we can extract the bits we need for the load from the
1335 // stored value.
1336 if (StoreInst *DepSI = dyn_cast<StoreInst>(Val: DepInst)) {
1337 // Can't forward from non-atomic to atomic without violating memory model.
1338 if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
1339 int Offset =
1340 analyzeLoadFromClobberingStore(LoadTy: Load->getType(), LoadPtr: Address, DepSI, DL);
1341 if (Offset != -1)
1342 return AvailableValue::get(V: DepSI->getValueOperand(), Offset);
1343 }
1344 }
1345
1346 // Check to see if we have something like this:
1347 // load i32* P
1348 // load i8* (P+1)
    // if we have this, replace the latter with an extraction from the former.
1350 if (LoadInst *DepLoad = dyn_cast<LoadInst>(Val: DepInst)) {
1351 // If this is a clobber and L is the first instruction in its block, then
1352 // we have the first instruction in the entry block.
1353 // Can't forward from non-atomic to atomic without violating memory model.
1354 if (DepLoad != Load && Address &&
1355 Load->isAtomic() <= DepLoad->isAtomic()) {
1356 Type *LoadType = Load->getType();
1357 int Offset = -1;
1358
1359 // If MD reported clobber, check it was nested.
1360 if (DepInfo.isClobber() &&
1361 canCoerceMustAliasedValueToLoad(StoredVal: DepLoad, LoadTy: LoadType,
1362 F: DepLoad->getFunction())) {
1363 const auto ClobberOff = MD->getClobberOffset(DepInst: DepLoad);
          // GVN cannot handle a negative offset.
1365 Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
1366 ? -1
1367 : *ClobberOff;
1368 }
1369 if (Offset == -1)
1370 Offset =
1371 analyzeLoadFromClobberingLoad(LoadTy: LoadType, LoadPtr: Address, DepLI: DepLoad, DL);
1372 if (Offset != -1)
1373 return AvailableValue::getLoad(Load: DepLoad, Offset);
1374 }
1375 }
1376
1377 // If the clobbering value is a memset/memcpy/memmove, see if we can
1378 // forward a value on from it.
1379 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Val: DepInst)) {
1380 if (Address && !Load->isAtomic()) {
1381 int Offset = analyzeLoadFromClobberingMemInst(LoadTy: Load->getType(), LoadPtr: Address,
1382 DepMI, DL);
1383 if (Offset != -1)
1384 return AvailableValue::getMI(MI: DepMI, Offset);
1385 }
1386 }
1387
1388 // Nothing known about this clobber, have to be conservative.
1389 LLVM_DEBUG(
1390 // fast print dep, using operator<< on instruction is too slow.
1391 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1392 dbgs() << " is clobbered by " << *DepInst << '\n';);
1393 if (ORE->allowExtraAnalysis(DEBUG_TYPE))
1394 reportMayClobberedLoad(Load, DepInfo, DT, ORE);
1395
1396 return std::nullopt;
1397 }
1398 assert(DepInfo.isDef() && "follows from above");
1399
1400 // Loading the alloca -> undef.
1401 // Loading immediately after lifetime begin -> undef.
1402 if (isa<AllocaInst>(Val: DepInst) || isLifetimeStart(Inst: DepInst))
1403 return AvailableValue::get(V: UndefValue::get(T: Load->getType()));
1404
1405 if (Constant *InitVal =
1406 getInitialValueOfAllocation(V: DepInst, TLI, Ty: Load->getType()))
1407 return AvailableValue::get(V: InitVal);
1408
1409 if (StoreInst *S = dyn_cast<StoreInst>(Val: DepInst)) {
1410 // Reject loads and stores that are to the same address but are of
    // different types if we have to. If the stored value is convertible to
1412 // the loaded value, we can reuse it.
1413 if (!canCoerceMustAliasedValueToLoad(StoredVal: S->getValueOperand(), LoadTy: Load->getType(),
1414 F: S->getFunction()))
1415 return std::nullopt;
1416
1417 // Can't forward from non-atomic to atomic without violating memory model.
1418 if (S->isAtomic() < Load->isAtomic())
1419 return std::nullopt;
1420
1421 return AvailableValue::get(V: S->getValueOperand());
1422 }
1423
1424 if (LoadInst *LD = dyn_cast<LoadInst>(Val: DepInst)) {
1425 // If the types mismatch and we can't handle it, reject reuse of the load.
1426 // If the stored value is larger or equal to the loaded value, we can reuse
1427 // it.
1428 if (!canCoerceMustAliasedValueToLoad(StoredVal: LD, LoadTy: Load->getType(),
1429 F: LD->getFunction()))
1430 return std::nullopt;
1431
1432 // Can't forward from non-atomic to atomic without violating memory model.
1433 if (LD->isAtomic() < Load->isAtomic())
1434 return std::nullopt;
1435
1436 return AvailableValue::getLoad(Load: LD);
1437 }
1438
1439 // Check if load with Addr dependent from select can be converted to select
1440 // between load values. There must be no instructions between the found
1441 // loads and DepInst that may clobber the loads.
1442 if (auto *Sel = dyn_cast<SelectInst>(Val: DepInst)) {
1443 assert(Sel->getType() == Load->getPointerOperandType());
1444 auto Loc = MemoryLocation::get(LI: Load);
1445 Value *V1 =
1446 findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: Sel->getTrueValue()),
1447 LoadTy: Load->getType(), From: DepInst, AA: getAliasAnalysis());
1448 if (!V1)
1449 return std::nullopt;
1450 Value *V2 =
1451 findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: Sel->getFalseValue()),
1452 LoadTy: Load->getType(), From: DepInst, AA: getAliasAnalysis());
1453 if (!V2)
1454 return std::nullopt;
1455 return AvailableValue::getSelect(Sel, V1, V2);
1456 }
1457
1458 // Unknown def - must be conservative.
1459 LLVM_DEBUG(
1460 // fast print dep, using operator<< on instruction is too slow.
1461 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1462 dbgs() << " has unknown def " << *DepInst << '\n';);
1463 return std::nullopt;
1464}
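
// A concrete instance of the clobbered-by-store case handled above:
//   store i32 %v, ptr %p
//   %q = getelementptr i8, ptr %p, i64 1
//   %b = load i8, ptr %q     ; overlaps the store, reported as a clobber
// The load can often still be satisfied by extracting the relevant byte of %v:
// AvailableValue::get(%v, Offset) records this, and MaterializeAdjustedValue
// emits the extraction.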
1465
1466void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
1467 AvailValInBlkVect &ValuesPerBlock,
1468 UnavailBlkVect &UnavailableBlocks) {
1469 // Filter out useless results (non-locals, etc). Keep track of the blocks
1470 // where we have a value available in repl, also keep track of whether we see
1471 // dependencies that produce an unknown value for the load (such as a call
1472 // that could potentially clobber the load).
1473 for (const auto &Dep : Deps) {
1474 BasicBlock *DepBB = Dep.getBB();
1475 MemDepResult DepInfo = Dep.getResult();
1476
1477 if (DeadBlocks.count(key: DepBB)) {
      // A dead dependent mem-op is disguised as a load evaluating the same
      // value as the load in question.
1480 ValuesPerBlock.push_back(Elt: AvailableValueInBlock::getUndef(BB: DepBB));
1481 continue;
1482 }
1483
1484 if (!DepInfo.isLocal()) {
1485 UnavailableBlocks.push_back(Elt: DepBB);
1486 continue;
1487 }
1488
1489 // The address being loaded in this non-local block may not be the same as
1490 // the pointer operand of the load if PHI translation occurs. Make sure
1491 // to consider the right address.
1492 if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Address: Dep.getAddress())) {
      // Subtlety: because we know this was a non-local dependency, we know
      // it's safe to materialize anywhere between the instruction within
      // DepInfo and the end of its block.
1496 ValuesPerBlock.push_back(
1497 Elt: AvailableValueInBlock::get(BB: DepBB, AV: std::move(*AV)));
1498 } else {
1499 UnavailableBlocks.push_back(Elt: DepBB);
1500 }
1501 }
1502
1503 assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1504 "post condition violation");
1505}
1506
1507/// Given the following code, v1 is partially available on some edges, but not
1508/// available on the edge from PredBB. This function tries to find if there is
1509/// another identical load in the other successor of PredBB.
1510///
1511/// v0 = load %addr
1512/// br %LoadBB
1513///
1514/// LoadBB:
1515/// v1 = load %addr
1516/// ...
1517///
1518/// PredBB:
1519/// ...
1520/// br %cond, label %LoadBB, label %SuccBB
1521///
1522/// SuccBB:
1523/// v2 = load %addr
1524/// ...
1525///
1526LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
1527 LoadInst *Load) {
1528  // For simplicity, we only handle the case where Pred has two successors.
1529 auto *Term = Pred->getTerminator();
1530 if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
1531 return nullptr;
1532 auto *SuccBB = Term->getSuccessor(Idx: 0);
1533 if (SuccBB == LoadBB)
1534 SuccBB = Term->getSuccessor(Idx: 1);
1535 if (!SuccBB->getSinglePredecessor())
1536 return nullptr;
1537
1538 unsigned int NumInsts = MaxNumInsnsPerBlock;
1539 for (Instruction &Inst : *SuccBB) {
1540 if (Inst.isDebugOrPseudoInst())
1541 continue;
1542 if (--NumInsts == 0)
1543 return nullptr;
1544
1545 if (!Inst.isIdenticalTo(I: Load))
1546 continue;
1547
1548 MemDepResult Dep = MD->getDependency(QueryInst: &Inst);
1549    // If an identical load doesn't depend on any local instructions, it can
1550    // be safely moved to PredBB.
1551 // Also check for the implicit control flow instructions. See the comments
1552 // in PerformLoadPRE for details.
1553 if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(Insn: &Inst))
1554 return cast<LoadInst>(Val: &Inst);
1555
1556    // Otherwise, something in the same BB clobbers the memory, so we can't
1557    // move this load and the later load to PredBB.
1558 return nullptr;
1559 }
1560
1561 return nullptr;
1562}
1563
1564void GVNPass::eliminatePartiallyRedundantLoad(
1565 LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1566 MapVector<BasicBlock *, Value *> &AvailableLoads,
1567 MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
1568 for (const auto &AvailableLoad : AvailableLoads) {
1569 BasicBlock *UnavailableBlock = AvailableLoad.first;
1570 Value *LoadPtr = AvailableLoad.second;
1571
1572 auto *NewLoad = new LoadInst(
1573 Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
1574 Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
1575 UnavailableBlock->getTerminator()->getIterator());
1576 NewLoad->setDebugLoc(Load->getDebugLoc());
1577 if (MSSAU) {
1578 auto *NewAccess = MSSAU->createMemoryAccessInBB(
1579 I: NewLoad, Definition: nullptr, BB: NewLoad->getParent(), Point: MemorySSA::BeforeTerminator);
1580 if (auto *NewDef = dyn_cast<MemoryDef>(Val: NewAccess))
1581 MSSAU->insertDef(Def: NewDef, /*RenameUses=*/true);
1582 else
1583 MSSAU->insertUse(Use: cast<MemoryUse>(Val: NewAccess), /*RenameUses=*/true);
1584 }
1585
1586 // Transfer the old load's AA tags to the new load.
1587 AAMDNodes Tags = Load->getAAMetadata();
1588 if (Tags)
1589 NewLoad->setAAMetadata(Tags);
1590
1591 if (auto *MD = Load->getMetadata(KindID: LLVMContext::MD_invariant_load))
1592 NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD);
1593 if (auto *InvGroupMD = Load->getMetadata(KindID: LLVMContext::MD_invariant_group))
1594 NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_group, Node: InvGroupMD);
1595 if (auto *RangeMD = Load->getMetadata(KindID: LLVMContext::MD_range))
1596 NewLoad->setMetadata(KindID: LLVMContext::MD_range, Node: RangeMD);
1597 if (auto *NoFPClassMD = Load->getMetadata(KindID: LLVMContext::MD_nofpclass))
1598 NewLoad->setMetadata(KindID: LLVMContext::MD_nofpclass, Node: NoFPClassMD);
1599
1600 if (auto *AccessMD = Load->getMetadata(KindID: LLVMContext::MD_access_group))
1601 if (LI->getLoopFor(BB: Load->getParent()) == LI->getLoopFor(BB: UnavailableBlock))
1602 NewLoad->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessMD);
1603
1604 // We do not propagate the old load's debug location, because the new
1605 // load now lives in a different BB, and we want to avoid a jumpy line
1606 // table.
1607 // FIXME: How do we retain source locations without causing poor debugging
1608 // behavior?
1609
1610 // Add the newly created load.
1611 ValuesPerBlock.push_back(
1612 Elt: AvailableValueInBlock::get(BB: UnavailableBlock, V: NewLoad));
1613 MD->invalidateCachedPointerInfo(Ptr: LoadPtr);
1614 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1615
1616    // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of the
1617    // old load instruction with the newly created load instruction.
1618 if (CriticalEdgePredAndLoad) {
1619 auto It = CriticalEdgePredAndLoad->find(Key: UnavailableBlock);
1620 if (It != CriticalEdgePredAndLoad->end()) {
1621 ++NumPRELoadMoved2CEPred;
1622 ICF->insertInstructionTo(Inst: NewLoad, BB: UnavailableBlock);
1623 LoadInst *OldLoad = It->second;
1624 combineMetadataForCSE(K: NewLoad, J: OldLoad, DoesKMove: false);
1625 OldLoad->replaceAllUsesWith(V: NewLoad);
1626 replaceValuesPerBlockEntry(ValuesPerBlock, OldValue: OldLoad, NewValue: NewLoad);
1627 if (uint32_t ValNo = VN.lookup(V: OldLoad, Verify: false))
1628 LeaderTable.erase(N: ValNo, I: OldLoad, BB: OldLoad->getParent());
1629 removeInstruction(I: OldLoad);
1630 }
1631 }
1632 }
1633
1634 // Perform PHI construction.
1635 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this);
1636 // ConstructSSAForLoadSet is responsible for combining metadata.
1637 ICF->removeUsersOf(Inst: Load);
1638 Load->replaceAllUsesWith(V);
1639 if (isa<PHINode>(Val: V))
1640 V->takeName(V: Load);
1641 if (Instruction *I = dyn_cast<Instruction>(Val: V))
1642 I->setDebugLoc(Load->getDebugLoc());
1643 if (V->getType()->isPtrOrPtrVectorTy())
1644 MD->invalidateCachedPointerInfo(Ptr: V);
1645 ORE->emit(RemarkBuilder: [&]() {
1646 return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
1647 << "load eliminated by PRE";
1648 });
1649 salvageAndRemoveInstruction(I: Load);
1650}
1651
1652bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1653 UnavailBlkVect &UnavailableBlocks) {
1654 // Okay, we have *some* definitions of the value. This means that the value
1655  // is available in some of our (transitive) predecessors. Let's think about
1656 // doing PRE of this load. This will involve inserting a new load into the
1657 // predecessor when it's not available. We could do this in general, but
1658 // prefer to not increase code size. As such, we only do this when we know
1659 // that we only have to insert *one* load (which means we're basically moving
1660 // the load, not inserting a new one).
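  //
  // Illustrative sketch (simplified pseudo-IR; names are hypothetical):
  //
  //   Pred1:
  //     %v0 = load i32, ptr %p
  //     br label %LoadBB
  //   Pred2:
  //     br label %LoadBB
  //   LoadBB:
  //     %v1 = load i32, ptr %p   ; partially redundant
  //
  // The value is available from Pred1 but not Pred2, so PRE inserts a load at
  // the end of Pred2 and replaces %v1 with a phi of %v0 and the new load.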
1661
1662 SmallPtrSet<BasicBlock *, 4> Blockers(llvm::from_range, UnavailableBlocks);
1663
1664 // Let's find the first basic block with more than one predecessor. Walk
1665 // backwards through predecessors if needed.
1666 BasicBlock *LoadBB = Load->getParent();
1667 BasicBlock *TmpBB = LoadBB;
1668
1669  // Check that there are no implicit control flow instructions above our load
1670  // in its block. If there is an instruction that doesn't always pass the
1671 // execution to the following instruction, then moving through it may become
1672 // invalid. For example:
1673 //
1674 // int arr[LEN];
1675 // int index = ???;
1676 // ...
1677 // guard(0 <= index && index < LEN);
1678 // use(arr[index]);
1679 //
1680 // It is illegal to move the array access to any point above the guard,
1681 // because if the index is out of bounds we should deoptimize rather than
1682 // access the array.
1683 // Check that there is no guard in this block above our instruction.
1684 bool MustEnsureSafetyOfSpeculativeExecution =
1685 ICF->isDominatedByICFIFromSameBlock(Insn: Load);
1686
1687 while (TmpBB->getSinglePredecessor()) {
1688 TmpBB = TmpBB->getSinglePredecessor();
1689 if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1690 return false;
1691 if (Blockers.count(Ptr: TmpBB))
1692 return false;
1693
1694 // If any of these blocks has more than one successor (i.e. if the edge we
1695 // just traversed was critical), then there are other paths through this
1696 // block along which the load may not be anticipated. Hoisting the load
1697 // above this block would be adding the load to execution paths along
1698 // which it was not previously executed.
1699 if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1700 return false;
1701
1702 // Check that there is no implicit control flow in a block above.
1703 MustEnsureSafetyOfSpeculativeExecution =
1704 MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(BB: TmpBB);
1705 }
1706
1707 assert(TmpBB);
1708 LoadBB = TmpBB;
1709
1710 // Check to see how many predecessors have the loaded value fully
1711 // available.
1712 MapVector<BasicBlock *, Value *> PredLoads;
1713 DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
1714 for (const AvailableValueInBlock &AV : ValuesPerBlock)
1715 FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
1716 for (BasicBlock *UnavailableBB : UnavailableBlocks)
1717 FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
1718
1719  // The edge from Pred to LoadBB is a critical edge and will be split.
1720 SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
1721  // The edge from Pred to LoadBB is a critical edge, and another successor of
1722  // Pred contains a load that can be moved to Pred. This data structure maps
1723  // such a Pred to the movable load.
1724 MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
1725 for (BasicBlock *Pred : predecessors(BB: LoadBB)) {
1726 // If any predecessor block is an EH pad that does not allow non-PHI
1727 // instructions before the terminator, we can't PRE the load.
1728 if (Pred->getTerminator()->isEHPad()) {
1729 LLVM_DEBUG(
1730 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1731 << Pred->getName() << "': " << *Load << '\n');
1732 return false;
1733 }
1734
1735 if (IsValueFullyAvailableInBlock(BB: Pred, FullyAvailableBlocks)) {
1736 continue;
1737 }
1738
1739 if (Pred->getTerminator()->getNumSuccessors() != 1) {
1740 if (isa<IndirectBrInst>(Val: Pred->getTerminator())) {
1741 LLVM_DEBUG(
1742 dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1743 << Pred->getName() << "': " << *Load << '\n');
1744 return false;
1745 }
1746
1747 if (LoadBB->isEHPad()) {
1748 LLVM_DEBUG(
1749 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1750 << Pred->getName() << "': " << *Load << '\n');
1751 return false;
1752 }
1753
1754 // Do not split backedge as it will break the canonical loop form.
1755 if (!isLoadPRESplitBackedgeEnabled())
1756 if (DT->dominates(A: LoadBB, B: Pred)) {
1757 LLVM_DEBUG(
1758 dbgs()
1759 << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1760 << Pred->getName() << "': " << *Load << '\n');
1761 return false;
1762 }
1763
1764 if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
1765 CriticalEdgePredAndLoad[Pred] = LI;
1766 else
1767 CriticalEdgePredSplit.push_back(Elt: Pred);
1768 } else {
1769 // Only add the predecessors that will not be split for now.
1770 PredLoads[Pred] = nullptr;
1771 }
1772 }
1773
1774 // Decide whether PRE is profitable for this load.
1775 unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
1776 unsigned NumUnavailablePreds = NumInsertPreds +
1777 CriticalEdgePredAndLoad.size();
1778 assert(NumUnavailablePreds != 0 &&
1779 "Fully available value should already be eliminated!");
1780 (void)NumUnavailablePreds;
1781
1782  // If we need to insert a new load in multiple predecessors, reject it.
1783 // FIXME: If we could restructure the CFG, we could make a common pred with
1784 // all the preds that don't have an available Load and insert a new load into
1785 // that one block.
1786 if (NumInsertPreds > 1)
1787 return false;
1788
1789  // Now we know where we will insert the load. We must ensure that it is safe
1790  // to speculatively execute the load at those points.
1791 if (MustEnsureSafetyOfSpeculativeExecution) {
1792 if (CriticalEdgePredSplit.size())
1793 if (!isSafeToSpeculativelyExecute(I: Load, CtxI: &*LoadBB->getFirstNonPHIIt(), AC,
1794 DT))
1795 return false;
1796 for (auto &PL : PredLoads)
1797 if (!isSafeToSpeculativelyExecute(I: Load, CtxI: PL.first->getTerminator(), AC,
1798 DT))
1799 return false;
1800 for (auto &CEP : CriticalEdgePredAndLoad)
1801 if (!isSafeToSpeculativelyExecute(I: Load, CtxI: CEP.first->getTerminator(), AC,
1802 DT))
1803 return false;
1804 }
1805
1806 // Split critical edges, and update the unavailable predecessors accordingly.
1807 for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
1808 BasicBlock *NewPred = splitCriticalEdges(Pred: OrigPred, Succ: LoadBB);
1809 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1810 PredLoads[NewPred] = nullptr;
1811 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1812 << LoadBB->getName() << '\n');
1813 }
1814
1815 for (auto &CEP : CriticalEdgePredAndLoad)
1816 PredLoads[CEP.first] = nullptr;
1817
1818 // Check if the load can safely be moved to all the unavailable predecessors.
1819 bool CanDoPRE = true;
1820 const DataLayout &DL = Load->getDataLayout();
1821  SmallVector<Instruction *, 8> NewInsts;
1822 for (auto &PredLoad : PredLoads) {
1823 BasicBlock *UnavailablePred = PredLoad.first;
1824
1825 // Do PHI translation to get its value in the predecessor if necessary. The
1826 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1827 // We do the translation for each edge we skipped by going from Load's block
1828    // to LoadBB; otherwise we might miss pieces needing translation.
1829
1830 // If all preds have a single successor, then we know it is safe to insert
1831 // the load on the pred (?!?), so we can insert code to materialize the
1832 // pointer if it is not available.
1833 Value *LoadPtr = Load->getPointerOperand();
1834 BasicBlock *Cur = Load->getParent();
1835 while (Cur != LoadBB) {
1836 PHITransAddr Address(LoadPtr, DL, AC);
1837 LoadPtr = Address.translateWithInsertion(CurBB: Cur, PredBB: Cur->getSinglePredecessor(),
1838 DT: *DT, NewInsts);
1839 if (!LoadPtr) {
1840 CanDoPRE = false;
1841 break;
1842 }
1843 Cur = Cur->getSinglePredecessor();
1844 }
1845
1846 if (LoadPtr) {
1847 PHITransAddr Address(LoadPtr, DL, AC);
1848 LoadPtr = Address.translateWithInsertion(CurBB: LoadBB, PredBB: UnavailablePred, DT: *DT,
1849 NewInsts);
1850 }
1851 // If we couldn't find or insert a computation of this phi translated value,
1852 // we fail PRE.
1853 if (!LoadPtr) {
1854 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1855 << *Load->getPointerOperand() << "\n");
1856 CanDoPRE = false;
1857 break;
1858 }
1859
1860 PredLoad.second = LoadPtr;
1861 }
1862
1863 if (!CanDoPRE) {
1864 while (!NewInsts.empty()) {
1865 // Erase instructions generated by the failed PHI translation before
1866 // trying to number them. PHI translation might insert instructions
1867 // in basic blocks other than the current one, and we delete them
1868 // directly, as salvageAndRemoveInstruction only allows removing from the
1869 // current basic block.
1870 NewInsts.pop_back_val()->eraseFromParent();
1871 }
1872    // HINT: Don't revert the edge-splitting, as the following transformation
1873    // may also need to split these critical edges.
1874 return !CriticalEdgePredSplit.empty();
1875 }
1876
1877 // Okay, we can eliminate this load by inserting a reload in the predecessor
1878  // and using PHI construction to get the value in the other predecessors.
1879  // Do it.
1880 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
1881 LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
1882 << " INSTS: " << *NewInsts.back()
1883 << '\n');
1884
1885 // Assign value numbers to the new instructions.
1886 for (Instruction *I : NewInsts) {
1887 // Instructions that have been inserted in predecessor(s) to materialize
1888 // the load address do not retain their original debug locations. Doing
1889 // so could lead to confusing (but correct) source attributions.
1890 I->updateLocationAfterHoist();
1891
1892 // FIXME: We really _ought_ to insert these value numbers into their
1893 // parent's availability map. However, in doing so, we risk getting into
1894 // ordering issues. If a block hasn't been processed yet, we would be
1895 // marking a value as AVAIL-IN, which isn't what we intend.
1896 VN.lookupOrAdd(V: I);
1897 }
1898
1899 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads&: PredLoads,
1900 CriticalEdgePredAndLoad: &CriticalEdgePredAndLoad);
1901 ++NumPRELoad;
1902 return true;
1903}
1904
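/// Illustrative sketch of loop load PRE (simplified pseudo-IR; names are
/// hypothetical, not taken from this file): for a loop-invariant pointer %p
/// that is loaded in the loop header and clobbered only in a cold block that
/// does not dominate the latch,
///
///   preheader:
///     br label %header
///   header:
///     %v = load i32, ptr %p
///     ...
///   cold:                      ; may write to memory, rarely taken
///     call void @clobber()
///     br label %latch
///
/// the value can be made fully available by loading %p in the preheader and
/// reloading it in %cold, after which PHI construction replaces %v.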
1905bool GVNPass::performLoopLoadPRE(LoadInst *Load,
1906 AvailValInBlkVect &ValuesPerBlock,
1907 UnavailBlkVect &UnavailableBlocks) {
1908 const Loop *L = LI->getLoopFor(BB: Load->getParent());
1909 // TODO: Generalize to other loop blocks that dominate the latch.
1910 if (!L || L->getHeader() != Load->getParent())
1911 return false;
1912
1913 BasicBlock *Preheader = L->getLoopPreheader();
1914 BasicBlock *Latch = L->getLoopLatch();
1915 if (!Preheader || !Latch)
1916 return false;
1917
1918 Value *LoadPtr = Load->getPointerOperand();
1919 // Must be available in preheader.
1920 if (!L->isLoopInvariant(V: LoadPtr))
1921 return false;
1922
1923  // We plan to hoist the load to the preheader without introducing a new
1924  // fault. In order to do that, we need to prove that we cannot side-exit the
1925  // loop after the loop header is first entered but before the load executes.
1926 if (ICF->isDominatedByICFIFromSameBlock(Insn: Load))
1927 return false;
1928
1929 BasicBlock *LoopBlock = nullptr;
1930 for (auto *Blocker : UnavailableBlocks) {
1931    // Blockers from outside the loop are handled in the preheader.
1932 if (!L->contains(BB: Blocker))
1933 continue;
1934
1935    // Only allow one loop block. The loop header is executed at least as
1936    // frequently as any loop block, and likely much more frequently. But with
1937    // multiple loop blocks, we need extra information (such as block
1938 // frequency info) to understand whether it is profitable to PRE into
1939 // multiple loop blocks.
1940 if (LoopBlock)
1941 return false;
1942
1943 // Do not sink into inner loops. This may be non-profitable.
1944 if (L != LI->getLoopFor(BB: Blocker))
1945 return false;
1946
1947    // Blocks that dominate the latch execute on every iteration, except maybe
1948    // the last one. So PREing into these blocks doesn't make much sense
1949 // in most cases. But the blocks that do not necessarily execute on each
1950 // iteration are sometimes much colder than the header, and this is when
1951 // PRE is potentially profitable.
1952 if (DT->dominates(A: Blocker, B: Latch))
1953 return false;
1954
1955 // Make sure that the terminator itself doesn't clobber.
1956 if (Blocker->getTerminator()->mayWriteToMemory())
1957 return false;
1958
1959 LoopBlock = Blocker;
1960 }
1961
1962 if (!LoopBlock)
1963 return false;
1964
1965  // Make sure the memory at this pointer cannot be freed, so that we can
1966  // safely reload from it after a clobber.
1967 if (LoadPtr->canBeFreed())
1968 return false;
1969
1970 // TODO: Support critical edge splitting if blocker has more than 1 successor.
1971 MapVector<BasicBlock *, Value *> AvailableLoads;
1972 AvailableLoads[LoopBlock] = LoadPtr;
1973 AvailableLoads[Preheader] = LoadPtr;
1974
1975 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
1976 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
1977 /*CriticalEdgePredAndLoad*/ nullptr);
1978 ++NumPRELoopLoad;
1979 return true;
1980}
1981
1982static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
1983 OptimizationRemarkEmitter *ORE) {
1984 using namespace ore;
1985
1986 ORE->emit(RemarkBuilder: [&]() {
1987 return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
1988 << "load of type " << NV("Type", Load->getType()) << " eliminated"
1989 << setExtraArgs() << " in favor of "
1990 << NV("InfavorOfValue", AvailableValue);
1991 });
1992}
1993
1994/// Attempt to eliminate a load whose dependencies are
1995/// non-local by performing PHI construction.
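///
/// Illustrative sketch (simplified pseudo-IR; names are hypothetical): if the
/// load's block merges two predecessors,
///
///   BB1:
///     store i32 %x, ptr %p
///     br label %Merge
///   BB2:
///     %y = load i32, ptr %p
///     br label %Merge
///   Merge:
///     %v = load i32, ptr %p
///
/// then %v is fully redundant and is replaced by phi i32 [ %x, %BB1 ],
/// [ %y, %BB2 ]; otherwise load PRE may still insert at most one new load.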
1996bool GVNPass::processNonLocalLoad(LoadInst *Load) {
1997 // Non-local speculations are not allowed under asan.
1998 if (Load->getParent()->getParent()->hasFnAttribute(
1999 Kind: Attribute::SanitizeAddress) ||
2000 Load->getParent()->getParent()->hasFnAttribute(
2001 Kind: Attribute::SanitizeHWAddress))
2002 return false;
2003
2004 // Step 1: Find the non-local dependencies of the load.
2005 LoadDepVect Deps;
2006 MD->getNonLocalPointerDependency(QueryInst: Load, Result&: Deps);
2007
2008 // If we had to process more than one hundred blocks to find the
2009 // dependencies, this load isn't worth worrying about. Optimizing
2010 // it will be too expensive.
2011 unsigned NumDeps = Deps.size();
2012 if (NumDeps > MaxNumDeps)
2013 return false;
2014
2015 // If we had a phi translation failure, we'll have a single entry which is a
2016 // clobber in the current block. Reject this early.
2017 if (NumDeps == 1 &&
2018 !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
2019 LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
2020 dbgs() << " has unknown dependencies\n";);
2021 return false;
2022 }
2023
2024 bool Changed = false;
2025 // If this load follows a GEP, see if we can PRE the indices before analyzing.
2026 if (GetElementPtrInst *GEP =
2027 dyn_cast<GetElementPtrInst>(Val: Load->getOperand(i_nocapture: 0))) {
2028 for (Use &U : GEP->indices())
2029 if (Instruction *I = dyn_cast<Instruction>(Val: U.get()))
2030 Changed |= performScalarPRE(I);
2031 }
2032
2033 // Step 2: Analyze the availability of the load.
2034 AvailValInBlkVect ValuesPerBlock;
2035 UnavailBlkVect UnavailableBlocks;
2036 AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
2037
2038 // If we have no predecessors that produce a known value for this load, exit
2039 // early.
2040 if (ValuesPerBlock.empty())
2041 return Changed;
2042
2043  // Step 3: Eliminate full redundancy.
2044 //
2045 // If all of the instructions we depend on produce a known value for this
2046 // load, then it is fully redundant and we can use PHI insertion to compute
2047 // its value. Insert PHIs and remove the fully redundant value now.
2048 if (UnavailableBlocks.empty()) {
2049 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');
2050
2051 // Perform PHI construction.
2052 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this);
2053 // ConstructSSAForLoadSet is responsible for combining metadata.
2054 ICF->removeUsersOf(Inst: Load);
2055 Load->replaceAllUsesWith(V);
2056
2057 if (isa<PHINode>(Val: V))
2058 V->takeName(V: Load);
2059 if (Instruction *I = dyn_cast<Instruction>(Val: V))
2060 // If instruction I has debug info, then we should not update it.
2061 // Also, if I has a null DebugLoc, then it is still potentially incorrect
2062 // to propagate Load's DebugLoc because Load may not post-dominate I.
2063 if (Load->getDebugLoc() && Load->getParent() == I->getParent())
2064 I->setDebugLoc(Load->getDebugLoc());
2065 if (V->getType()->isPtrOrPtrVectorTy())
2066 MD->invalidateCachedPointerInfo(Ptr: V);
2067 ++NumGVNLoad;
2068 reportLoadElim(Load, AvailableValue: V, ORE);
2069 salvageAndRemoveInstruction(I: Load);
2070 return true;
2071 }
2072
2073 // Step 4: Eliminate partial redundancy.
2074 if (!isPREEnabled() || !isLoadPREEnabled())
2075 return Changed;
2076 if (!isLoadInLoopPREEnabled() && LI->getLoopFor(BB: Load->getParent()))
2077 return Changed;
2078
2079 if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
2080 PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
2081 return true;
2082
2083 return Changed;
2084}
2085
2086bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
2087 Value *V = IntrinsicI->getArgOperand(i: 0);
2088
2089 if (ConstantInt *Cond = dyn_cast<ConstantInt>(Val: V)) {
2090 if (Cond->isZero()) {
2091 Type *Int8Ty = Type::getInt8Ty(C&: V->getContext());
2092 Type *PtrTy = PointerType::get(C&: V->getContext(), AddressSpace: 0);
2093      // Insert a new store-to-null instruction before the assume to indicate
2094      // that this code is not reachable. FIXME: We could insert an unreachable
2095      // instruction directly because we can modify the CFG.
2096 auto *NewS =
2097 new StoreInst(PoisonValue::get(T: Int8Ty), Constant::getNullValue(Ty: PtrTy),
2098 IntrinsicI->getIterator());
2099 if (MSSAU) {
2100 const MemoryUseOrDef *FirstNonDom = nullptr;
2101 const auto *AL =
2102 MSSAU->getMemorySSA()->getBlockAccesses(BB: IntrinsicI->getParent());
2103
2104 // If there are accesses in the current basic block, find the first one
2105 // that does not come before NewS. The new memory access is inserted
2106 // after the found access or before the terminator if no such access is
2107 // found.
2108 if (AL) {
2109 for (const auto &Acc : *AL) {
2110 if (auto *Current = dyn_cast<MemoryUseOrDef>(Val: &Acc))
2111 if (!Current->getMemoryInst()->comesBefore(Other: NewS)) {
2112 FirstNonDom = Current;
2113 break;
2114 }
2115 }
2116 }
2117
2118 auto *NewDef =
2119 FirstNonDom ? MSSAU->createMemoryAccessBefore(
2120 I: NewS, Definition: nullptr,
2121 InsertPt: const_cast<MemoryUseOrDef *>(FirstNonDom))
2122 : MSSAU->createMemoryAccessInBB(
2123 I: NewS, Definition: nullptr,
2124 BB: NewS->getParent(), Point: MemorySSA::BeforeTerminator);
2125
2126 MSSAU->insertDef(Def: cast<MemoryDef>(Val: NewDef), /*RenameUses=*/false);
2127 }
2128 }
2129 if (isAssumeWithEmptyBundle(Assume: *IntrinsicI)) {
2130 salvageAndRemoveInstruction(I: IntrinsicI);
2131 return true;
2132 }
2133 return false;
2134 }
2135
2136 if (isa<Constant>(Val: V)) {
2137    // If it's a constant and not false, it must evaluate to true. This means
2138    // our assume is assume(true) and thus pointless, and we don't want to do
2139    // anything more here.
2140 return false;
2141 }
2142
2143 Constant *True = ConstantInt::getTrue(Context&: V->getContext());
2144 return propagateEquality(LHS: V, RHS: True, Root: IntrinsicI);
2145}
2146
2147static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
2148 patchReplacementInstruction(I, Repl);
2149 I->replaceAllUsesWith(V: Repl);
2150}
2151
2152/// Attempt to eliminate a load, first by eliminating it
2153/// locally, and then attempting non-local elimination if that fails.
2154bool GVNPass::processLoad(LoadInst *L) {
2155 if (!MD)
2156 return false;
2157
2158 // This code hasn't been audited for ordered or volatile memory access.
2159 if (!L->isUnordered())
2160 return false;
2161
2162 if (L->getType()->isTokenLikeTy())
2163 return false;
2164
2165 if (L->use_empty()) {
2166 salvageAndRemoveInstruction(I: L);
2167 return true;
2168 }
2169
2170 // ... to a pointer that has been loaded from before...
2171 MemDepResult Dep = MD->getDependency(QueryInst: L);
2172
2173 // If it is defined in another block, try harder.
2174 if (Dep.isNonLocal())
2175 return processNonLocalLoad(Load: L);
2176
2177 // Only handle the local case below.
2178 if (!Dep.isLocal()) {
2179 // This might be a NonFuncLocal or an Unknown.
2180 LLVM_DEBUG(
2181 // fast print dep, using operator<< on instruction is too slow.
2182 dbgs() << "GVN: load "; L->printAsOperand(dbgs());
2183 dbgs() << " has unknown dependence\n";);
2184 return false;
2185 }
2186
2187 auto AV = AnalyzeLoadAvailability(Load: L, DepInfo: Dep, Address: L->getPointerOperand());
2188 if (!AV)
2189 return false;
2190
2191 Value *AvailableValue = AV->MaterializeAdjustedValue(Load: L, InsertPt: L);
2192
2193 // MaterializeAdjustedValue is responsible for combining metadata.
2194 ICF->removeUsersOf(Inst: L);
2195 L->replaceAllUsesWith(V: AvailableValue);
2196 if (MSSAU)
2197 MSSAU->removeMemoryAccess(I: L);
2198 ++NumGVNLoad;
2199 reportLoadElim(Load: L, AvailableValue, ORE);
2200 salvageAndRemoveInstruction(I: L);
2201 // Tell MDA to reexamine the reused pointer since we might have more
2202 // information after forwarding it.
2203 if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
2204 MD->invalidateCachedPointerInfo(Ptr: AvailableValue);
2205 return true;
2206}
2207
2208// Attempt to process masked loads that have loaded from
2209// masked stores with the same mask.
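//
// Illustrative sketch (simplified pseudo-IR; names are hypothetical):
//
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %p, i32 4,
//                                         <4 x i1> %mask)
//   %l = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4,
//                                                  <4 x i1> %mask,
//                                                  <4 x i32> %passthrough)
//
// With the same %mask, %l can be replaced by
//   select <4 x i1> %mask, <4 x i32> %val, <4 x i32> %passthrough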
2210bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
2211 if (!MD)
2212 return false;
2213 MemDepResult Dep = MD->getDependency(QueryInst: I);
2214 Instruction *DepInst = Dep.getInst();
2215 if (!DepInst || !Dep.isLocal() || !Dep.isDef())
2216 return false;
2217
2218 Value *Mask = I->getOperand(i_nocapture: 1);
2219 Value *Passthrough = I->getOperand(i_nocapture: 2);
2220 Value *StoreVal;
2221 if (!match(V: DepInst,
2222 P: m_MaskedStore(Op0: m_Value(V&: StoreVal), Op1: m_Value(), Op2: m_Specific(V: Mask))) ||
2223 StoreVal->getType() != I->getType())
2224 return false;
2225
2226  // Remove the load, but generate a select for the passthrough value.
2227 Value *OpToForward = llvm::SelectInst::Create(C: Mask, S1: StoreVal, S2: Passthrough, NameStr: "",
2228 InsertBefore: I->getIterator());
2229
2230 ICF->removeUsersOf(Inst: I);
2231 I->replaceAllUsesWith(V: OpToForward);
2232 salvageAndRemoveInstruction(I);
2233 ++NumGVNLoad;
2234 return true;
2235}
2236
2237/// Return a pair whose first field is the value number of \p Exp and whose
2238/// second field indicates whether that value number is newly created.
2239std::pair<uint32_t, bool>
2240GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2241 uint32_t &E = ExpressionNumbering[Exp];
2242 bool CreateNewValNum = !E;
2243 if (CreateNewValNum) {
2244 Expressions.push_back(x: Exp);
2245 if (ExprIdx.size() < NextValueNumber + 1)
2246 ExprIdx.resize(new_size: NextValueNumber * 2);
2247 E = NextValueNumber;
2248 ExprIdx[NextValueNumber++] = NextExprNumber++;
2249 }
2250 return {E, CreateNewValNum};
2251}
2252
2253/// Return whether all the values related to the same \p Num are
2254/// defined in \p BB.
2255bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2256 GVNPass &GVN) {
2257 return all_of(
2258 Range: GVN.LeaderTable.getLeaders(N: Num),
2259 P: [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2260}
2261
2262/// Wrap phiTranslateImpl to provide caching functionality.
2263uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2264 const BasicBlock *PhiBlock,
2265 uint32_t Num, GVNPass &GVN) {
2266 auto FindRes = PhiTranslateTable.find(Val: {Num, Pred});
2267 if (FindRes != PhiTranslateTable.end())
2268 return FindRes->second;
2269 uint32_t NewNum = phiTranslateImpl(BB: Pred, PhiBlock, Num, GVN);
2270 PhiTranslateTable.insert(KV: {{Num, Pred}, NewNum});
2271 return NewNum;
2272}
2273
2274// Return true if the value numbers \p Num and \p NewNum represent equal
2275// values. Return false if the result is unknown.
2276bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
2277 const BasicBlock *Pred,
2278 const BasicBlock *PhiBlock,
2279 GVNPass &GVN) {
2280 CallInst *Call = nullptr;
2281 auto Leaders = GVN.LeaderTable.getLeaders(N: Num);
2282 for (const auto &Entry : Leaders) {
2283 Call = dyn_cast<CallInst>(Val: Entry.Val);
2284 if (Call && Call->getParent() == PhiBlock)
2285 break;
2286 }
2287
2288 if (AA->doesNotAccessMemory(Call))
2289 return true;
2290
2291 if (!MD || !AA->onlyReadsMemory(Call))
2292 return false;
2293
2294 MemDepResult LocalDep = MD->getDependency(QueryInst: Call);
2295 if (!LocalDep.isNonLocal())
2296 return false;
2297
2298 const MemoryDependenceResults::NonLocalDepInfo &Deps =
2299 MD->getNonLocalCallDependency(QueryCall: Call);
2300
2301 // Check to see if the Call has no function local clobber.
2302 for (const NonLocalDepEntry &D : Deps) {
2303 if (D.getResult().isNonFuncLocal())
2304 return true;
2305 }
2306 return false;
2307}
2308
2309/// Translate value number \p Num using phis, so that it has the values of
2310/// the phis in BB.
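///
/// Illustrative sketch (names are hypothetical): if %p is
/// "phi i32 [ %a, %Pred ], [ %b, %Other ]" in PhiBlock and \p Num is the
/// value number of "add i32 %p, %x", then translating through the edge from
/// %Pred yields the value number of "add i32 %a, %x", when such an expression
/// has already been numbered.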
2311uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
2312 const BasicBlock *PhiBlock,
2313 uint32_t Num, GVNPass &GVN) {
2314 // See if we can refine the value number by looking at the PN incoming value
2315 // for the given predecessor.
2316 if (PHINode *PN = NumberingPhi[Num]) {
2317 if (PN->getParent() != PhiBlock)
2318 return Num;
2319 for (unsigned I = 0; I != PN->getNumIncomingValues(); ++I) {
2320 if (PN->getIncomingBlock(i: I) != Pred)
2321 continue;
2322 if (uint32_t TransVal = lookup(V: PN->getIncomingValue(i: I), Verify: false))
2323 return TransVal;
2324 }
2325 return Num;
2326 }
2327
2328 if (BasicBlock *BB = NumberingBB[Num]) {
2329 assert(MSSA && "NumberingBB is non-empty only when using MemorySSA");
2330 // Value numbers of basic blocks are used to represent memory state in
2331 // load/store instructions and read-only function calls when said state is
2332 // set by a MemoryPhi.
2333 if (BB != PhiBlock)
2334 return Num;
2335 MemoryPhi *MPhi = MSSA->getMemoryAccess(BB);
2336 for (unsigned i = 0, N = MPhi->getNumIncomingValues(); i != N; ++i) {
2337 if (MPhi->getIncomingBlock(I: i) != Pred)
2338 continue;
2339 MemoryAccess *MA = MPhi->getIncomingValue(I: i);
2340 if (auto *PredPhi = dyn_cast<MemoryPhi>(Val: MA))
2341 return lookupOrAdd(V: PredPhi->getBlock());
2342 if (MSSA->isLiveOnEntryDef(MA))
2343 return lookupOrAdd(V: &BB->getParent()->getEntryBlock());
2344 return lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst());
2345 }
2346 llvm_unreachable(
2347 "CFG/MemorySSA mismatch: predecessor not found among incoming blocks");
2348 }
2349
2350  // If any value related to Num is defined in a BB other than
2351 // PhiBlock, it cannot depend on a phi in PhiBlock without going through
2352 // a backedge. We can do an early exit in that case to save compile time.
2353 if (!areAllValsInBB(Num, BB: PhiBlock, GVN))
2354 return Num;
2355
2356 if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
2357 return Num;
2358 Expression Exp = Expressions[ExprIdx[Num]];
2359
2360 for (unsigned I = 0; I < Exp.VarArgs.size(); I++) {
2361 // For InsertValue and ExtractValue, some varargs are index numbers
2362 // instead of value numbers. Those index numbers should not be
2363 // translated.
2364 if ((I > 1 && Exp.Opcode == Instruction::InsertValue) ||
2365 (I > 0 && Exp.Opcode == Instruction::ExtractValue) ||
2366 (I > 1 && Exp.Opcode == Instruction::ShuffleVector))
2367 continue;
2368 Exp.VarArgs[I] = phiTranslate(Pred, PhiBlock, Num: Exp.VarArgs[I], GVN);
2369 }
2370
2371 if (Exp.Commutative) {
2372 assert(Exp.VarArgs.size() >= 2 && "Unsupported commutative instruction!");
2373 if (Exp.VarArgs[0] > Exp.VarArgs[1]) {
2374 std::swap(a&: Exp.VarArgs[0], b&: Exp.VarArgs[1]);
2375 uint32_t Opcode = Exp.Opcode >> 8;
2376 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
2377 Exp.Opcode = (Opcode << 8) |
2378 CmpInst::getSwappedPredicate(
2379 pred: static_cast<CmpInst::Predicate>(Exp.Opcode & 255));
2380 }
2381 }
2382
2383 if (uint32_t NewNum = ExpressionNumbering[Exp]) {
2384 if (Exp.Opcode == Instruction::Call && NewNum != Num)
2385 return areCallValsEqual(Num, NewNum, Pred, PhiBlock, GVN) ? NewNum : Num;
2386 return NewNum;
2387 }
2388 return Num;
2389}
2390
2391/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
2392/// again.
2393void GVNPass::ValueTable::eraseTranslateCacheEntry(
2394 uint32_t Num, const BasicBlock &CurrBlock) {
2395 for (const BasicBlock *Pred : predecessors(BB: &CurrBlock))
2396 PhiTranslateTable.erase(Val: {Num, Pred});
2397}
2398
2399// In order to find a leader for a given value number at a
2400// specific basic block, we first obtain the list of all Values for that number,
2401// and then scan the list to find one whose block dominates the block in
2402// question. This is fast because dominator tree queries consist of only
2403// a few comparisons of DFS numbers.
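//
// Illustrative sketch (names are hypothetical): if value number 7 has leaders
// {%x in %BB1, %y in %BB2} and only %BB1 dominates the query block, %x is
// returned; a leader that is a Constant is returned as soon as it is found.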
2404Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t Num) {
2405 auto Leaders = LeaderTable.getLeaders(N: Num);
2406 if (Leaders.empty())
2407 return nullptr;
2408
2409 Value *Val = nullptr;
2410 for (const auto &Entry : Leaders) {
2411 if (DT->dominates(A: Entry.BB, B: BB)) {
2412 Val = Entry.Val;
2413 if (isa<Constant>(Val))
2414 return Val;
2415 }
2416 }
2417
2418 return Val;
2419}
2420
2421/// There is an edge from 'Src' to 'Dst'. Return
2422/// true if every path from the entry block to 'Dst' passes via this edge. In
2423/// particular 'Dst' must not be reachable via another edge from 'Src'.
2424static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
2425 DominatorTree *DT) {
2426 // While in theory it is interesting to consider the case in which Dst has
2427 // more than one predecessor, because Dst might be part of a loop which is
2428 // only reachable from Src, in practice it is pointless since at the time
2429 // GVN runs all such loops have preheaders, which means that Dst will have
2430 // been changed to have only one predecessor, namely Src.
2431 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
2432 assert((!Pred || Pred == E.getStart()) &&
2433 "No edge between these basic blocks!");
2434 return Pred != nullptr;
2435}
2436
2437void GVNPass::assignBlockRPONumber(Function &F) {
2438 BlockRPONumber.clear();
2439 uint32_t NextBlockNumber = 1;
2440 ReversePostOrderTraversal<Function *> RPOT(&F);
2441 for (BasicBlock *BB : RPOT)
2442 BlockRPONumber[BB] = NextBlockNumber++;
2443 InvalidBlockRPONumbers = false;
2444}
2445
2446/// The given values are known to be equal in every use
2447/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
2448/// 'RHS' everywhere in the scope. Returns whether a change was made.
2449/// The Root may either be a basic block edge (for conditions) or an
2450/// instruction (for assumes).
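///
/// Illustrative sketch (simplified pseudo-IR; names are hypothetical):
///
///   %cmp = icmp eq i32 %a, %b
///   br i1 %cmp, label %then, label %else
///
/// Along the edge to %then, %cmp is known to be true and therefore %a == %b,
/// so uses dominated by that edge can have one value replaced by the other,
/// subject to the preference rules below for which side is kept.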
2451bool GVNPass::propagateEquality(
2452 Value *LHS, Value *RHS,
2453 const std::variant<BasicBlockEdge, Instruction *> &Root) {
2454 SmallVector<std::pair<Value*, Value*>, 4> Worklist;
2455 Worklist.push_back(Elt: std::make_pair(x&: LHS, y&: RHS));
2456 bool Changed = false;
2457 SmallVector<const BasicBlock *> DominatedBlocks;
2458 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root)) {
2459 // For speed, compute a conservative fast approximation to
2460 // DT->dominates(Root, Root.getEnd());
2461 if (isOnlyReachableViaThisEdge(E: *Edge, DT))
2462 DominatedBlocks.push_back(Elt: Edge->getEnd());
2463 } else {
2464 Instruction *I = std::get<Instruction *>(v: Root);
2465 for (const auto *Node : DT->getNode(BB: I->getParent())->children())
2466 DominatedBlocks.push_back(Elt: Node->getBlock());
2467 }
2468
2469 while (!Worklist.empty()) {
2470 std::pair<Value*, Value*> Item = Worklist.pop_back_val();
2471 LHS = Item.first; RHS = Item.second;
2472
2473 if (LHS == RHS)
2474 continue;
2475 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
2476
2477 // Don't try to propagate equalities between constants.
2478 if (isa<Constant>(Val: LHS) && isa<Constant>(Val: RHS))
2479 continue;
2480
2481 // Prefer a constant on the right-hand side, or an Argument if no constants.
2482 if (isa<Constant>(Val: LHS) || (isa<Argument>(Val: LHS) && !isa<Constant>(Val: RHS)))
2483 std::swap(a&: LHS, b&: RHS);
2484 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
2485 const DataLayout &DL =
2486 isa<Argument>(Val: LHS)
2487 ? cast<Argument>(Val: LHS)->getParent()->getDataLayout()
2488 : cast<Instruction>(Val: LHS)->getDataLayout();
2489
2490 // If there is no obvious reason to prefer the left-hand side over the
2491 // right-hand side, ensure the longest lived term is on the right-hand side,
2492 // so the shortest lived term will be replaced by the longest lived.
2493 // This tends to expose more simplifications.
2494 uint32_t LVN = VN.lookupOrAdd(V: LHS);
2495 if ((isa<Argument>(Val: LHS) && isa<Argument>(Val: RHS)) ||
2496 (isa<Instruction>(Val: LHS) && isa<Instruction>(Val: RHS))) {
2497 // Move the 'oldest' value to the right-hand side, using the value number
2498 // as a proxy for age.
2499 uint32_t RVN = VN.lookupOrAdd(V: RHS);
2500 if (LVN < RVN) {
2501 std::swap(a&: LHS, b&: RHS);
2502 LVN = RVN;
2503 }
2504 }
2505
2506 // If value numbering later sees that an instruction in the scope is equal
2507 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
2508 // the invariant that instructions only occur in the leader table for their
2509 // own value number (this is used by removeFromLeaderTable), do not do this
2510 // if RHS is an instruction (if an instruction in the scope is morphed into
2511 // LHS then it will be turned into RHS by the next GVN iteration anyway, so
2512 // using the leader table is about compiling faster, not optimizing better).
2513    // The leader table only tracks basic blocks, not edges. Only add to it if
2514    // we have the simple case where the edge dominates the end.
2515 if (!isa<Instruction>(Val: RHS) && canReplacePointersIfEqual(From: LHS, To: RHS, DL))
2516 for (const BasicBlock *BB : DominatedBlocks)
2517 LeaderTable.insert(N: LVN, V: RHS, BB);
2518
2519 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
2520 // LHS always has at least one use that is not dominated by Root, this will
2521 // never do anything if LHS has only one use.
2522 if (!LHS->hasOneUse()) {
2523 // Create a callback that captures the DL.
2524 auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
2525 return canReplacePointersInUseIfEqual(U, To, DL);
2526 };
2527 unsigned NumReplacements;
2528 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root))
2529 NumReplacements = replaceDominatedUsesWithIf(
2530 From: LHS, To: RHS, DT&: *DT, Edge: *Edge, ShouldReplace: CanReplacePointersCallBack);
2531 else
2532 NumReplacements = replaceDominatedUsesWithIf(
2533 From: LHS, To: RHS, DT&: *DT, I: std::get<Instruction *>(v: Root),
2534 ShouldReplace: CanReplacePointersCallBack);
2535
2536 if (NumReplacements > 0) {
2537 Changed = true;
2538 NumGVNEqProp += NumReplacements;
2539 // Cached information for anything that uses LHS will be invalid.
2540 if (MD)
2541 MD->invalidateCachedPointerInfo(Ptr: LHS);
2542 }
2543 }
2544
2545 // Now try to deduce additional equalities from this one. For example, if
2546 // the known equality was "(A != B)" == "false" then it follows that A and B
2547 // are equal in the scope. Only boolean equalities with an explicit true or
2548 // false RHS are currently supported.
2549 if (!RHS->getType()->isIntegerTy(Bitwidth: 1))
2550 // Not a boolean equality - bail out.
2551 continue;
2552 ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS);
2553 if (!CI)
2554 // RHS neither 'true' nor 'false' - bail out.
2555 continue;
2556 // Whether RHS equals 'true'. Otherwise it equals 'false'.
2557 bool IsKnownTrue = CI->isMinusOne();
2558 bool IsKnownFalse = !IsKnownTrue;
2559
2560 // If "A && B" is known true then both A and B are known true. If "A || B"
2561 // is known false then both A and B are known false.
2562 Value *A, *B;
2563 if ((IsKnownTrue && match(V: LHS, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) ||
2564 (IsKnownFalse && match(V: LHS, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
2565 Worklist.push_back(Elt: std::make_pair(x&: A, y&: RHS));
2566 Worklist.push_back(Elt: std::make_pair(x&: B, y&: RHS));
2567 continue;
2568 }
2569
2570 // If we are propagating an equality like "(A == B)" == "true" then also
2571 // propagate the equality A == B. When propagating a comparison such as
2572 // "(A >= B)" == "true", replace all instances of "A < B" with "false".
2573 if (CmpInst *Cmp = dyn_cast<CmpInst>(Val: LHS)) {
2574 Value *Op0 = Cmp->getOperand(i_nocapture: 0), *Op1 = Cmp->getOperand(i_nocapture: 1);
2575
2576 // If "A == B" is known true, or "A != B" is known false, then replace
2577 // A with B everywhere in the scope. For floating point operations, we
2578      // have to be careful since equality does not always imply equivalence.
2579 if (Cmp->isEquivalence(Invert: IsKnownFalse))
2580 Worklist.push_back(Elt: std::make_pair(x&: Op0, y&: Op1));
2581
2582 // If "A >= B" is known true, replace "A < B" with false everywhere.
2583 CmpInst::Predicate NotPred = Cmp->getInversePredicate();
2584 Constant *NotVal = ConstantInt::get(Ty: Cmp->getType(), V: IsKnownFalse);
2585 // Since we don't have the instruction "A < B" immediately to hand, work
2586 // out the value number that it would have and use that to find an
2587 // appropriate instruction (if any).
2588 uint32_t NextNum = VN.getNextUnusedValueNumber();
2589 uint32_t Num = VN.lookupOrAddCmp(Opcode: Cmp->getOpcode(), Predicate: NotPred, LHS: Op0, RHS: Op1);
2590 // If the number we were assigned was brand new then there is no point in
2591 // looking for an instruction realizing it: there cannot be one!
2592 if (Num < NextNum) {
2593 for (const auto &Entry : LeaderTable.getLeaders(N: Num)) {
2594 // Only look at leaders that either dominate the start of the edge,
2595 // or are dominated by the end. This check is not necessary for
2596 // correctness, it only discards cases for which the following
2597 // use replacement will not work anyway.
2598 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root)) {
2599 if (!DT->dominates(A: Entry.BB, B: Edge->getStart()) &&
2600 !DT->dominates(A: Edge->getEnd(), B: Entry.BB))
2601 continue;
2602 } else {
2603 auto *InstBB = std::get<Instruction *>(v: Root)->getParent();
2604 if (!DT->dominates(A: Entry.BB, B: InstBB) &&
2605 !DT->dominates(A: InstBB, B: Entry.BB))
2606 continue;
2607 }
2608
2609 Value *NotCmp = Entry.Val;
2610 if (NotCmp && isa<Instruction>(Val: NotCmp)) {
2611 unsigned NumReplacements;
2612 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root))
2613 NumReplacements =
2614 replaceDominatedUsesWith(From: NotCmp, To: NotVal, DT&: *DT, Edge: *Edge);
2615 else
2616 NumReplacements = replaceDominatedUsesWith(
2617 From: NotCmp, To: NotVal, DT&: *DT, I: std::get<Instruction *>(v: Root));
2618 Changed |= NumReplacements > 0;
2619 NumGVNEqProp += NumReplacements;
2620 // Cached information for anything that uses NotCmp will be invalid.
2621 if (MD)
2622 MD->invalidateCachedPointerInfo(Ptr: NotCmp);
2623 }
2624 }
2625 }
2626 // Ensure that any instruction in scope that gets the "A < B" value number
2627 // is replaced with false.
2628      // The leader table only tracks basic blocks, not edges. Only add to it
2629      // if we have the simple case where the edge dominates the end.
2630 for (const BasicBlock *BB : DominatedBlocks)
2631 LeaderTable.insert(N: Num, V: NotVal, BB);
2632
2633 continue;
2634 }
2635
2636    // Propagate equalities that result from truncation with no unsigned wrap,
2637    // like (trunc nuw i64 %v to i1) == "true" or (trunc nuw i64 %v to i1) ==
2638    // "false".
2639 if (match(V: LHS, P: m_NUWTrunc(Op: m_Value(V&: A)))) {
2640 Worklist.emplace_back(Args&: A, Args: ConstantInt::get(Ty: A->getType(), V: IsKnownTrue));
2641 continue;
2642 }
2643
2644 if (match(V: LHS, P: m_Not(V: m_Value(V&: A)))) {
2645 Worklist.emplace_back(Args&: A, Args: ConstantInt::get(Ty: A->getType(), V: !IsKnownTrue));
2646 continue;
2647 }
2648 }
2649
2650 return Changed;
2651}
2652
2653/// When calculating availability, handle an instruction
2654/// by inserting it into the appropriate sets.
2655bool GVNPass::processInstruction(Instruction *I) {
2656 // If the instruction can be easily simplified then do so now in preference
2657 // to value numbering it. Value numbering often exposes redundancies, for
2658 // example if it determines that %y is equal to %x then the instruction
2659 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
2660 const DataLayout &DL = I->getDataLayout();
2661 if (Value *V = simplifyInstruction(I, Q: {DL, TLI, DT, AC})) {
2662 bool Changed = false;
2663 if (!I->use_empty()) {
2664 // Simplification can cause a special instruction to become not special.
2665 // For example, devirtualization to a willreturn function.
2666 ICF->removeUsersOf(Inst: I);
2667 I->replaceAllUsesWith(V);
2668 Changed = true;
2669 }
2670 if (isInstructionTriviallyDead(I, TLI)) {
2671 salvageAndRemoveInstruction(I);
2672 Changed = true;
2673 }
2674 if (Changed) {
2675 if (MD && V->getType()->isPtrOrPtrVectorTy())
2676 MD->invalidateCachedPointerInfo(Ptr: V);
2677 ++NumGVNSimpl;
2678 return true;
2679 }
2680 }
2681
2682 if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
2683 return processAssumeIntrinsic(IntrinsicI: Assume);
2684
2685 if (LoadInst *Load = dyn_cast<LoadInst>(Val: I)) {
2686 if (processLoad(L: Load))
2687 return true;
2688
2689 unsigned Num = VN.lookupOrAdd(V: Load);
2690 LeaderTable.insert(N: Num, V: Load, BB: Load->getParent());
2691 return false;
2692 }
2693
2694 if (match(V: I, P: m_Intrinsic<Intrinsic::masked_load>()) &&
2695 processMaskedLoad(I: cast<IntrinsicInst>(Val: I)))
2696 return true;
2697
2698 // For conditional branches, we can perform simple conditional propagation on
2699 // the condition value itself.
2700 if (BranchInst *BI = dyn_cast<BranchInst>(Val: I)) {
2701 if (!BI->isConditional())
2702 return false;
2703
2704 if (isa<Constant>(Val: BI->getCondition()))
2705 return processFoldableCondBr(BI);
2706
2707 Value *BranchCond = BI->getCondition();
2708 BasicBlock *TrueSucc = BI->getSuccessor(i: 0);
2709 BasicBlock *FalseSucc = BI->getSuccessor(i: 1);
2710 // Avoid multiple edges early.
2711 if (TrueSucc == FalseSucc)
2712 return false;
2713
2714 BasicBlock *Parent = BI->getParent();
2715 bool Changed = false;
2716
2717 Value *TrueVal = ConstantInt::getTrue(Context&: TrueSucc->getContext());
2718 BasicBlockEdge TrueE(Parent, TrueSucc);
2719 Changed |= propagateEquality(LHS: BranchCond, RHS: TrueVal, Root: TrueE);
2720
2721 Value *FalseVal = ConstantInt::getFalse(Context&: FalseSucc->getContext());
2722 BasicBlockEdge FalseE(Parent, FalseSucc);
2723 Changed |= propagateEquality(LHS: BranchCond, RHS: FalseVal, Root: FalseE);
2724
2725 return Changed;
2726 }
2727
2728 // For switches, propagate the case values into the case destinations.
2729 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: I)) {
2730 Value *SwitchCond = SI->getCondition();
2731 BasicBlock *Parent = SI->getParent();
2732 bool Changed = false;
2733
2734 // Remember how many outgoing edges there are to every successor.
2735 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
2736 for (BasicBlock *Succ : successors(BB: Parent))
2737 ++SwitchEdges[Succ];
2738
2739 for (const auto &Case : SI->cases()) {
2740 BasicBlock *Dst = Case.getCaseSuccessor();
2741 // If there is only a single edge, propagate the case value into it.
2742 if (SwitchEdges.lookup(Val: Dst) == 1) {
2743 BasicBlockEdge E(Parent, Dst);
2744 Changed |= propagateEquality(LHS: SwitchCond, RHS: Case.getCaseValue(), Root: E);
2745 }
2746 }
2747 return Changed;
2748 }
2749
2750 // Instructions with void type don't return a value, so there's
2751 // no point in trying to find redundancies in them.
2752 if (I->getType()->isVoidTy())
2753 return false;
2754
2755 uint32_t NextNum = VN.getNextUnusedValueNumber();
2756 unsigned Num = VN.lookupOrAdd(V: I);
2757
2758 // Allocations are always uniquely numbered, so we can save time and memory
2759 // by fast failing them.
2760 if (isa<AllocaInst>(Val: I) || I->isTerminator() || isa<PHINode>(Val: I)) {
2761 LeaderTable.insert(N: Num, V: I, BB: I->getParent());
2762 return false;
2763 }
2764
2765 // If the number we were assigned was a brand new VN, then we don't
2766 // need to do a lookup to see if the number already exists
2767 // somewhere in the domtree: it can't!
2768 if (Num >= NextNum) {
2769 LeaderTable.insert(N: Num, V: I, BB: I->getParent());
2770 return false;
2771 }
2772
2773 // Perform fast-path value-number based elimination of values inherited from
2774 // dominators.
2775 Value *Repl = findLeader(BB: I->getParent(), Num);
2776 if (!Repl) {
2777 // Failure, just remember this instance for future use.
2778 LeaderTable.insert(N: Num, V: I, BB: I->getParent());
2779 return false;
2780 }
2781
2782 if (Repl == I) {
2783 // If I was the result of a shortcut PRE, it might already be in the table
2784 // and the best replacement for itself. Nothing to do.
2785 return false;
2786 }
2787
2788 // Remove it!
2789 patchAndReplaceAllUsesWith(I, Repl);
2790 if (MD && Repl->getType()->isPtrOrPtrVectorTy())
2791 MD->invalidateCachedPointerInfo(Ptr: Repl);
2792 salvageAndRemoveInstruction(I);
2793 return true;
2794}
2795
2796/// runOnFunction - This is the main transformation entry point for a function.
2797bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
2798 const TargetLibraryInfo &RunTLI, AAResults &RunAA,
2799 MemoryDependenceResults *RunMD, LoopInfo &LI,
2800 OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
2801 AC = &RunAC;
2802 DT = &RunDT;
2803 VN.setDomTree(DT);
2804 TLI = &RunTLI;
2805 VN.setAliasAnalysis(&RunAA);
2806 MD = RunMD;
2807 ImplicitControlFlowTracking ImplicitCFT;
2808 ICF = &ImplicitCFT;
2809 this->LI = &LI;
2810 VN.setMemDep(M: MD);
2811 VN.setMemorySSA(M: MSSA);
2812 ORE = RunORE;
2813 InvalidBlockRPONumbers = true;
2814 MemorySSAUpdater Updater(MSSA);
2815 MSSAU = MSSA ? &Updater : nullptr;
2816
2817 bool Changed = false;
2818 bool ShouldContinue = true;
2819
2820 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2821 // Merge unconditional branches, allowing PRE to catch more
2822 // optimization opportunities.
2823 for (BasicBlock &BB : make_early_inc_range(Range&: F)) {
2824 bool RemovedBlock = MergeBlockIntoPredecessor(BB: &BB, DTU: &DTU, LI: &LI, MSSAU, MemDep: MD);
2825 if (RemovedBlock)
2826 ++NumGVNBlocks;
2827
2828 Changed |= RemovedBlock;
2829 }
2830 DTU.flush();
2831
2832 unsigned Iteration = 0;
2833 while (ShouldContinue) {
2834 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2835 (void) Iteration;
2836 ShouldContinue = iterateOnFunction(F);
2837 Changed |= ShouldContinue;
2838 ++Iteration;
2839 }
2840
2841 if (isPREEnabled()) {
2842 // Fabricate val-num for dead-code in order to suppress assertion in
2843 // performPRE().
2844 assignValNumForDeadCode();
2845 bool PREChanged = true;
2846 while (PREChanged) {
2847 PREChanged = performPRE(F);
2848 Changed |= PREChanged;
2849 }
2850 }
2851
2852 // FIXME: Should perform GVN again after PRE does something. PRE can move
2853 // computations into blocks where they become fully redundant. Note that
2854 // we can't do this until PRE's critical edge splitting updates memdep.
2855 // Actually, when this happens, we should just fully integrate PRE into GVN.
2856
2857 cleanupGlobalSets();
2858 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
2859 // iteration.
2860 DeadBlocks.clear();
2861
2862 if (MSSA && VerifyMemorySSA)
2863 MSSA->verifyMemorySSA();
2864
2865 return Changed;
2866}
2867
2868bool GVNPass::processBlock(BasicBlock *BB) {
2869 if (DeadBlocks.count(key: BB))
2870 return false;
2871
2872 bool ChangedFunction = false;
2873
2874 // Since we may not have visited the input blocks of the phis, we can't
2875 // use our normal hash approach for phis. Instead, simply look for
2876 // obvious duplicates. The first pass of GVN will tend to create
2877 // identical phis, and the second or later passes can eliminate them.
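  //
  // Illustrative sketch (names are hypothetical): two phis such as
  //   %x = phi i32 [ %a, %P1 ], [ %b, %P2 ]
  //   %y = phi i32 [ %a, %P1 ], [ %b, %P2 ]
  // are obvious duplicates; the duplicate is collected below and removed.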
2878 SmallPtrSet<PHINode *, 8> PHINodesToRemove;
2879 ChangedFunction |= EliminateDuplicatePHINodes(BB, ToRemove&: PHINodesToRemove);
2880 for (PHINode *PN : PHINodesToRemove) {
2881 removeInstruction(I: PN);
2882 }
2883 for (Instruction &Inst : make_early_inc_range(Range&: *BB))
2884 ChangedFunction |= processInstruction(I: &Inst);
2885 return ChangedFunction;
2886}
2887
2888// Instantiate an expression in a predecessor that lacked it.
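//
// Illustrative sketch (names are hypothetical): to make "add i32 %a, %b"
// available in Pred, each operand's value number is phi-translated into Pred
// and replaced with its leader there; the rebuilt add is then inserted before
// Pred's terminator and recorded in the leader table.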
2889bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
2890 BasicBlock *Curr, unsigned int ValNo) {
2891 // Because we are going top-down through the block, all value numbers
2892 // will be available in the predecessor by the time we need them. Any
2893 // that weren't originally present will have been instantiated earlier
2894 // in this loop.
2895 bool Success = true;
2896 for (unsigned I = 0, E = Instr->getNumOperands(); I != E; ++I) {
2897 Value *Op = Instr->getOperand(i: I);
2898 if (isa<Argument>(Val: Op) || isa<Constant>(Val: Op) || isa<GlobalValue>(Val: Op))
2899 continue;
2900 // This could be a newly inserted instruction, in which case, we won't
2901 // find a value number, and should give up before we hurt ourselves.
2902    // FIXME: Rewrite the infrastructure to make it easier to value number
2903 // and process newly inserted instructions.
2904 if (!VN.exists(V: Op)) {
2905 Success = false;
2906 break;
2907 }
2908 uint32_t TValNo =
2909 VN.phiTranslate(Pred, PhiBlock: Curr, Num: VN.lookup(V: Op), GVN&: *this);
2910 if (Value *V = findLeader(BB: Pred, Num: TValNo)) {
2911 Instr->setOperand(i: I, Val: V);
2912 } else {
2913 Success = false;
2914 break;
2915 }
2916 }
2917
2918 // Fail out if we encounter an operand that is not available in
2919 // the PRE predecessor. This is typically because of loads which
2920 // are not value numbered precisely.
2921 if (!Success)
2922 return false;
2923
2924 Instr->insertBefore(InsertPos: Pred->getTerminator()->getIterator());
2925 Instr->setName(Instr->getName() + ".pre");
2926 Instr->setDebugLoc(Instr->getDebugLoc());
2927
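  // Inform the implicit-control-flow tracker that Instr now lives in Pred.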
  ICF->insertInstructionTo(Instr, Pred);

  unsigned Num = VN.lookupOrAdd(Instr);
  VN.add(Instr, Num);

  // Update the availability map to include the new instruction.
  LeaderTable.insert(Num, Instr, Pred);
  return true;
}

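/// Try partial redundancy elimination on a single scalar instruction: if its
/// value is already available in all but at most one predecessor, make it
/// available in the remaining predecessor (by inserting a clone there) and
/// replace CurInst with a phi of the per-predecessor leaders.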
bool GVNPass::performScalarPRE(Instruction *CurInst) {
  if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
      isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
      CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
      CurInst->getType()->isTokenLikeTy())
    return false;

  // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
  // sinking the compare again, and it would force the code generator to
  // move the i1 from processor flags or predicate registers into a general
  // purpose register.
  if (isa<CmpInst>(CurInst))
    return false;

  // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
  // sinking the addressing mode computation back to its uses. Extending the
  // GEP's live range increases the register pressure, and therefore it can
  // introduce unnecessary spills.
  //
  // This doesn't prevent Load PRE. PHI translation will make the GEP available
  // to the load by moving it to the predecessor block if necessary.
  if (isa<GetElementPtrInst>(CurInst))
    return false;

  if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
    // We don't currently value number ANY inline asm calls.
    if (CallB->isInlineAsm())
      return false;
  }

  uint32_t ValNo = VN.lookup(CurInst);

  // Look at the predecessors for PRE opportunities. We're
  // only trying to solve the basic diamond case, where
  // a value is computed in the successor and one predecessor,
  // but not the other. We also explicitly disallow cases
  // where the successor is its own predecessor, because they're
  // more complicated to get right.
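  //
  // Illustrative IR (block and value names are made up):
  //
  //   left:
  //     %v = add i32 %a, %b
  //     br label %merge
  //   right:
  //     br label %merge
  //   merge:                            ; preds = %left, %right
  //     %w = add i32 %a, %b             ; partially redundant
  //
  // PRE inserts "%w.pre = add i32 %a, %b" at the end of %right and replaces
  // %w with "%w.pre-phi = phi i32 [ %v, %left ], [ %w.pre, %right ]".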
  unsigned NumWith = 0;
  unsigned NumWithout = 0;
  BasicBlock *PREPred = nullptr;
  BasicBlock *CurrentBlock = CurInst->getParent();

  // Update the RPO numbers for this function.
  if (InvalidBlockRPONumbers)
    assignBlockRPONumber(*CurrentBlock->getParent());

  SmallVector<std::pair<Value *, BasicBlock *>, 8> PredMap;
  for (BasicBlock *P : predecessors(CurrentBlock)) {
    // We're not interested in PRE when a predecessor block is not reachable.
    if (!DT->isReachableFromEntry(P)) {
      NumWithout = 2;
      break;
    }
    // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
    assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
           "Invalid BlockRPONumber map.");
    if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
      NumWithout = 2;
      break;
    }

    uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
    Value *PredV = findLeader(P, TValNo);
    if (!PredV) {
      PredMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
      PREPred = P;
      ++NumWithout;
    } else if (PredV == CurInst) {
      // CurInst dominates this predecessor.
      NumWithout = 2;
      break;
    } else {
      PredMap.push_back(std::make_pair(PredV, P));
      ++NumWith;
    }
  }

  // Don't do PRE when it might increase code size, i.e. when
  // we would need to insert instructions in more than one pred.
  if (NumWithout > 1 || NumWith == 0)
    return false;

  // We may have a case where all predecessors have the instruction,
  // and we just need to insert a phi node. Otherwise, perform
  // insertion.
  Instruction *PREInstr = nullptr;

  if (NumWithout != 0) {
    if (!isSafeToSpeculativelyExecute(CurInst)) {
      // It is only valid to insert a new instruction if the current instruction
      // is always executed. An instruction with implicit control flow could
      // prevent us from doing it. If we cannot speculate the execution, then
      // PRE should be prohibited.
      if (ICF->isDominatedByICFIFromSameBlock(CurInst))
        return false;
    }

    // Don't do PRE across an indirect branch.
    if (isa<IndirectBrInst>(PREPred->getTerminator()))
      return false;

    // We can't do PRE safely on a critical edge, so instead we schedule
    // the edge to be split and perform the PRE the next time we iterate
    // on the function.
    unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
    if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
      ToSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
      return false;
    }
    // We need to insert somewhere, so let's give it a shot.
    PREInstr = CurInst->clone();
    if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
      // If we failed insertion, make sure we remove the instruction.
#ifndef NDEBUG
      verifyRemoved(PREInstr);
#endif
      PREInstr->deleteValue();
      return false;
    }
  }

  // Either we should have filled in the PRE instruction, or we should
  // not have needed insertions.
  assert(PREInstr != nullptr || NumWithout == 0);

  ++NumGVNPRE;

  // Create a PHI to make the value available in this block.
  PHINode *Phi = PHINode::Create(CurInst->getType(), PredMap.size(),
                                 CurInst->getName() + ".pre-phi");
  Phi->insertBefore(CurrentBlock->begin());
  for (auto &[V, BB] : PredMap) {
    if (V) {
      // If we use an existing value in this phi, we have to patch the original
      // value because the phi will be used to replace a later value.
      patchReplacementInstruction(CurInst, V);
      Phi->addIncoming(V, BB);
    } else
      Phi->addIncoming(PREInstr, PREPred);
  }

  VN.add(Phi, ValNo);
  // After creating a new PHI for ValNo, the phi translate result for ValNo
  // will change, so erase the related stale entries in the phi translate
  // cache.
  VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
  LeaderTable.insert(ValNo, Phi, CurrentBlock);
  Phi->setDebugLoc(CurInst->getDebugLoc());
  CurInst->replaceAllUsesWith(Phi);
  if (MD && Phi->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(Phi);
  LeaderTable.erase(ValNo, CurInst, CurrentBlock);

  LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
  removeInstruction(CurInst);
  ++NumGVNInstr;

  return true;
}

/// Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVNPass::performPRE(Function &F) {
  bool Changed = false;
  for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock())
      continue;

    // Don't perform PRE on an EH pad.
    if (CurrentBlock->isEHPad())
      continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
                              BE = CurrentBlock->end();
         BI != BE;) {
      Instruction *CurInst = &*BI++;
      Changed |= performScalarPRE(CurInst);
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// Split the critical edge connecting the given two blocks, and return
/// the block inserted on the critical edge.
BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
  // GVN does not require loop-simplify; do not try to preserve it if that is
  // not possible.
  BasicBlock *BB = SplitCriticalEdge(
      Pred, Succ,
      CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
  if (BB) {
    if (MD)
      MD->invalidateCachedPredecessors();
    InvalidBlockRPONumbers = true;
  }
  return BB;
}

/// Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVNPass::splitCriticalEdges() {
  if (ToSplit.empty())
    return false;

  bool Changed = false;
  do {
    std::pair<Instruction *, unsigned> Edge = ToSplit.pop_back_val();
    Changed |= SplitCriticalEdge(Edge.first, Edge.second,
                                 CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
               nullptr;
  } while (!ToSplit.empty());
  if (Changed) {
    if (MD)
      MD->invalidateCachedPredecessors();
    InvalidBlockRPONumbers = true;
  }
  return Changed;
}

/// Executes one iteration of GVN.
bool GVNPass::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  bool Changed = false;
  // Walk the blocks in reverse post-order so that a block's dominators are
  // processed before the block itself; this is needed for value numbering
  // with phi construction to work.
  // RPOT walks the graph in its constructor and will not be invalidated during
  // processBlock.
  ReversePostOrderTraversal<Function *> RPOT(&F);

  for (BasicBlock *BB : RPOT)
    Changed |= processBlock(BB);

  return Changed;
}

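/// Reset the per-iteration state: value numbers, the leader table, block RPO
/// numbers, and the implicit-control-flow tracking.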
void GVNPass::cleanupGlobalSets() {
  VN.clear();
  LeaderTable.clear();
  BlockRPONumber.clear();
  ICF->clear();
  InvalidBlockRPONumbers = true;
}

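/// Erase instruction \p I from the function after detaching it from all of
/// GVN's side structures (value numbering table, MemDep, MemorySSA, and the
/// implicit-control-flow tracker).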
void GVNPass::removeInstruction(Instruction *I) {
  VN.erase(I);
  if (MD) MD->removeInstruction(I);
  if (MSSAU)
    MSSAU->removeMemoryAccess(I);
#ifndef NDEBUG
  verifyRemoved(I);
#endif
  ICF->removeInstruction(I);
  I->eraseFromParent();
}

/// Verify that the specified instruction does not occur in our
/// internal data structures.
void GVNPass::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);
  LeaderTable.verifyRemoved(Inst);
}

/// BB is declared dead, which implies that other blocks become dead as well.
/// This function adds all such blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with poison.
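///
/// For example, if %bb has live predecessor %live and dead predecessor %dead,
/// a phi in %bb such as
///   %p = phi i32 [ %v, %live ], [ %w, %dead ]
/// becomes
///   %p = phi i32 [ %v, %live ], [ poison, %dead ]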
void GVNPass::addDeadBlock(BasicBlock *BB) {
  SmallVector<BasicBlock *, 4> NewDead;
  SmallSetVector<BasicBlock *, 4> DF;

  NewDead.push_back(BB);
  while (!NewDead.empty()) {
    BasicBlock *D = NewDead.pop_back_val();
    if (DeadBlocks.count(D))
      continue;

    // All blocks dominated by D are dead.
    SmallVector<BasicBlock *, 8> Dom;
    DT->getDescendants(D, Dom);
    DeadBlocks.insert_range(Dom);

    // Figure out the dominance frontier of D.
    for (BasicBlock *B : Dom) {
      for (BasicBlock *S : successors(B)) {
        if (DeadBlocks.count(S))
          continue;

        bool AllPredDead = true;
        for (BasicBlock *P : predecessors(S))
          if (!DeadBlocks.count(P)) {
            AllPredDead = false;
            break;
          }

        if (!AllPredDead) {
          // S could be proved dead later on. That is why we don't update phi
          // operands at this moment.
          DF.insert(S);
        } else {
          // While S is not dominated by D, it is dead by now. This could
          // happen if S already had a dead predecessor before D was declared
          // dead.
          NewDead.push_back(S);
        }
      }
    }
  }


  // For the dead blocks' live successors, update their phi nodes by replacing
  // the operands corresponding to dead blocks with poison.
  for (BasicBlock *B : DF) {
    if (DeadBlocks.count(B))
      continue;

    // First, split the critical edges. This might also create additional
    // blocks to preserve LoopSimplify form and adjust edges accordingly.
    SmallVector<BasicBlock *, 4> Preds(predecessors(B));
    for (BasicBlock *P : Preds) {
      if (!DeadBlocks.count(P))
        continue;

      if (is_contained(successors(P), B) &&
          isCriticalEdge(P->getTerminator(), B)) {
        if (BasicBlock *S = splitCriticalEdges(P, B))
          DeadBlocks.insert(P = S);
      }
    }

    // Now poison the incoming values from the dead predecessors.
    for (BasicBlock *P : predecessors(B)) {
      if (!DeadBlocks.count(P))
        continue;
      for (PHINode &Phi : B->phis()) {
        Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType()));
        if (MD)
          MD->invalidateCachedPointerInfo(&Phi);
      }
    }
  }
}

// If the given branch is recognized as a foldable branch (i.e. a conditional
// branch with a constant condition), perform the following analyses and
// transformations:
// 1) If the dead outgoing edge is a critical edge, split it. Let R be the
//    target of the dead outgoing edge.
// 2) Identify the set of dead blocks implied by the branch's dead outgoing
//    edge. The result of this step is {X | X is dominated by R}.
// 3) Identify the blocks that have at least one dead predecessor. The result
//    of this step is dominance-frontier(R).
// 4) Update the PHIs in DF(R) by replacing the operands corresponding to dead
//    blocks with poison, in the hope that these PHIs will be optimized away.
//
// Return true iff *NEW* dead code is found.
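//
// For example (illustrative IR), once GVN has folded the condition:
//   br i1 false, label %then, label %else
// the %then successor (or, if %then has other predecessors, the block created
// by splitting the edge to it) becomes the dead root passed to addDeadBlock().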
bool GVNPass::processFoldableCondBr(BranchInst *BI) {
  if (!BI || BI->isUnconditional())
    return false;

  // If a branch has two identical successors, we cannot declare either dead.
  if (BI->getSuccessor(0) == BI->getSuccessor(1))
    return false;

  ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
  if (!Cond)
    return false;

  BasicBlock *DeadRoot =
      Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
  if (DeadBlocks.count(DeadRoot))
    return false;

  if (!DeadRoot->getSinglePredecessor())
    DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);

  addDeadBlock(DeadRoot);
  return true;
}

// performPRE() will trigger an assert if it comes across an instruction
// without an associated val-num. Since there are normally far more live
// instructions than dead ones, it makes more sense to simply "fabricate" a
// val-num for the dead code than to check whether each instruction involved
// is dead or not.
void GVNPass::assignValNumForDeadCode() {
  for (BasicBlock *BB : DeadBlocks) {
    for (Instruction &Inst : *BB) {
      unsigned ValNum = VN.lookupOrAdd(&Inst);
      LeaderTable.insert(ValNum, &Inst, BB);
    }
  }
}

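/// Legacy pass manager wrapper; it simply gathers the required analyses and
/// forwards them to GVNPass::runImpl().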
class llvm::gvn::GVNLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid.

  explicit GVNLegacyPass(bool MemDepAnalysis = GVNEnableMemDep,
                         bool MemSSAAnalysis = GVNEnableMemorySSA)
      : FunctionPass(ID), Impl(GVNOptions()
                                   .setMemDep(MemDepAnalysis)
                                   .setMemorySSA(MemSSAAnalysis)) {
    initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

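    // MemorySSA may already be available as a side effect of an earlier pass;
    // otherwise, if GVN was configured to use it, getAnalysisUsage declared it
    // as required and it can be requested here.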
    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    if (Impl.isMemorySSAEnabled() && !MSSAWP)
      MSSAWP = &getAnalysis<MemorySSAWrapperPass>();

    return Impl.runImpl(
        F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
        getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
        getAnalysis<AAResultsWrapperPass>().getAAResults(),
        Impl.isMemDepEnabled()
            ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
            : nullptr,
        getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
        &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
        MSSAWP ? &MSSAWP->getMSSA() : nullptr);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    if (Impl.isMemDepEnabled())
      AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
    if (Impl.isMemorySSAEnabled())
      AU.addRequired<MemorySSAWrapperPass>();
  }

private:
  GVNPass Impl;
};


char GVNLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)

// The public interface to this file...
FunctionPass *llvm::createGVNPass() { return new GVNLegacyPass(); }