1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global value numbering to eliminate fully redundant
10// instructions. It also performs simple dead load elimination.
11//
12// Note that this pass does the value numbering itself; it does not use the
13// ValueNumbering analysis passes.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Transforms/Scalar/GVN.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/DepthFirstIterator.h"
20#include "llvm/ADT/Hashing.h"
21#include "llvm/ADT/MapVector.h"
22#include "llvm/ADT/PostOrderIterator.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/ADT/SmallPtrSet.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/Analysis/AliasAnalysis.h"
29#include "llvm/Analysis/AssumeBundleQueries.h"
30#include "llvm/Analysis/AssumptionCache.h"
31#include "llvm/Analysis/CFG.h"
32#include "llvm/Analysis/DomTreeUpdater.h"
33#include "llvm/Analysis/GlobalsModRef.h"
34#include "llvm/Analysis/InstructionPrecedenceTracking.h"
35#include "llvm/Analysis/InstructionSimplify.h"
36#include "llvm/Analysis/Loads.h"
37#include "llvm/Analysis/LoopInfo.h"
38#include "llvm/Analysis/MemoryBuiltins.h"
39#include "llvm/Analysis/MemoryDependenceAnalysis.h"
40#include "llvm/Analysis/MemorySSA.h"
41#include "llvm/Analysis/MemorySSAUpdater.h"
42#include "llvm/Analysis/OptimizationRemarkEmitter.h"
43#include "llvm/Analysis/PHITransAddr.h"
44#include "llvm/Analysis/TargetLibraryInfo.h"
45#include "llvm/Analysis/ValueTracking.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
55#include "llvm/IR/Instructions.h"
56#include "llvm/IR/IntrinsicInst.h"
57#include "llvm/IR/LLVMContext.h"
58#include "llvm/IR/Metadata.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/PassManager.h"
61#include "llvm/IR/PatternMatch.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/Value.h"
65#include "llvm/InitializePasses.h"
66#include "llvm/Pass.h"
67#include "llvm/Support/Casting.h"
68#include "llvm/Support/CommandLine.h"
69#include "llvm/Support/Compiler.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/raw_ostream.h"
72#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
73#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74#include "llvm/Transforms/Utils/Local.h"
75#include "llvm/Transforms/Utils/SSAUpdater.h"
76#include "llvm/Transforms/Utils/VNCoercion.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::gvn;
85using namespace llvm::VNCoercion;
86using namespace PatternMatch;
87
88#define DEBUG_TYPE "gvn"
89
90STATISTIC(NumGVNInstr, "Number of instructions deleted");
91STATISTIC(NumGVNLoad, "Number of loads deleted");
92STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93STATISTIC(NumGVNBlocks, "Number of blocks merged");
94STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96STATISTIC(NumPRELoad, "Number of loads PRE'd");
97STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98STATISTIC(NumPRELoadMoved2CEPred,
99 "Number of loads moved to predecessor of a critical edge in PRE");
100
101STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102 "Number of blocks speculated as available in "
103 "IsValueFullyAvailableInBlock(), max");
104STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105 "Number of times we we reached gvn-max-block-speculations cut-off "
106 "preventing further exploration");
107
// Command-line toggles for the individual GVN sub-transformations. These only
// supply the default: the per-pass GVNOptions override them when explicitly
// set (see the is*Enabled() accessors below).
static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
                                            cl::init(true));
static cl::opt<bool>
GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
                                cl::init(false));
static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
static cl::opt<bool> GVNEnableMemorySSA("enable-gvn-memoryssa",
                                        cl::init(false));

static cl::opt<uint32_t> MaxNumDeps(
    "gvn-max-num-deps", cl::Hidden, cl::init(100),
    cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));

// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
static cl::opt<uint32_t> MaxBBSpeculations(
    "gvn-max-block-speculations", cl::Hidden, cl::init(600),
    cl::desc("Max number of blocks we're willing to speculate on (and recurse "
             "into) when deducing if a value is fully available or not in GVN "
             "(default = 600)"));

static cl::opt<uint32_t> MaxNumVisitedInsts(
    "gvn-max-num-visited-insts", cl::Hidden, cl::init(100),
    cl::desc("Max number of visited instructions when trying to find "
             "dominating value of select dependency (default = 100)"));

static cl::opt<uint32_t> MaxNumInsnsPerBlock(
    "gvn-max-num-insns", cl::Hidden, cl::init(100),
    cl::desc("Max number of instructions to scan in each basic block in GVN "
             "(default = 100)"));
139
/// A value-numbering expression: an opcode plus the value numbers of its
/// operands. Instructions that build equal Expressions get the same value
/// number. Also used directly as a DenseMap key (see the DenseMapInfo
/// specialization below).
struct llvm::GVNPass::Expression {
  uint32_t Opcode;
  bool Commutative = false;
  // The type is not necessarily the result type of the expression, it may be
  // any additional type needed to disambiguate the expression.
  Type *Ty = nullptr;
  SmallVector<uint32_t, 4> VarArgs;

  // Attributes of the originating call (only set for CallBase-derived
  // expressions; see createExpr()).
  AttributeList Attrs;

  // Opcode values ~0U and ~1U are reserved for DenseMap's empty and tombstone
  // keys; ~2U is a neutral default that collides with neither.
  Expression(uint32_t Op = ~2U) : Opcode(Op) {}

  bool operator==(const Expression &Other) const {
    if (Opcode != Other.Opcode)
      return false;
    // Empty/tombstone sentinel keys compare on opcode alone; their other
    // fields are meaningless.
    if (Opcode == ~0U || Opcode == ~1U)
      return true;
    if (Ty != Other.Ty)
      return false;
    if (VarArgs != Other.VarArgs)
      return false;
    // Calls that differ only in attributes are still the same value provided
    // their attribute lists can be intersected into a valid common set.
    if ((!Attrs.isEmpty() || !Other.Attrs.isEmpty()) &&
        !Attrs.intersectWith(Ty->getContext(), Other.Attrs).has_value())
      return false;
    return true;
  }

  // Attrs is deliberately excluded from the hash: operator== can report two
  // expressions with different (but intersectable) attribute lists as equal,
  // and equal keys must hash identically.
  friend hash_code hash_value(const Expression &Value) {
    return hash_combine(Value.Opcode, Value.Ty,
                        hash_combine_range(Value.VarArgs));
  }
};
172
// DenseMapInfo so Expression can key the expression-numbering map. The
// empty/tombstone keys use the reserved Opcode values (~0U / ~1U) that
// Expression::operator== recognizes specially.
template <> struct llvm::DenseMapInfo<GVNPass::Expression> {
  static inline GVNPass::Expression getEmptyKey() { return ~0U; }
  static inline GVNPass::Expression getTombstoneKey() { return ~1U; }

  static unsigned getHashValue(const GVNPass::Expression &E) {
    // Make the friend hash_value visible; hash_combine returns a hash_code,
    // narrow it to DenseMap's unsigned.
    using llvm::hash_value;

    return static_cast<unsigned>(hash_value(E));
  }

  static bool isEqual(const GVNPass::Expression &LHS,
                      const GVNPass::Expression &RHS) {
    return LHS == RHS;
  }
};
188
/// Represents a particular available value that we know how to materialize.
/// Materialization of an AvailableValue never fails. An AvailableValue is
/// implicitly associated with a rematerialization point which is the
/// location of the instruction from which it was formed.
struct llvm::gvn::AvailableValue {
  enum class ValType {
    SimpleVal, // A simple offsetted value that is accessed.
    LoadVal,   // A value produced by a load.
    MemIntrin, // A memory intrinsic which is loaded from.
    UndefVal,  // An UndefValue representing a value from dead block (which
               // is not yet physically removed from the CFG).
    SelectVal, // A pointer select which is loaded from and for which the load
               // can be replaced by a value select.
  };

  /// Val - The value that is live out of the block.
  Value *Val;
  /// Kind of the live-out value.
  ValType Kind;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset = 0;
  /// V1, V2 - The dominating non-clobbered values of SelectVal.
  Value *V1 = nullptr, *V2 = nullptr;

  // Factory for a plain value available at some byte offset.
  static AvailableValue get(Value *V, unsigned Offset = 0) {
    AvailableValue Res;
    Res.Val = V;
    Res.Kind = ValType::SimpleVal;
    Res.Offset = Offset;
    return Res;
  }

  // Factory for a value read out of a memory intrinsic (e.g. memset).
  static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
    AvailableValue Res;
    Res.Val = MI;
    Res.Kind = ValType::MemIntrin;
    Res.Offset = Offset;
    return Res;
  }

  // Factory for a value produced by a (possibly differently-typed) load.
  static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
    AvailableValue Res;
    Res.Val = Load;
    Res.Kind = ValType::LoadVal;
    Res.Offset = Offset;
    return Res;
  }

  // Factory for a value coming from a dead block; Val stays null.
  static AvailableValue getUndef() {
    AvailableValue Res;
    Res.Val = nullptr;
    Res.Kind = ValType::UndefVal;
    Res.Offset = 0;
    return Res;
  }

  // Factory for a load through a pointer select; V1/V2 are the values
  // available on the two select arms.
  static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2) {
    AvailableValue Res;
    Res.Val = Sel;
    Res.Kind = ValType::SelectVal;
    Res.Offset = 0;
    Res.V1 = V1;
    Res.V2 = V2;
    return Res;
  }

  bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
  bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
  bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
  bool isUndefValue() const { return Kind == ValType::UndefVal; }
  bool isSelectValue() const { return Kind == ValType::SelectVal; }

  // Checked accessors: each asserts the discriminant before downcasting Val.

  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val;
  }

  LoadInst *getCoercedLoadValue() const {
    assert(isCoercedLoadValue() && "Wrong accessor");
    return cast<LoadInst>(Val);
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(isMemIntrinValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val);
  }

  SelectInst *getSelectValue() const {
    assert(isSelectValue() && "Wrong accessor");
    return cast<SelectInst>(Val);
  }

  /// Emit code at the specified insertion point to adjust the value defined
  /// here to the specified type. This handles various coercion cases.
  /// (Defined later in this file.)
  Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt) const;
};
286
/// Represents an AvailableValue which can be rematerialized at the end of
/// the associated BasicBlock.
struct llvm::gvn::AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB = nullptr;

  /// AV - The actual available value.
  AvailableValue AV;

  // Primary factory: pair an already-built AvailableValue with its block.
  static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.AV = std::move(AV);
    return Res;
  }

  // Convenience wrapper for the SimpleVal case.
  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    return get(BB, AvailableValue::get(V, Offset));
  }

  // Convenience wrapper for the dead-block (undef) case.
  static AvailableValueInBlock getUndef(BasicBlock *BB) {
    return get(BB, AvailableValue::getUndef());
  }

  // Convenience wrapper for the pointer-select case.
  static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel,
                                         Value *V1, Value *V2) {
    return get(BB, AvailableValue::getSelect(Sel, V1, V2));
  }

  /// Emit code at the end of this block to adjust the value defined here to
  /// the specified type. This handles various coercion cases.
  Value *MaterializeAdjustedValue(LoadInst *Load) const {
    return AV.MaterializeAdjustedValue(Load, BB->getTerminator());
  }
};
323
324//===----------------------------------------------------------------------===//
325// ValueTable Internal Functions
326//===----------------------------------------------------------------------===//
327
/// Build the value-numbering expression for a generic instruction from its
/// opcode, type, and the value numbers of its operands, canonicalizing
/// commutative operations and compares along the way.
GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
  Expression E;
  E.Ty = I->getType();
  E.Opcode = I->getOpcode();
  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
    // gc.relocate is 'special' call: its second and third operands are
    // not real values, but indices into statepoint's argument list.
    // Use the referred-to values for purposes of identity.
    E.VarArgs.push_back(lookupOrAdd(GCR->getOperand(0)));
    E.VarArgs.push_back(lookupOrAdd(GCR->getBasePtr()));
    E.VarArgs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
  } else {
    for (Use &Op : I->operands())
      E.VarArgs.push_back(lookupOrAdd(Op));
  }
  if (I->isCommutative()) {
    // Ensure that commutative instructions that only differ by a permutation
    // of their operands get the same value number by sorting the operand value
    // numbers. Since commutative operands are the 1st two operands it is more
    // efficient to sort by hand rather than using, say, std::sort.
    assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
    if (E.VarArgs[0] > E.VarArgs[1])
      std::swap(E.VarArgs[0], E.VarArgs[1]);
    E.Commutative = true;
  }

  if (auto *C = dyn_cast<CmpInst>(I)) {
    // Sort the operand value numbers so x<y and y>x get the same value number.
    // Note: compares are not isCommutative(), so the swap above did not run;
    // here the predicate must be swapped along with the operands.
    CmpInst::Predicate Predicate = C->getPredicate();
    if (E.VarArgs[0] > E.VarArgs[1]) {
      std::swap(E.VarArgs[0], E.VarArgs[1]);
      Predicate = CmpInst::getSwappedPredicate(Predicate);
    }
    // Pack the predicate into the opcode so differently-predicated compares
    // get distinct expressions (matches createCmpExpr below).
    E.Opcode = (C->getOpcode() << 8) | Predicate;
    E.Commutative = true;
  } else if (auto *IVI = dyn_cast<InsertValueInst>(I)) {
    // Aggregate indices are immediates, not Values; append them directly.
    E.VarArgs.append(IVI->idx_begin(), IVI->idx_end());
  } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
    // Likewise, the shuffle mask is part of the expression identity.
    ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
    E.VarArgs.append(ShuffleMask.begin(), ShuffleMask.end());
  } else if (auto *CB = dyn_cast<CallBase>(I)) {
    // Record call attributes; equality intersects them (see
    // Expression::operator==).
    E.Attrs = CB->getAttributes();
  }

  return E;
}
374
375GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
376 unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
377 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
378 "Not a comparison!");
379 Expression E;
380 E.Ty = CmpInst::makeCmpResultType(opnd_type: LHS->getType());
381 E.VarArgs.push_back(Elt: lookupOrAdd(V: LHS));
382 E.VarArgs.push_back(Elt: lookupOrAdd(V: RHS));
383
384 // Sort the operand value numbers so x<y and y>x get the same value number.
385 if (E.VarArgs[0] > E.VarArgs[1]) {
386 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
387 Predicate = CmpInst::getSwappedPredicate(pred: Predicate);
388 }
389 E.Opcode = (Opcode << 8) | Predicate;
390 E.Commutative = true;
391 return E;
392}
393
394GVNPass::Expression
395GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
396 assert(EI && "Not an ExtractValueInst?");
397 Expression E;
398 E.Ty = EI->getType();
399 E.Opcode = 0;
400
401 WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Val: EI->getAggregateOperand());
402 if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
403 // EI is an extract from one of our with.overflow intrinsics. Synthesize
404 // a semantically equivalent expression instead of an extract value
405 // expression.
406 E.Opcode = WO->getBinaryOp();
407 E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getLHS()));
408 E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getRHS()));
409 return E;
410 }
411
412 // Not a recognised intrinsic. Fall back to producing an extract value
413 // expression.
414 E.Opcode = EI->getOpcode();
415 for (Use &Op : EI->operands())
416 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
417
418 append_range(C&: E.VarArgs, R: EI->indices());
419
420 return E;
421}
422
/// Build the expression for a GEP, preferring an offset-based form so that
/// differently-typed but address-equivalent GEPs unify.
GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
  Expression E;
  Type *PtrTy = GEP->getType()->getScalarType();
  const DataLayout &DL = GEP->getDataLayout();
  unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
  SmallMapVector<Value *, APInt, 4> VariableOffsets;
  APInt ConstantOffset(BitWidth, 0);
  if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
    // Convert into offset representation, to recognize equivalent address
    // calculations that use different type encoding.
    LLVMContext &Context = GEP->getContext();
    E.Opcode = GEP->getOpcode();
    // Ty is intentionally left null: the source element type must not
    // disambiguate offset-form expressions.
    E.Ty = nullptr;
    E.VarArgs.push_back(lookupOrAdd(GEP->getPointerOperand()));
    // Encode each variable index as a (value, scale) pair.
    for (const auto &[V, Scale] : VariableOffsets) {
      E.VarArgs.push_back(lookupOrAdd(V));
      E.VarArgs.push_back(lookupOrAdd(ConstantInt::get(Context, Scale)));
    }
    // A zero constant offset is omitted so "gep p, 0" unifies with "p"-based
    // forms that have no trailing constant.
    if (!ConstantOffset.isZero())
      E.VarArgs.push_back(
          lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
  } else {
    // If converting to offset representation fails (for scalable vectors),
    // fall back to type-based implementation.
    E.Opcode = GEP->getOpcode();
    E.Ty = GEP->getSourceElementType();
    for (Use &Op : GEP->operands())
      E.VarArgs.push_back(lookupOrAdd(Op));
  }
  return E;
}
454
455//===----------------------------------------------------------------------===//
456// ValueTable External Functions
457//===----------------------------------------------------------------------===//
458
// Out-of-line defaulted definitions of ValueTable's special member functions
// (declared in llvm/Transforms/Scalar/GVN.h).
GVNPass::ValueTable::ValueTable() = default;
GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
GVNPass::ValueTable::~ValueTable() = default;
GVNPass::ValueTable &
GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
465
466/// add - Insert a value into the table with a specified value number.
467void GVNPass::ValueTable::add(Value *V, uint32_t Num) {
468 ValueNumbering.insert(KV: std::make_pair(x&: V, y&: Num));
469 if (PHINode *PN = dyn_cast<PHINode>(Val: V))
470 NumberingPhi[Num] = PN;
471}
472
/// Include the incoming memory state into the hash of the expression for the
/// given instruction. If the incoming memory state is:
/// * LiveOnEntry, add the value number of the entry block,
/// * a MemoryPhi, add the value number of the basic block corresponding to that
///   MemoryPhi,
/// * a MemoryDef, add the value number of the memory setting instruction.
void GVNPass::ValueTable::addMemoryStateToExp(Instruction *I, Expression &Exp) {
  assert(MSSA && "addMemoryStateToExp should not be called without MemorySSA");
  assert(MSSA->getMemoryAccess(I) && "Instruction does not access memory");
  // The skip-self walker looks past I's own access to find the real clobber.
  MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I);
  Exp.VarArgs.push_back(lookupOrAdd(MA));
}
485
/// Returns the value number for call \p C. A read-only call may reuse the
/// value number of an identical dominating call when the dependence analysis
/// in use (MemDep or MemorySSA) proves memory has not changed in between;
/// otherwise the call gets a fresh number.
uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
  // FIXME: Currently the calls which may access the thread id may
  // be considered as not accessing the memory. But this is
  // problematic for coroutines, since coroutines may resume in a
  // different thread. So we disable the optimization here for the
  // correctness. However, it may block many other correct
  // optimizations. Revert this one when we detect the memory
  // accessing kind more precisely.
  if (C->getFunction()->isPresplitCoroutine()) {
    ValueNumbering[C] = NextValueNumber;
    return NextValueNumber++;
  }

  // Do not combine convergent calls since they implicitly depend on the set of
  // threads that is currently executing, and they might be in different basic
  // blocks.
  if (C->isConvergent()) {
    ValueNumbering[C] = NextValueNumber;
    return NextValueNumber++;
  }

  // Calls that touch no memory at all can be numbered purely structurally.
  if (AA->doesNotAccessMemory(C)) {
    Expression Exp = createExpr(C);
    uint32_t E = assignExpNewValueNum(Exp).first;
    ValueNumbering[C] = E;
    return E;
  }

  // Read-only calls: use MemDep to find an identical call whose result is
  // still valid here.
  if (MD && AA->onlyReadsMemory(C)) {
    Expression Exp = createExpr(C);
    auto [E, IsValNumNew] = assignExpNewValueNum(Exp);
    // A brand-new expression number cannot match any earlier call, so there
    // is nothing further to check.
    if (IsValNumNew) {
      ValueNumbering[C] = E;
      return E;
    }

    MemDepResult LocalDep = MD->getDependency(C);

    // A local clobber (neither a def nor non-local) invalidates reuse.
    if (!LocalDep.isDef() && !LocalDep.isNonLocal()) {
      ValueNumbering[C] = NextValueNumber;
      return NextValueNumber++;
    }

    if (LocalDep.isDef()) {
      // For masked load/store intrinsics, the local_dep may actually be
      // a normal load or store instruction.
      CallInst *LocalDepCall = dyn_cast<CallInst>(LocalDep.getInst());

      if (!LocalDepCall || LocalDepCall->arg_size() != C->arg_size()) {
        ValueNumbering[C] = NextValueNumber;
        return NextValueNumber++;
      }

      // All arguments must have matching value numbers for the two calls to
      // share a number.
      for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
        uint32_t CVN = lookupOrAdd(C->getArgOperand(I));
        uint32_t LocalDepCallVN = lookupOrAdd(LocalDepCall->getArgOperand(I));
        if (CVN != LocalDepCallVN) {
          ValueNumbering[C] = NextValueNumber;
          return NextValueNumber++;
        }
      }

      uint32_t V = lookupOrAdd(LocalDepCall);
      ValueNumbering[C] = V;
      return V;
    }

    // Non-local case.
    const MemoryDependenceResults::NonLocalDepInfo &Deps =
        MD->getNonLocalCallDependency(C);
    // FIXME: Move the checking logic to MemDep!
    CallInst *CDep = nullptr;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (const NonLocalDepEntry &I : Deps) {
      if (I.getResult().isNonLocal())
        continue;

      // We don't handle non-definitions. If we already have a call, reject
      // instruction dependencies.
      if (!I.getResult().isDef() || CDep != nullptr) {
        CDep = nullptr;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I.getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I.getBB(), C->getParent())) {
        CDep = NonLocalDepCall;
        continue;
      }

      CDep = nullptr;
      break;
    }

    if (!CDep) {
      ValueNumbering[C] = NextValueNumber;
      return NextValueNumber++;
    }

    if (CDep->arg_size() != C->arg_size()) {
      ValueNumbering[C] = NextValueNumber;
      return NextValueNumber++;
    }
    // As above: argument value numbers must agree pairwise.
    for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
      uint32_t CVN = lookupOrAdd(C->getArgOperand(I));
      uint32_t CDepVN = lookupOrAdd(CDep->getArgOperand(I));
      if (CVN != CDepVN) {
        ValueNumbering[C] = NextValueNumber;
        return NextValueNumber++;
      }
    }

    uint32_t V = lookupOrAdd(CDep);
    ValueNumbering[C] = V;
    return V;
  }

  // MemorySSA-based path: fold the incoming memory state into the expression
  // instead of querying MemDep.
  if (MSSA && IsMSSAEnabled && AA->onlyReadsMemory(C)) {
    Expression Exp = createExpr(C);
    addMemoryStateToExp(C, Exp);
    auto [V, _] = assignExpNewValueNum(Exp);
    ValueNumbering[C] = V;
    return V;
  }

  // Writes memory (or no dependence info available): always a fresh number.
  ValueNumbering[C] = NextValueNumber;
  return NextValueNumber++;
}
617
618/// Returns the value number for the specified load or store instruction.
619uint32_t GVNPass::ValueTable::computeLoadStoreVN(Instruction *I) {
620 if (!MSSA || !IsMSSAEnabled) {
621 ValueNumbering[I] = NextValueNumber;
622 return NextValueNumber++;
623 }
624
625 Expression Exp;
626 Exp.Ty = I->getType();
627 Exp.Opcode = I->getOpcode();
628 for (Use &Op : I->operands())
629 Exp.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
630 addMemoryStateToExp(I, Exp);
631
632 auto [V, _] = assignExpNewValueNum(Exp);
633 ValueNumbering[I] = V;
634 return V;
635}
636
637/// Returns true if a value number exists for the specified value.
638bool GVNPass::ValueTable::exists(Value *V) const {
639 return ValueNumbering.contains(Val: V);
640}
641
642uint32_t GVNPass::ValueTable::lookupOrAdd(MemoryAccess *MA) {
643 return MSSA->isLiveOnEntryDef(MA) || isa<MemoryPhi>(Val: MA)
644 ? lookupOrAdd(V: MA->getBlock())
645 : lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst());
646}
647
/// lookupOrAdd - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
  DenseMap<Value *, uint32_t>::iterator VI = ValueNumbering.find(V);
  if (VI != ValueNumbering.end())
    return VI->second;

  // Non-instructions (constants, arguments, basic blocks, ...) always get a
  // fresh number; basic blocks also get a reverse number -> block mapping.
  auto *I = dyn_cast<Instruction>(V);
  if (!I) {
    ValueNumbering[V] = NextValueNumber;
    if (isa<BasicBlock>(V))
      NumberingBB[NextValueNumber] = cast<BasicBlock>(V);
    return NextValueNumber++;
  }

  // Dispatch on opcode: expression-forming instructions are hashed into the
  // expression table; everything else gets an opaque fresh number.
  Expression Exp;
  switch (I->getOpcode()) {
  case Instruction::Call:
    return lookupOrAddCall(cast<CallInst>(I));
  case Instruction::FNeg:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::PtrToAddr:
  case Instruction::IntToPtr:
  case Instruction::AddrSpaceCast:
  case Instruction::BitCast:
  case Instruction::Select:
  case Instruction::Freeze:
  case Instruction::ExtractElement:
  case Instruction::InsertElement:
  case Instruction::ShuffleVector:
  case Instruction::InsertValue:
    Exp = createExpr(I);
    break;
  case Instruction::GetElementPtr:
    Exp = createGEPExpr(cast<GetElementPtrInst>(I));
    break;
  case Instruction::ExtractValue:
    Exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
    break;
  case Instruction::PHI:
    // PHIs are never expressions; they get a unique number plus a reverse
    // number -> PHI mapping.
    ValueNumbering[V] = NextValueNumber;
    NumberingPhi[NextValueNumber] = cast<PHINode>(V);
    return NextValueNumber++;
  case Instruction::Load:
  case Instruction::Store:
    return computeLoadStoreVN(I);
  default:
    // Unknown/side-effecting instructions are opaque: fresh number.
    ValueNumbering[V] = NextValueNumber;
    return NextValueNumber++;
  }

  uint32_t E = assignExpNewValueNum(Exp).first;
  ValueNumbering[V] = E;
  return E;
}
732
733/// Returns the value number of the specified value. Fails if
734/// the value has not yet been numbered.
735uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
736 DenseMap<Value *, uint32_t>::const_iterator VI = ValueNumbering.find(Val: V);
737 if (Verify) {
738 assert(VI != ValueNumbering.end() && "Value not numbered?");
739 return VI->second;
740 }
741 return (VI != ValueNumbering.end()) ? VI->second : 0;
742}
743
/// Returns the value number of the given comparison,
/// assigning it a new number if it did not have one before. Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
                                             CmpInst::Predicate Predicate,
                                             Value *LHS, Value *RHS) {
  // Note: no instruction is recorded in ValueNumbering here — only the
  // synthesized expression is numbered.
  Expression Exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
  return assignExpNewValueNum(Exp).first;
}
754
755/// Remove all entries from the ValueTable.
756void GVNPass::ValueTable::clear() {
757 ValueNumbering.clear();
758 ExpressionNumbering.clear();
759 NumberingPhi.clear();
760 NumberingBB.clear();
761 PhiTranslateTable.clear();
762 NextValueNumber = 1;
763 Expressions.clear();
764 ExprIdx.clear();
765 NextExprNumber = 0;
766}
767
768/// Remove a value from the value numbering.
769void GVNPass::ValueTable::erase(Value *V) {
770 uint32_t Num = ValueNumbering.lookup(Val: V);
771 ValueNumbering.erase(Val: V);
772 // If V is PHINode, V <--> value number is an one-to-one mapping.
773 if (isa<PHINode>(Val: V))
774 NumberingPhi.erase(Val: Num);
775 else if (isa<BasicBlock>(Val: V))
776 NumberingBB.erase(Val: Num);
777}
778
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
  // Only the forward Value -> number map is checked; the reverse maps are
  // keyed by number and scrubbed in erase().
  assert(!ValueNumbering.contains(V) &&
         "Inst still occurs in value numbering map!");
}
785
786//===----------------------------------------------------------------------===//
787// LeaderMap External Functions
788//===----------------------------------------------------------------------===//
789
790/// Push a new Value to the LeaderTable onto the list for its value number.
791void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
792 LeaderListNode &Curr = NumToLeaders[N];
793 if (!Curr.Entry.Val) {
794 Curr.Entry.Val = V;
795 Curr.Entry.BB = BB;
796 return;
797 }
798
799 LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>();
800 Node->Entry.Val = V;
801 Node->Entry.BB = BB;
802 Node->Next = Curr.Next;
803 Curr.Next = Node;
804}
805
/// Scan the list of values corresponding to a given
/// value number, and remove the given instruction if encountered.
void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
                               const BasicBlock *BB) {
  LeaderListNode *Prev = nullptr;
  // The first node is stored inline in the map, so it cannot simply be
  // unlinked like the heap-allocated tail nodes.
  LeaderListNode *Curr = &NumToLeaders[N];

  // Find the node matching both the instruction and its block.
  while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
    Prev = Curr;
    Curr = Curr->Next;
  }

  // Not present: nothing to do.
  if (!Curr)
    return;

  if (Prev) {
    // Interior/tail node: unlink it (node memory stays in the allocator).
    Prev->Next = Curr->Next;
  } else {
    // Head node: either empty it in place (single-element list) or copy the
    // second node's payload into it and unlink that node instead.
    if (!Curr->Next) {
      Curr->Entry.Val = nullptr;
      Curr->Entry.BB = nullptr;
    } else {
      LeaderListNode *Next = Curr->Next;
      Curr->Entry.Val = Next->Entry.Val;
      Curr->Entry.BB = Next->Entry.BB;
      Curr->Next = Next->Next;
    }
  }
}
835
/// Assert that \p V no longer appears anywhere in the leader table.
void GVNPass::LeaderMap::verifyRemoved(const Value *V) const {
  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (const auto &I : NumToLeaders) {
    (void)I; // Suppress unused-variable warnings when asserts are disabled.
    // Check the inline head entry, then every chained node for this number.
    assert(I.second.Entry.Val != V && "Inst still in value numbering scope!");
    assert(
        std::none_of(leader_iterator(&I.second), leader_iterator(nullptr),
                     [=](const LeaderTableEntry &E) { return E.Val == V; }) &&
        "Inst still in value numbering scope!");
  }
}
848
849//===----------------------------------------------------------------------===//
850// GVN Pass
851//===----------------------------------------------------------------------===//
852
// The Allow* members of GVNOptions are tri-state: when a given option was not
// explicitly set on the pass, fall back to the corresponding command-line
// flag's value.

bool GVNPass::isPREEnabled() const {
  return Options.AllowPRE.value_or(GVNEnablePRE);
}

bool GVNPass::isLoadPREEnabled() const {
  return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE);
}

bool GVNPass::isLoadInLoopPREEnabled() const {
  return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE);
}

bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
  return Options.AllowLoadPRESplitBackedge.value_or(
      GVNEnableSplitBackedgeInLoadPRE);
}

bool GVNPass::isMemDepEnabled() const {
  return Options.AllowMemDep.value_or(GVNEnableMemDep);
}

bool GVNPass::isMemorySSAEnabled() const {
  return Options.AllowMemorySSA.value_or(GVNEnableMemorySSA);
}
877
/// New-PM entry point: gather the analyses, run the transformation, and
/// report which analyses survive.
PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
  // FIXME: The order of evaluation of these 'getResult' calls is very
  // significant! Re-ordering these variables will cause GVN when run alone to
  // be less effective! We should fix memdep and basic-aa to not exhibit this
  // behavior, but until then don't change the order here.
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  auto *MemDep =
      isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
  auto &LI = AM.getResult<LoopAnalysis>(F);
  // Prefer an already-computed MemorySSA; compute it on demand only when the
  // MemorySSA-based mode is requested.
  auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
  if (isMemorySSAEnabled() && !MSSA) {
    assert(!MemDep &&
           "On-demand computation of MemSSA implies that MemDep is disabled!");
    MSSA = &AM.getResult<MemorySSAAnalysis>(F);
  }
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
                         MSSA ? &MSSA->getMSSA() : nullptr);
  if (!Changed)
    return PreservedAnalyses::all();
  // GVN keeps the dominator tree, loop info, TLI and (when used) MemorySSA up
  // to date; everything else must be recomputed.
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<TargetLibraryAnalysis>();
  if (MSSA)
    PA.preserve<MemorySSAAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}
909
910void GVNPass::printPipeline(
911 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
912 static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
913 OS, MapClassName2PassName);
914
915 OS << '<';
916 if (Options.AllowPRE != std::nullopt)
917 OS << (*Options.AllowPRE ? "" : "no-") << "pre;";
918 if (Options.AllowLoadPRE != std::nullopt)
919 OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
920 if (Options.AllowLoadPRESplitBackedge != std::nullopt)
921 OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
922 << "split-backedge-load-pre;";
923 if (Options.AllowMemDep != std::nullopt)
924 OS << (*Options.AllowMemDep ? "" : "no-") << "memdep;";
925 if (Options.AllowMemorySSA != std::nullopt)
926 OS << (*Options.AllowMemorySSA ? "" : "no-") << "memoryssa";
927 OS << '>';
928}
929
/// Erase \p I after first salvaging whatever can outlive it. The order
/// matters: knowledge and debug info must be rescued while I still exists.
void GVNPass::salvageAndRemoveInstruction(Instruction *I) {
  // Preserve assume-derived knowledge attached to I before it is erased.
  salvageKnowledge(I, AC);
  // Rewrite debug records that referenced I in terms of its operands.
  salvageDebugInfo(I&: *I);
  removeInstruction(I);
}
935
936#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
937LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &Map) const {
938 errs() << "{\n";
939 for (const auto &[Num, Exp] : Map) {
940 errs() << Num << "\n";
941 Exp->dump();
942 }
943 errs() << "}\n";
944}
945#endif
946
/// Tri-state availability used by IsValueFullyAvailableInBlock below.
/// Backed by char so DenseMap values stay small.
enum class AvailabilityState : char {
  /// We know the block *is not* fully available. This is a fixpoint.
  Unavailable = 0,
  /// We know the block *is* fully available. This is a fixpoint.
  Available = 1,
  /// We do not know whether the block is fully available or not,
  /// but we are currently speculating that it will be.
  /// If it would have turned out that the block was, in fact, not fully
  /// available, this would have been cleaned up into an Unavailable.
  SpeculativelyAvailable = 2,
};
958
/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a tri-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
static bool IsValueFullyAvailableInBlock(
    BasicBlock *BB,
    DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
  SmallVector<BasicBlock *, 32> Worklist;
  // Set to the first block discovered to be unavailable; unset means the
  // whole walk succeeded.
  std::optional<BasicBlock *> UnavailableBB;

  // The number of times we didn't find an entry for a block in a map and
  // optimistically inserted an entry marking block as speculatively available.
  unsigned NumNewNewSpeculativelyAvailableBBs = 0;

#ifndef NDEBUG
  // Debug-only bookkeeping used to verify the fixpoint-marking pass below
  // resolves every speculative entry we created.
  SmallPtrSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
  SmallVector<BasicBlock *, 32> AvailableBBs;
#endif

  Worklist.emplace_back(Args&: BB);
  while (!Worklist.empty()) {
    BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
    // Optimistically assume that the block is Speculatively Available and check
    // to see if we already know about this block in one lookup.
    std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
        FullyAvailableBlocks.try_emplace(
            Key: CurrBB, Args: AvailabilityState::SpeculativelyAvailable);
    AvailabilityState &State = IV.first->second;

    // Did the entry already exist for this block?
    if (!IV.second) {
      if (State == AvailabilityState::Unavailable) {
        UnavailableBB = CurrBB;
        break; // Backpropagate unavailability info.
      }

#ifndef NDEBUG
      AvailableBBs.emplace_back(CurrBB);
#endif
      continue; // Don't recurse further, but continue processing worklist.
    }

    // No entry found for block.
    ++NumNewNewSpeculativelyAvailableBBs;
    bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;

    // If we have exhausted our budget, mark this block as unavailable.
    // Also, if this block has no predecessors, the value isn't live-in here.
    if (OutOfBudget || pred_empty(BB: CurrBB)) {
      MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
      State = AvailabilityState::Unavailable;
      UnavailableBB = CurrBB;
      break; // Backpropagate unavailability info.
    }

    // Tentatively consider this block as speculatively available.
#ifndef NDEBUG
    NewSpeculativelyAvailableBBs.insert(CurrBB);
#endif
    // And further recurse into block's predecessors, in depth-first order!
    Worklist.append(in_start: pred_begin(BB: CurrBB), in_end: pred_end(BB: CurrBB));
  }

#if LLVM_ENABLE_STATS
  IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
      NumNewNewSpeculativelyAvailableBBs);
#endif

  // Helper for the cleanup passes below: if BB is still speculative, pin it
  // to the given fixpoint state and propagate to its successors. Blocks
  // already at a fixpoint (Unavailable/Available) terminate the propagation
  // (the Unavailable and Available states are fixpoints).
  auto MarkAsFixpointAndEnqueueSuccessors =
      [&](BasicBlock *BB, AvailabilityState FixpointState) {
        auto It = FullyAvailableBlocks.find(Val: BB);
        if (It == FullyAvailableBlocks.end())
          return; // Never queried this block, leave as-is.
        switch (AvailabilityState &State = It->second) {
        case AvailabilityState::Unavailable:
        case AvailabilityState::Available:
          return; // Don't backpropagate further, continue processing worklist.
        case AvailabilityState::SpeculativelyAvailable: // Fix it!
          State = FixpointState;
#ifndef NDEBUG
          assert(NewSpeculativelyAvailableBBs.erase(BB) &&
                 "Found a speculatively available successor leftover?");
#endif
          // Queue successors for further processing.
          Worklist.append(in_start: succ_begin(BB), in_end: succ_end(BB));
          return;
        }
      };

  if (UnavailableBB) {
    // Okay, we have encountered an unavailable block.
    // Mark speculatively available blocks reachable from UnavailableBB as
    // unavailable as well. Paths are terminated when they reach blocks not in
    // FullyAvailableBlocks or they are not marked as speculatively available.
    Worklist.clear();
    Worklist.append(in_start: succ_begin(BB: *UnavailableBB), in_end: succ_end(BB: *UnavailableBB));
    while (!Worklist.empty())
      MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
                                         AvailabilityState::Unavailable);
  }

#ifndef NDEBUG
  // In asserts builds, also resolve the remaining speculative blocks (those
  // reachable from known-Available blocks) so we can verify none are left.
  Worklist.clear();
  for (BasicBlock *AvailableBB : AvailableBBs)
    Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
  while (!Worklist.empty())
    MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
                                       AvailabilityState::Available);

  assert(NewSpeculativelyAvailableBBs.empty() &&
         "Must have fixed all the new speculatively available blocks.");
#endif

  return !UnavailableBB;
}
1080
1081/// If the specified OldValue exists in ValuesPerBlock, replace its value with
1082/// NewValue.
1083static void replaceValuesPerBlockEntry(
1084 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1085 Value *NewValue) {
1086 for (AvailableValueInBlock &V : ValuesPerBlock) {
1087 if (V.AV.Val == OldValue)
1088 V.AV.Val = NewValue;
1089 if (V.AV.isSelectValue()) {
1090 if (V.AV.V1 == OldValue)
1091 V.AV.V1 = NewValue;
1092 if (V.AV.V2 == OldValue)
1093 V.AV.V2 = NewValue;
1094 }
1095 }
1096}
1097
/// Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate Load. This returns the value
/// that should be used at Load's definition site.
static Value *
ConstructSSAForLoadSet(LoadInst *Load,
                       SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                       GVNPass &GVN) {
  // Check for the fully redundant, dominating load case. In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      GVN.getDominatorTree().properlyDominates(A: ValuesPerBlock[0].BB,
                                               B: Load->getParent())) {
    assert(!ValuesPerBlock[0].AV.isUndefValue() &&
           "Dead BB dominate this block");
    return ValuesPerBlock[0].MaterializeAdjustedValue(Load);
  }

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(Ty: Load->getType(), Name: Load->getName());

  for (const AvailableValueInBlock &AV : ValuesPerBlock) {
    BasicBlock *BB = AV.BB;

    // Undef entries come from dead blocks; nothing to materialize for them.
    if (AV.AV.isUndefValue())
      continue;

    // Only the first available value per block is registered.
    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    // If the value is the load that we will be eliminating, and the block it's
    // available in is the block that the load is in, then don't add it as
    // SSAUpdater will resolve the value to the relevant phi which may let it
    // avoid phi construction entirely if there's actually only one value.
    if (BB == Load->getParent() &&
        ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
         (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
      continue;

    SSAUpdate.AddAvailableValue(BB, V: AV.MaterializeAdjustedValue(Load));
  }

  // Perform PHI construction.
  return SSAUpdate.GetValueInMiddleOfBlock(BB: Load->getParent());
}
1144
/// Materialize this AvailableValue as something of Load's type at InsertPt,
/// coercing (bit-width/offset adjusting) the underlying value if needed.
/// The kind of the available value (simple / coerced load / mem-intrinsic /
/// select) determines how the replacement is built.
Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
                                                Instruction *InsertPt) const {
  Value *Res;
  Type *LoadTy = Load->getType();
  const DataLayout &DL = Load->getDataLayout();
  if (isSimpleValue()) {
    Res = getSimpleValue();
    // A type mismatch requires coercion code at the insertion point.
    if (Res->getType() != LoadTy) {
      Res = getValueForLoad(SrcVal: Res, Offset, LoadTy, InsertPt, F: Load->getFunction());

      LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
                        << " " << *getSimpleValue() << '\n'
                        << *Res << '\n'
                        << "\n\n\n");
    }
  } else if (isCoercedLoadValue()) {
    LoadInst *CoercedLoad = getCoercedLoadValue();
    if (CoercedLoad->getType() == LoadTy && Offset == 0) {
      // Exact match: reuse the prior load directly, merging metadata.
      Res = CoercedLoad;
      combineMetadataForCSE(K: CoercedLoad, J: Load, DoesKMove: false);
    } else {
      Res = getValueForLoad(SrcVal: CoercedLoad, Offset, LoadTy, InsertPt,
                            F: Load->getFunction());
      // We are adding a new user for this load, for which the original
      // metadata may not hold. Additionally, the new load may have a different
      // size and type, so their metadata cannot be combined in any
      // straightforward way.
      // Drop all metadata that is not known to cause immediate UB on violation,
      // unless the load has !noundef, in which case all metadata violations
      // will be promoted to UB.
      // TODO: We can combine noalias/alias.scope metadata here, because it is
      // independent of the load type.
      if (!CoercedLoad->hasMetadata(KindID: LLVMContext::MD_noundef))
        CoercedLoad->dropUnknownNonDebugMetadata(
            KnownIDs: {LLVMContext::MD_dereferenceable,
             LLVMContext::MD_dereferenceable_or_null,
             LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group});
      LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
                        << " " << *getCoercedLoadValue() << '\n'
                        << *Res << '\n'
                        << "\n\n\n");
    }
  } else if (isMemIntrinValue()) {
    // Extract the value a load at Offset would observe out of the
    // memset/memcpy/memmove.
    Res = getMemInstValueForLoad(SrcInst: getMemIntrinValue(), Offset, LoadTy,
                                 InsertPt, DL);
    LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                      << " " << *getMemIntrinValue() << '\n'
                      << *Res << '\n'
                      << "\n\n\n");
  } else if (isSelectValue()) {
    // Introduce a new value select for a load from an eligible pointer select.
    SelectInst *Sel = getSelectValue();
    assert(V1 && V2 && "both value operands of the select must be present");
    Res =
        SelectInst::Create(C: Sel->getCondition(), S1: V1, S2: V2, NameStr: "", InsertBefore: Sel->getIterator());
    // We use the DebugLoc from the original load here, as this instruction
    // materializes the value that would previously have been loaded.
    cast<SelectInst>(Val: Res)->setDebugLoc(Load->getDebugLoc());
  } else {
    llvm_unreachable("Should not materialize value from dead block");
  }
  assert(Res && "failed to materialize?");
  return Res;
}
1209
1210static bool isLifetimeStart(const Instruction *Inst) {
1211 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Val: Inst))
1212 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1213 return false;
1214}
1215
1216/// Assuming To can be reached from both From and Between, does Between lie on
1217/// every path from From to To?
1218static bool liesBetween(const Instruction *From, Instruction *Between,
1219 const Instruction *To, const DominatorTree *DT) {
1220 if (From->getParent() == Between->getParent())
1221 return DT->dominates(Def: From, User: Between);
1222 SmallPtrSet<BasicBlock *, 1> Exclusion;
1223 Exclusion.insert(Ptr: Between->getParent());
1224 return !isPotentiallyReachable(From, To, ExclusionSet: &Exclusion, DT);
1225}
1226
/// For diagnostics: find another load/store of Load's pointer operand that
/// Load would likely have been eliminated in favor of, were it not for an
/// intervening clobber. Prefers a dominating access; falls back to a unique
/// access lying between other accesses and Load. Returns null if none found.
static const Instruction *findMayClobberedPtrAccess(LoadInst *Load,
                                                    const DominatorTree *DT) {
  Value *PtrOp = Load->getPointerOperand();
  // Without a use list we cannot enumerate other accesses at all.
  if (!PtrOp->hasUseList())
    return nullptr;

  Instruction *OtherAccess = nullptr;

  // First pass: look for a dominating load/store of the same pointer, keeping
  // the most immediately dominating one.
  for (auto *U : PtrOp->users()) {
    if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) {
      auto *I = cast<Instruction>(Val: U);
      if (I->getFunction() == Load->getFunction() && DT->dominates(Def: I, User: Load)) {
        // Use the most immediately dominating value.
        if (OtherAccess) {
          if (DT->dominates(Def: OtherAccess, User: I))
            OtherAccess = I;
          else
            assert(U == OtherAccess || DT->dominates(I, OtherAccess));
        } else
          OtherAccess = I;
      }
    }
  }

  if (OtherAccess)
    return OtherAccess;

  // There is no dominating use, check if we can find a closest non-dominating
  // use that lies between any other potentially available use and Load.
  for (auto *U : PtrOp->users()) {
    if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) {
      auto *I = cast<Instruction>(Val: U);
      if (I->getFunction() == Load->getFunction() &&
          isPotentiallyReachable(From: I, To: Load, ExclusionSet: nullptr, DT)) {
        if (OtherAccess) {
          if (liesBetween(From: OtherAccess, Between: I, To: Load, DT)) {
            // I is closer to Load than the current candidate; prefer it.
            OtherAccess = I;
          } else if (!liesBetween(From: I, Between: OtherAccess, To: Load, DT)) {
            // These uses are both partially available at Load were it not for
            // the clobber, but neither lies strictly after the other.
            // Ambiguous: report nothing rather than a misleading access.
            OtherAccess = nullptr;
            break;
          } // else: keep current OtherAccess since it lies between U and
            // Load.
        } else {
          OtherAccess = I;
        }
      }
    }
  }

  return OtherAccess;
}
1280
/// Try to locate the three instructions involved in a missed
/// load-elimination case that is due to an intervening store, and emit an
/// optimization remark naming the load, the access it could have reused, and
/// the clobbering instruction.
static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
                                   const DominatorTree *DT,
                                   OptimizationRemarkEmitter *ORE) {
  using namespace ore;

  OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
  R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
    << setExtraArgs();

  // Mention the access we would have forwarded from, when identifiable.
  const Instruction *OtherAccess = findMayClobberedPtrAccess(Load, DT);
  if (OtherAccess)
    R << " in favor of " << NV("OtherAccess", OtherAccess);

  R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());

  ORE->emit(OptDiag&: R);
}
1300
// Find a dominating value for Loc memory location in the extended basic block
// (chain of basic blocks with single predecessors) starting From instruction.
// Returns the value from a matching load or a simple store to the same pointer.
static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy,
                                  Instruction *From, AAResults *AA) {
  uint32_t NumVisitedInsts = 0;
  BasicBlock *FromBB = From->getParent();
  // BatchAA caches alias queries, which is safe here because we only read IR.
  BatchAAResults BatchAA(*AA);
  // Walk backwards from From through the single-predecessor chain.
  for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
    for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
         Inst != nullptr; Inst = Inst->getPrevNode()) {
      // Stop the search if limit is reached.
      if (++NumVisitedInsts > MaxNumVisitedInsts)
        return nullptr;
      // Any instruction that may write Loc ends the search: it either
      // provides the value (an exact simple store) or hides it.
      if (isModSet(MRI: BatchAA.getModRefInfo(I: Inst, OptLoc: Loc))) {
        // A simple store to the exact location can forward its value.
        if (auto *SI = dyn_cast<StoreInst>(Val: Inst))
          if (SI->isSimple() && SI->getPointerOperand() == Loc.Ptr &&
              SI->getValueOperand()->getType() == LoadTy)
            return SI->getValueOperand();
        return nullptr;
      }
      // A load of the exact pointer and type re-produces the value.
      if (auto *LI = dyn_cast<LoadInst>(Val: Inst))
        if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
          return LI;
    }
  return nullptr;
}
1329
/// Given a local dependence (clobber or def) of \p Load, decide whether a
/// value can be forwarded to the load, and if so describe how via an
/// AvailableValue. \p Address is the (possibly PHI-translated) load address.
/// Returns std::nullopt when nothing can be forwarded.
std::optional<AvailableValue>
GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
                                 Value *Address) {
  assert(Load->isUnordered() && "rules below are incorrect for ordered access");
  assert(DepInfo.isLocal() && "expected a local dependence");

  Instruction *DepInst = DepInfo.getInst();

  const DataLayout &DL = Load->getDataLayout();
  if (DepInfo.isClobber()) {
    // If the dependence is to a store that writes to a superset of the bits
    // read by the load, we can extract the bits we need for the load from the
    // stored value.
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Val: DepInst)) {
      // Can't forward from non-atomic to atomic without violating memory model.
      if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
        int Offset =
            analyzeLoadFromClobberingStore(LoadTy: Load->getType(), LoadPtr: Address, DepSI, DL);
        if (Offset != -1)
          return AvailableValue::get(V: DepSI->getValueOperand(), Offset);
      }
    }

    // Check to see if we have something like this:
    //    load i32* P
    //    load i8* (P+1)
    // if we have this, replace the later with an extraction from the former.
    if (LoadInst *DepLoad = dyn_cast<LoadInst>(Val: DepInst)) {
      // If this is a clobber and L is the first instruction in its block, then
      // we have the first instruction in the entry block.
      // Can't forward from non-atomic to atomic without violating memory model.
      if (DepLoad != Load && Address &&
          Load->isAtomic() <= DepLoad->isAtomic()) {
        Type *LoadType = Load->getType();
        int Offset = -1;

        // If MD reported clobber, check it was nested.
        if (DepInfo.isClobber() &&
            canCoerceMustAliasedValueToLoad(StoredVal: DepLoad, LoadTy: LoadType,
                                            F: DepLoad->getFunction())) {
          const auto ClobberOff = MD->getClobberOffset(DepInst: DepLoad);
          // GVN does not handle negative offsets.
          Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
                       ? -1
                       : *ClobberOff;
        }
        if (Offset == -1)
          Offset =
              analyzeLoadFromClobberingLoad(LoadTy: LoadType, LoadPtr: Address, DepLI: DepLoad, DL);
        if (Offset != -1)
          return AvailableValue::getLoad(Load: DepLoad, Offset);
      }
    }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Val: DepInst)) {
      // Atomic loads cannot be fed from a memory intrinsic.
      if (Address && !Load->isAtomic()) {
        int Offset = analyzeLoadFromClobberingMemInst(LoadTy: Load->getType(), LoadPtr: Address,
                                                      DepMI, DL);
        if (Offset != -1)
          return AvailableValue::getMI(MI: DepMI, Offset);
      }
    }

    // Nothing known about this clobber, have to be conservative.
    LLVM_DEBUG(
        // fast print dep, using operator<< on instruction is too slow.
        dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
        dbgs() << " is clobbered by " << *DepInst << '\n';);
    if (ORE->allowExtraAnalysis(DEBUG_TYPE))
      reportMayClobberedLoad(Load, DepInfo, DT, ORE);

    return std::nullopt;
  }
  assert(DepInfo.isDef() && "follows from above");

  // Loading the alloca -> undef.
  // Loading immediately after lifetime begin -> undef.
  if (isa<AllocaInst>(Val: DepInst) || isLifetimeStart(Inst: DepInst))
    return AvailableValue::get(V: UndefValue::get(T: Load->getType()));

  // Loading from a known allocation function yields its initial value
  // (e.g. zero for calloc).
  if (Constant *InitVal =
          getInitialValueOfAllocation(V: DepInst, TLI, Ty: Load->getType()))
    return AvailableValue::get(V: InitVal);

  if (StoreInst *S = dyn_cast<StoreInst>(Val: DepInst)) {
    // Reject loads and stores that are to the same address but are of
    // different types if we have to. If the stored value is convertable to
    // the loaded value, we can reuse it.
    if (!canCoerceMustAliasedValueToLoad(StoredVal: S->getValueOperand(), LoadTy: Load->getType(),
                                         F: S->getFunction()))
      return std::nullopt;

    // Can't forward from non-atomic to atomic without violating memory model.
    if (S->isAtomic() < Load->isAtomic())
      return std::nullopt;

    return AvailableValue::get(V: S->getValueOperand());
  }

  if (LoadInst *LD = dyn_cast<LoadInst>(Val: DepInst)) {
    // If the types mismatch and we can't handle it, reject reuse of the load.
    // If the stored value is larger or equal to the loaded value, we can reuse
    // it.
    if (!canCoerceMustAliasedValueToLoad(StoredVal: LD, LoadTy: Load->getType(),
                                         F: LD->getFunction()))
      return std::nullopt;

    // Can't forward from non-atomic to atomic without violating memory model.
    if (LD->isAtomic() < Load->isAtomic())
      return std::nullopt;

    return AvailableValue::getLoad(Load: LD);
  }

  // Check if load with Addr dependent from select can be converted to select
  // between load values. There must be no instructions between the found
  // loads and DepInst that may clobber the loads.
  if (auto *Sel = dyn_cast<SelectInst>(Val: DepInst)) {
    assert(Sel->getType() == Load->getPointerOperandType());
    auto Loc = MemoryLocation::get(LI: Load);
    // Both arms of the select must have a dominating value available.
    Value *V1 =
        findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: Sel->getTrueValue()),
                            LoadTy: Load->getType(), From: DepInst, AA: getAliasAnalysis());
    if (!V1)
      return std::nullopt;
    Value *V2 =
        findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: Sel->getFalseValue()),
                            LoadTy: Load->getType(), From: DepInst, AA: getAliasAnalysis());
    if (!V2)
      return std::nullopt;
    return AvailableValue::getSelect(Sel, V1, V2);
  }

  // Unknown def - must be conservative.
  LLVM_DEBUG(
      // fast print dep, using operator<< on instruction is too slow.
      dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
      dbgs() << " has unknown def " << *DepInst << '\n';);
  return std::nullopt;
}
1472
/// Classify each non-local dependence of \p Load: blocks where a value can be
/// forwarded go into \p ValuesPerBlock, the rest into \p UnavailableBlocks.
void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
                                      AvailValInBlkVect &ValuesPerBlock,
                                      UnavailBlkVect &UnavailableBlocks) {
  // Filter out useless results (non-locals, etc). Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  for (const auto &Dep : Deps) {
    BasicBlock *DepBB = Dep.getBB();
    MemDepResult DepInfo = Dep.getResult();

    if (DeadBlocks.count(key: DepBB)) {
      // Dead dependent mem-op disguise as a load evaluating the same value
      // as the load in question.
      ValuesPerBlock.push_back(Elt: AvailableValueInBlock::getUndef(BB: DepBB));
      continue;
    }

    // Non-local/unknown dependencies cannot be forwarded from.
    if (!DepInfo.isLocal()) {
      UnavailableBlocks.push_back(Elt: DepBB);
      continue;
    }

    // The address being loaded in this non-local block may not be the same as
    // the pointer operand of the load if PHI translation occurs. Make sure
    // to consider the right address.
    if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Address: Dep.getAddress())) {
      // subtlety: because we know this was a non-local dependency, we know
      // it's safe to materialize anywhere between the instruction within
      // DepInfo and the end of its block.
      ValuesPerBlock.push_back(
          Elt: AvailableValueInBlock::get(BB: DepBB, AV: std::move(*AV)));
    } else {
      UnavailableBlocks.push_back(Elt: DepBB);
    }
  }

  // Every dependence must end up in exactly one of the two output vectors.
  assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
         "post condition violation");
}
1513
/// Given the following code, v1 is partially available on some edges, but not
/// available on the edge from PredBB. This function tries to find if there is
/// another identical load in the other successor of PredBB.
///
///      v0 = load %addr
///      br %LoadBB
///
///   LoadBB:
///      v1 = load %addr
///      ...
///
///   PredBB:
///      ...
///      br %cond, label %LoadBB, label %SuccBB
///
///   SuccBB:
///      v2 = load %addr
///      ...
///
LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
                                           LoadInst *Load) {
  // For simplicity we handle a Pred has 2 successors only.
  auto *Term = Pred->getTerminator();
  if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
    return nullptr;
  // SuccBB is the successor of Pred that is not LoadBB.
  auto *SuccBB = Term->getSuccessor(Idx: 0);
  if (SuccBB == LoadBB)
    SuccBB = Term->getSuccessor(Idx: 1);
  if (!SuccBB->getSinglePredecessor())
    return nullptr;

  // Scan SuccBB under a budget, ignoring debug/pseudo instructions, looking
  // for a load identical to Load.
  unsigned int NumInsts = MaxNumInsnsPerBlock;
  for (Instruction &Inst : *SuccBB) {
    if (Inst.isDebugOrPseudoInst())
      continue;
    if (--NumInsts == 0)
      return nullptr;

    if (!Inst.isIdenticalTo(I: Load))
      continue;

    MemDepResult Dep = MD->getDependency(QueryInst: &Inst);
    // If an identical load doesn't depend on any local instructions, it can
    // be safely moved to PredBB.
    // Also check for the implicit control flow instructions. See the comments
    // in PerformLoadPRE for details.
    if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(Insn: &Inst))
      return cast<LoadInst>(Val: &Inst);

    // Otherwise there is something in the same BB clobbers the memory, we can't
    // move this and later load to PredBB.
    return nullptr;
  }

  return nullptr;
}
1570
/// Complete load PRE: insert a copy of \p Load into each block listed in
/// \p AvailableLoads (using the given translated pointer), wire the new loads
/// into MemorySSA/MemDep/ICF bookkeeping, then build SSA form over all
/// available values and replace \p Load. \p CriticalEdgePredAndLoad, when
/// non-null, maps split-critical-edge predecessors to identical loads that
/// the new load should subsume.
void GVNPass::eliminatePartiallyRedundantLoad(
    LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
    MapVector<BasicBlock *, Value *> &AvailableLoads,
    MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
  for (const auto &AvailableLoad : AvailableLoads) {
    BasicBlock *UnavailableBlock = AvailableLoad.first;
    Value *LoadPtr = AvailableLoad.second;

    // Clone Load at the end of the unavailable block, preserving its
    // volatility, alignment, ordering, and sync scope.
    auto *NewLoad = new LoadInst(
        Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
        Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
        UnavailableBlock->getTerminator()->getIterator());
    NewLoad->setDebugLoc(Load->getDebugLoc());
    // Keep MemorySSA in sync with the newly inserted access.
    if (MSSAU) {
      auto *NewAccess = MSSAU->createMemoryAccessInBB(
          I: NewLoad, Definition: nullptr, BB: NewLoad->getParent(), Point: MemorySSA::BeforeTerminator);
      if (auto *NewDef = dyn_cast<MemoryDef>(Val: NewAccess))
        MSSAU->insertDef(Def: NewDef, /*RenameUses=*/true);
      else
        MSSAU->insertUse(Use: cast<MemoryUse>(Val: NewAccess), /*RenameUses=*/true);
    }

    // Transfer the old load's AA tags to the new load.
    AAMDNodes Tags = Load->getAAMetadata();
    if (Tags)
      NewLoad->setAAMetadata(Tags);

    // Carry over metadata that remains valid on the relocated load.
    if (auto *MD = Load->getMetadata(KindID: LLVMContext::MD_invariant_load))
      NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD);
    if (auto *InvGroupMD = Load->getMetadata(KindID: LLVMContext::MD_invariant_group))
      NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_group, Node: InvGroupMD);
    if (auto *RangeMD = Load->getMetadata(KindID: LLVMContext::MD_range))
      NewLoad->setMetadata(KindID: LLVMContext::MD_range, Node: RangeMD);
    if (auto *NoFPClassMD = Load->getMetadata(KindID: LLVMContext::MD_nofpclass))
      NewLoad->setMetadata(KindID: LLVMContext::MD_nofpclass, Node: NoFPClassMD);

    // access_group is only meaningful while the load stays in the same loop.
    if (auto *AccessMD = Load->getMetadata(KindID: LLVMContext::MD_access_group))
      if (LI->getLoopFor(BB: Load->getParent()) == LI->getLoopFor(BB: UnavailableBlock))
        NewLoad->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessMD);

    // We do not propagate the old load's debug location, because the new
    // load now lives in a different BB, and we want to avoid a jumpy line
    // table.
    // FIXME: How do we retain source locations without causing poor debugging
    // behavior?

    // Add the newly created load.
    ValuesPerBlock.push_back(
        Elt: AvailableValueInBlock::get(BB: UnavailableBlock, V: NewLoad));
    MD->invalidateCachedPointerInfo(Ptr: LoadPtr);
    LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');

    // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of old
    // load instruction with the new created load instruction.
    if (CriticalEdgePredAndLoad) {
      auto It = CriticalEdgePredAndLoad->find(Key: UnavailableBlock);
      if (It != CriticalEdgePredAndLoad->end()) {
        ++NumPRELoadMoved2CEPred;
        ICF->insertInstructionTo(Inst: NewLoad, BB: UnavailableBlock);
        LoadInst *OldLoad = It->second;
        combineMetadataForCSE(K: NewLoad, J: OldLoad, DoesKMove: false);
        OldLoad->replaceAllUsesWith(V: NewLoad);
        replaceValuesPerBlockEntry(ValuesPerBlock, OldValue: OldLoad, NewValue: NewLoad);
        // Purge the subsumed load from the leader table before erasing it.
        if (uint32_t ValNo = VN.lookup(V: OldLoad, Verify: false))
          LeaderTable.erase(N: ValNo, I: OldLoad, BB: OldLoad->getParent());
        removeInstruction(I: OldLoad);
      }
    }
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this);
  // ConstructSSAForLoadSet is responsible for combining metadata.
  ICF->removeUsersOf(Inst: Load);
  Load->replaceAllUsesWith(V);
  if (isa<PHINode>(Val: V))
    V->takeName(V: Load);
  if (Instruction *I = dyn_cast<Instruction>(Val: V))
    I->setDebugLoc(Load->getDebugLoc());
  // Pointer-typed replacements may enable new MemDep results elsewhere.
  if (V->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(Ptr: V);
  ORE->emit(RemarkBuilder: [&]() {
    return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
           << "load eliminated by PRE";
  });
  salvageAndRemoveInstruction(I: Load);
}
1658
/// Try to eliminate a partially redundant load: \p Load is available in some
/// predecessors (\p ValuesPerBlock) but not in others (\p UnavailableBlocks).
/// If at most one new load must be inserted, materialize it in the
/// unavailable predecessor(s) (splitting critical edges where required) and
/// let eliminatePartiallyRedundantLoad build the PHIs. Returns true if any
/// change was made (including edge splitting on a bail-out path).
bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
                             UnavailBlkVect &UnavailableBlocks) {
  // Okay, we have *some* definitions of the value. This means that the value
  // is available in some of our (transitive) predecessors. Lets think about
  // doing PRE of this load. This will involve inserting a new load into the
  // predecessor when it's not available. We could do this in general, but
  // prefer to not increase code size. As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).

  SmallPtrSet<BasicBlock *, 4> Blockers(llvm::from_range, UnavailableBlocks);

  // Let's find the first basic block with more than one predecessor. Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = Load->getParent();
  BasicBlock *TmpBB = LoadBB;

  // Check that there are no implicit control flow instructions above our load
  // in its block. If there is an instruction that doesn't always pass the
  // execution to the following instruction, then moving through it may become
  // invalid. For example:
  //
  // int arr[LEN];
  // int index = ???;
  // ...
  // guard(0 <= index && index < LEN);
  // use(arr[index]);
  //
  // It is illegal to move the array access to any point above the guard,
  // because if the index is out of bounds we should deoptimize rather than
  // access the array.
  // Check that there is no guard in this block above our instruction.
  bool MustEnsureSafetyOfSpeculativeExecution =
      ICF->isDominatedByICFIFromSameBlock(Insn: Load);

  while (TmpBB->getSinglePredecessor()) {
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    // Give up if we reach a block where the value is known unavailable.
    if (Blockers.count(Ptr: TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated. Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;

    // Check that there is no implicit control flow in a block above.
    MustEnsureSafetyOfSpeculativeExecution =
        MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(BB: TmpBB);
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // Check to see how many predecessors have the loaded value fully
  // available.
  MapVector<BasicBlock *, Value *> PredLoads;
  DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
  for (const AvailableValueInBlock &AV : ValuesPerBlock)
    FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
  for (BasicBlock *UnavailableBB : UnavailableBlocks)
    FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;

  // The edge from Pred to LoadBB is a critical edge and will be split.
  SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
  // The edge from Pred to LoadBB is a critical edge, and another successor of
  // Pred contains a load that can be moved to Pred. This data structure maps
  // the Pred to the movable load.
  MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
  for (BasicBlock *Pred : predecessors(BB: LoadBB)) {
    // If any predecessor block is an EH pad that does not allow non-PHI
    // instructions before the terminator, we can't PRE the load.
    if (Pred->getTerminator()->isEHPad()) {
      LLVM_DEBUG(
          dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
                 << Pred->getName() << "': " << *Load << '\n');
      return false;
    }

    if (IsValueFullyAvailableInBlock(BB: Pred, FullyAvailableBlocks)) {
      continue;
    }

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      // We cannot split an edge leaving an indirectbr; there is no valid
      // insertion point for a new block on such an edge.
      if (isa<IndirectBrInst>(Val: Pred->getTerminator())) {
        LLVM_DEBUG(
            dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
                   << Pred->getName() << "': " << *Load << '\n');
        return false;
      }

      if (LoadBB->isEHPad()) {
        LLVM_DEBUG(
            dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
                   << Pred->getName() << "': " << *Load << '\n');
        return false;
      }

      // Do not split backedge as it will break the canonical loop form.
      if (!isLoadPRESplitBackedgeEnabled())
        if (DT->dominates(A: LoadBB, B: Pred)) {
          LLVM_DEBUG(
              dbgs()
              << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
              << Pred->getName() << "': " << *Load << '\n');
          return false;
        }

      // Prefer moving an existing load from Pred's other successor into Pred
      // over splitting the edge and inserting a brand new load.
      if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
        CriticalEdgePredAndLoad[Pred] = LI;
      else
        CriticalEdgePredSplit.push_back(Elt: Pred);
    } else {
      // Only add the predecessors that will not be split for now.
      PredLoads[Pred] = nullptr;
    }
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
  unsigned NumUnavailablePreds = NumInsertPreds +
      CriticalEdgePredAndLoad.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should already be eliminated!");
  (void)NumUnavailablePreds;

  // If we need to insert new load in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available Load and insert a new load into
  // that one block.
  if (NumInsertPreds > 1)
    return false;

  // Now we know where we will insert load. We must ensure that it is safe
  // to speculatively execute the load at that points.
  if (MustEnsureSafetyOfSpeculativeExecution) {
    if (CriticalEdgePredSplit.size())
      if (!isSafeToSpeculativelyExecute(I: Load, CtxI: &*LoadBB->getFirstNonPHIIt(), AC,
                                        DT))
        return false;
    for (auto &PL : PredLoads)
      if (!isSafeToSpeculativelyExecute(I: Load, CtxI: PL.first->getTerminator(), AC,
                                        DT))
        return false;
    for (auto &CEP : CriticalEdgePredAndLoad)
      if (!isSafeToSpeculativelyExecute(I: Load, CtxI: CEP.first->getTerminator(), AC,
                                        DT))
        return false;
  }

  // Split critical edges, and update the unavailable predecessors accordingly.
  for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
    BasicBlock *NewPred = splitCriticalEdges(Pred: OrigPred, Succ: LoadBB);
    assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
    PredLoads[NewPred] = nullptr;
    LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
                      << LoadBB->getName() << '\n');
  }

  for (auto &CEP : CriticalEdgePredAndLoad)
    PredLoads[CEP.first] = nullptr;

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  const DataLayout &DL = Load->getDataLayout();
  SmallVector<Instruction*, 8> NewInsts;
  for (auto &PredLoad : PredLoads) {
    BasicBlock *UnavailablePred = PredLoad.first;

    // Do PHI translation to get its value in the predecessor if necessary. The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
    // We do the translation for each edge we skipped by going from Load's block
    // to LoadBB, otherwise we might miss pieces needing translation.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
    Value *LoadPtr = Load->getPointerOperand();
    BasicBlock *Cur = Load->getParent();
    // First translate along each single-predecessor edge we walked up earlier.
    while (Cur != LoadBB) {
      PHITransAddr Address(LoadPtr, DL, AC);
      LoadPtr = Address.translateWithInsertion(CurBB: Cur, PredBB: Cur->getSinglePredecessor(),
                                               DT: *DT, NewInsts);
      if (!LoadPtr) {
        CanDoPRE = false;
        break;
      }
      Cur = Cur->getSinglePredecessor();
    }

    // Then translate across the final LoadBB -> UnavailablePred edge.
    if (LoadPtr) {
      PHITransAddr Address(LoadPtr, DL, AC);
      LoadPtr = Address.translateWithInsertion(CurBB: LoadBB, PredBB: UnavailablePred, DT: *DT,
                                               NewInsts);
    }
    // If we couldn't find or insert a computation of this phi translated value,
    // we fail PRE.
    if (!LoadPtr) {
      LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
                        << *Load->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    PredLoad.second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty()) {
      // Erase instructions generated by the failed PHI translation before
      // trying to number them. PHI translation might insert instructions
      // in basic blocks other than the current one, and we delete them
      // directly, as salvageAndRemoveInstruction only allows removing from the
      // current basic block.
      NewInsts.pop_back_val()->eraseFromParent();
    }
    // HINT: Don't revert the edge-splitting as following transformation may
    // also need to split these critical edges.
    return !CriticalEdgePredSplit.empty();
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it.
  LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
  LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
                                           << " INSTS: " << *NewInsts.back()
                                           << '\n');

  // Assign value numbers to the new instructions.
  for (Instruction *I : NewInsts) {
    // Instructions that have been inserted in predecessor(s) to materialize
    // the load address do not retain their original debug locations. Doing
    // so could lead to confusing (but correct) source attributions.
    I->updateLocationAfterHoist();

    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map. However, in doing so, we risk getting into
    // ordering issues. If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookupOrAdd(V: I);
  }

  eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads&: PredLoads,
                                  CriticalEdgePredAndLoad: &CriticalEdgePredAndLoad);
  ++NumPRELoad;
  return true;
}
1911
/// Loop-specialized PRE: for a load in a loop header with a loop-invariant
/// address, insert copies of the load in the preheader and in the single
/// in-loop block that clobbers it, removing the load from the hot header.
/// Returns true if the transformation was performed.
bool GVNPass::performLoopLoadPRE(LoadInst *Load,
                                 AvailValInBlkVect &ValuesPerBlock,
                                 UnavailBlkVect &UnavailableBlocks) {
  const Loop *L = LI->getLoopFor(BB: Load->getParent());
  // TODO: Generalize to other loop blocks that dominate the latch.
  if (!L || L->getHeader() != Load->getParent())
    return false;

  // Both insertion points must exist for this transform.
  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *Latch = L->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  Value *LoadPtr = Load->getPointerOperand();
  // Must be available in preheader.
  if (!L->isLoopInvariant(V: LoadPtr))
    return false;

  // We plan to hoist the load to preheader without introducing a new fault.
  // In order to do it, we need to prove that we cannot side-exit the loop
  // once loop header is first entered before execution of the load.
  if (ICF->isDominatedByICFIFromSameBlock(Insn: Load))
    return false;

  BasicBlock *LoopBlock = nullptr;
  for (auto *Blocker : UnavailableBlocks) {
    // Blockers from outside the loop are handled in preheader.
    if (!L->contains(BB: Blocker))
      continue;

    // Only allow one loop block. Loop header is not less frequently executed
    // than each loop block, and likely it is much more frequently executed. But
    // in case of multiple loop blocks, we need extra information (such as block
    // frequency info) to understand whether it is profitable to PRE into
    // multiple loop blocks.
    if (LoopBlock)
      return false;

    // Do not sink into inner loops. This may be non-profitable.
    if (L != LI->getLoopFor(BB: Blocker))
      return false;

    // Blocks that dominate the latch execute on every single iteration, maybe
    // except the last one. So PREing into these blocks doesn't make much sense
    // in most cases. But the blocks that do not necessarily execute on each
    // iteration are sometimes much colder than the header, and this is when
    // PRE is potentially profitable.
    if (DT->dominates(A: Blocker, B: Latch))
      return false;

    // Make sure that the terminator itself doesn't clobber.
    if (Blocker->getTerminator()->mayWriteToMemory())
      return false;

    LoopBlock = Blocker;
  }

  // No in-loop clobber found; nothing to PRE here.
  if (!LoopBlock)
    return false;

  // Make sure the memory at this pointer cannot be freed, therefore we can
  // safely reload from it after clobber.
  if (LoadPtr->canBeFreed())
    return false;

  // TODO: Support critical edge splitting if blocker has more than 1 successor.
  MapVector<BasicBlock *, Value *> AvailableLoads;
  AvailableLoads[LoopBlock] = LoadPtr;
  AvailableLoads[Preheader] = LoadPtr;

  LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
  eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
                                  /*CriticalEdgePredAndLoad*/ nullptr);
  ++NumPRELoopLoad;
  return true;
}
1988
1989static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
1990 OptimizationRemarkEmitter *ORE) {
1991 using namespace ore;
1992
1993 ORE->emit(RemarkBuilder: [&]() {
1994 return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
1995 << "load of type " << NV("Type", Load->getType()) << " eliminated"
1996 << setExtraArgs() << " in favor of "
1997 << NV("InfavorOfValue", AvailableValue);
1998 });
1999}
2000
/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
/// Returns true if the load (or a GEP index feeding it) was changed.
bool GVNPass::processNonLocalLoad(LoadInst *Load) {
  // Non-local speculations are not allowed under asan.
  if (Load->getParent()->getParent()->hasFnAttribute(
          Kind: Attribute::SanitizeAddress) ||
      Load->getParent()->getParent()->hasFnAttribute(
          Kind: Attribute::SanitizeHWAddress))
    return false;

  // Step 1: Find the non-local dependencies of the load.
  LoadDepVect Deps;
  MD->getNonLocalPointerDependency(QueryInst: Load, Result&: Deps);

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about. Optimizing
  // it will be too expensive.
  unsigned NumDeps = Deps.size();
  if (NumDeps > MaxNumDeps)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block. Reject this early.
  if (NumDeps == 1 &&
      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
    LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
               dbgs() << " has unknown dependencies\n";);
    return false;
  }

  bool Changed = false;
  // If this load follows a GEP, see if we can PRE the indices before analyzing.
  if (GetElementPtrInst *GEP =
          dyn_cast<GetElementPtrInst>(Val: Load->getOperand(i_nocapture: 0))) {
    for (Use &U : GEP->indices())
      if (Instruction *I = dyn_cast<Instruction>(Val: U.get()))
        Changed |= performScalarPRE(I);
  }

  // Step 2: Analyze the availability of the load.
  AvailValInBlkVect ValuesPerBlock;
  UnavailBlkVect UnavailableBlocks;
  AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty())
    return Changed;

  // Step 3: Eliminate full redundancy.
  //
  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value. Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this);
    // ConstructSSAForLoadSet is responsible for combining metadata.
    ICF->removeUsersOf(Inst: Load);
    Load->replaceAllUsesWith(V);

    if (isa<PHINode>(Val: V))
      V->takeName(V: Load);
    if (Instruction *I = dyn_cast<Instruction>(Val: V))
      // If instruction I has debug info, then we should not update it.
      // Also, if I has a null DebugLoc, then it is still potentially incorrect
      // to propagate Load's DebugLoc because Load may not post-dominate I.
      if (Load->getDebugLoc() && Load->getParent() == I->getParent())
        I->setDebugLoc(Load->getDebugLoc());
    if (V->getType()->isPtrOrPtrVectorTy())
      MD->invalidateCachedPointerInfo(Ptr: V);
    ++NumGVNLoad;
    reportLoadElim(Load, AvailableValue: V, ORE);
    salvageAndRemoveInstruction(I: Load);
    return true;
  }

  // Step 4: Eliminate partial redundancy.
  if (!isPREEnabled() || !isLoadPREEnabled())
    return Changed;
  if (!isLoadInLoopPREEnabled() && LI->getLoopFor(BB: Load->getParent()))
    return Changed;

  // Try the loop-aware variant first; it places loads more profitably when
  // the load sits in a loop header.
  if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
      PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
    return true;

  return Changed;
}
2092
/// Exploit an llvm.assume: if the condition is the constant false, mark the
/// point unreachable (via a store to null); otherwise propagate the fact
/// that the condition equals true to everything the assume dominates.
/// Returns true if any change was made.
bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
  Value *V = IntrinsicI->getArgOperand(i: 0);

  if (ConstantInt *Cond = dyn_cast<ConstantInt>(Val: V)) {
    if (Cond->isZero()) {
      Type *Int8Ty = Type::getInt8Ty(C&: V->getContext());
      Type *PtrTy = PointerType::get(C&: V->getContext(), AddressSpace: 0);
      // Insert a new store-to-null instruction before the assume to indicate
      // that this code is not reachable. FIXME: We could insert unreachable
      // instruction directly because we can modify the CFG.
      auto *NewS =
          new StoreInst(PoisonValue::get(T: Int8Ty), Constant::getNullValue(Ty: PtrTy),
                        IntrinsicI->getIterator());
      if (MSSAU) {
        const MemoryUseOrDef *FirstNonDom = nullptr;
        const auto *AL =
            MSSAU->getMemorySSA()->getBlockAccesses(BB: IntrinsicI->getParent());

        // If there are accesses in the current basic block, find the first one
        // that does not come before NewS. The new memory access is inserted
        // after the found access or before the terminator if no such access is
        // found.
        if (AL) {
          for (const auto &Acc : *AL) {
            if (auto *Current = dyn_cast<MemoryUseOrDef>(Val: &Acc))
              if (!Current->getMemoryInst()->comesBefore(Other: NewS)) {
                FirstNonDom = Current;
                break;
              }
          }
        }

        auto *NewDef =
            FirstNonDom ? MSSAU->createMemoryAccessBefore(
                              I: NewS, Definition: nullptr,
                              InsertPt: const_cast<MemoryUseOrDef *>(FirstNonDom))
                        : MSSAU->createMemoryAccessInBB(
                              I: NewS, Definition: nullptr,
                              BB: NewS->getParent(), Point: MemorySSA::BeforeTerminator);

        MSSAU->insertDef(Def: cast<MemoryDef>(Val: NewDef), /*RenameUses=*/false);
      }
    }
    // An assume with no operand bundles carries no information beyond its
    // condition and can be removed once handled.
    if (isAssumeWithEmptyBundle(Assume: *IntrinsicI)) {
      salvageAndRemoveInstruction(I: IntrinsicI);
      return true;
    }
    return false;
  }

  if (isa<Constant>(Val: V)) {
    // If it's not false, and constant, it must evaluate to true. This means our
    // assume is assume(true), and thus, pointless, and we don't want to do
    // anything more here.
    return false;
  }

  // Propagate "V == true" through everything dominated by the assume.
  Constant *True = ConstantInt::getTrue(Context&: V->getContext());
  return propagateEquality(LHS: V, RHS: True, Root: IntrinsicI);
}
2153
2154static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
2155 patchReplacementInstruction(I, Repl);
2156 I->replaceAllUsesWith(V: Repl);
2157}
2158
/// Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
/// Returns true if the load was removed (or replaced).
bool GVNPass::processLoad(LoadInst *L) {
  // Load elimination relies on memory dependence analysis being available.
  if (!MD)
    return false;

  // This code hasn't been audited for ordered or volatile memory access.
  if (!L->isUnordered())
    return false;

  // Token-like values cannot be PHI'd or materialized elsewhere.
  if (L->getType()->isTokenLikeTy())
    return false;

  // A dead load can simply be dropped (after salvaging debug info).
  if (L->use_empty()) {
    salvageAndRemoveInstruction(I: L);
    return true;
  }

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(QueryInst: L);

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(Load: L);

  // Only handle the local case below.
  if (!Dep.isLocal()) {
    // This might be a NonFuncLocal or an Unknown.
    LLVM_DEBUG(
        // fast print dep, using operator<< on instruction is too slow.
        dbgs() << "GVN: load "; L->printAsOperand(dbgs());
        dbgs() << " has unknown dependence\n";);
    return false;
  }

  // See whether the local dependency provides a usable value for this load.
  auto AV = AnalyzeLoadAvailability(Load: L, DepInfo: Dep, Address: L->getPointerOperand());
  if (!AV)
    return false;

  Value *AvailableValue = AV->MaterializeAdjustedValue(Load: L, InsertPt: L);

  // MaterializeAdjustedValue is responsible for combining metadata.
  ICF->removeUsersOf(Inst: L);
  L->replaceAllUsesWith(V: AvailableValue);
  if (MSSAU)
    MSSAU->removeMemoryAccess(I: L);
  ++NumGVNLoad;
  reportLoadElim(Load: L, AvailableValue, ORE);
  salvageAndRemoveInstruction(I: L);
  // Tell MDA to reexamine the reused pointer since we might have more
  // information after forwarding it.
  if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(Ptr: AvailableValue);
  return true;
}
2214
// Attempt to process masked loads which have loaded from
// masked stores with the same mask.
// The stored value is forwarded; lanes disabled by the mask take the
// load's passthrough operand, so the load becomes a select.
// Returns true if the masked load was replaced and removed.
bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
  if (!MD)
    return false;
  // Require a local, defining (non-clobbering) dependency on a masked store.
  MemDepResult Dep = MD->getDependency(QueryInst: I);
  Instruction *DepInst = Dep.getInst();
  if (!DepInst || !Dep.isLocal() || !Dep.isDef())
    return false;

  Value *Mask = I->getOperand(i_nocapture: 1);
  Value *Passthrough = I->getOperand(i_nocapture: 2);
  Value *StoreVal;
  // The store must use the exact same mask and value type for forwarding to
  // be valid lane-by-lane.
  if (!match(V: DepInst,
             P: m_MaskedStore(Op0: m_Value(V&: StoreVal), Op1: m_Value(), Op2: m_Specific(V: Mask))) ||
      StoreVal->getType() != I->getType())
    return false;

  // Remove the load but generate a select for the passthrough
  Value *OpToForward = llvm::SelectInst::Create(C: Mask, S1: StoreVal, S2: Passthrough, NameStr: "",
                                                InsertBefore: I->getIterator());

  ICF->removeUsersOf(Inst: I);
  I->replaceAllUsesWith(V: OpToForward);
  salvageAndRemoveInstruction(I);
  ++NumGVNLoad;
  return true;
}
2243
2244/// Return a pair the first field showing the value number of \p Exp and the
2245/// second field showing whether it is a value number newly created.
2246std::pair<uint32_t, bool>
2247GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2248 uint32_t &E = ExpressionNumbering[Exp];
2249 bool CreateNewValNum = !E;
2250 if (CreateNewValNum) {
2251 Expressions.push_back(x: Exp);
2252 if (ExprIdx.size() < NextValueNumber + 1)
2253 ExprIdx.resize(new_size: NextValueNumber * 2);
2254 E = NextValueNumber;
2255 ExprIdx[NextValueNumber++] = NextExprNumber++;
2256 }
2257 return {E, CreateNewValNum};
2258}
2259
2260/// Return whether all the values related with the same \p num are
2261/// defined in \p BB.
2262bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2263 GVNPass &GVN) {
2264 return all_of(
2265 Range: GVN.LeaderTable.getLeaders(N: Num),
2266 P: [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2267}
2268
2269/// Wrap phiTranslateImpl to provide caching functionality.
2270uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2271 const BasicBlock *PhiBlock,
2272 uint32_t Num, GVNPass &GVN) {
2273 auto FindRes = PhiTranslateTable.find(Val: {Num, Pred});
2274 if (FindRes != PhiTranslateTable.end())
2275 return FindRes->second;
2276 uint32_t NewNum = phiTranslateImpl(BB: Pred, PhiBlock, Num, GVN);
2277 PhiTranslateTable.insert(KV: {{Num, Pred}, NewNum});
2278 return NewNum;
2279}
2280
// Return true if the value number \p Num and NewNum have equal value.
// Return false if the result is unknown.
// For call-derived value numbers this checks whether memory could have
// changed between the two calls: a readnone call is always equal; a
// readonly call is equal only if it has no function-local clobber.
bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
                                           const BasicBlock *Pred,
                                           const BasicBlock *PhiBlock,
                                           GVNPass &GVN) {
  CallInst *Call = nullptr;
  auto Leaders = GVN.LeaderTable.getLeaders(N: Num);
  // Find a leader for Num that is a call in PhiBlock.
  // NOTE(review): if no leader matches, Call holds the dyn_cast of the last
  // entry (possibly null or a call in another block) — presumably callers
  // only ask about call value numbers with a leader here; verify.
  for (const auto &Entry : Leaders) {
    Call = dyn_cast<CallInst>(Val: Entry.Val);
    if (Call && Call->getParent() == PhiBlock)
      break;
  }

  // A call that touches no memory always produces the same value.
  if (AA->doesNotAccessMemory(Call))
    return true;

  // Without MemDep, or for calls that may write memory, we cannot prove
  // equality.
  if (!MD || !AA->onlyReadsMemory(Call))
    return false;

  MemDepResult LocalDep = MD->getDependency(QueryInst: Call);
  if (!LocalDep.isNonLocal())
    return false;

  const MemoryDependenceResults::NonLocalDepInfo &Deps =
      MD->getNonLocalCallDependency(QueryCall: Call);

  // Check to see if the Call has no function local clobber.
  for (const NonLocalDepEntry &D : Deps) {
    if (D.getResult().isNonFuncLocal())
      return true;
  }
  return false;
}
2315
/// Translate value number \p Num using phis, so that it has the values of
/// the phis in BB.
/// Returns the translated value number, or \p Num itself if no refinement
/// is possible.
uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
                                               const BasicBlock *PhiBlock,
                                               uint32_t Num, GVNPass &GVN) {
  // See if we can refine the value number by looking at the PN incoming value
  // for the given predecessor.
  if (PHINode *PN = NumberingPhi[Num]) {
    if (PN->getParent() != PhiBlock)
      return Num;
    for (unsigned I = 0; I != PN->getNumIncomingValues(); ++I) {
      if (PN->getIncomingBlock(i: I) != Pred)
        continue;
      // Only use an incoming value that already has a number; verify=false
      // tolerates values absent from the table.
      if (uint32_t TransVal = lookup(V: PN->getIncomingValue(i: I), Verify: false))
        return TransVal;
    }
    return Num;
  }

  if (BasicBlock *BB = NumberingBB[Num]) {
    assert(MSSA && "NumberingBB is non-empty only when using MemorySSA");
    // Value numbers of basic blocks are used to represent memory state in
    // load/store instructions and read-only function calls when said state is
    // set by a MemoryPhi.
    if (BB != PhiBlock)
      return Num;
    MemoryPhi *MPhi = MSSA->getMemoryAccess(BB);
    for (unsigned i = 0, N = MPhi->getNumIncomingValues(); i != N; ++i) {
      if (MPhi->getIncomingBlock(I: i) != Pred)
        continue;
      MemoryAccess *MA = MPhi->getIncomingValue(I: i);
      // Represent the incoming memory state by the block of the defining
      // MemoryPhi, the entry block for live-on-entry, or the defining
      // instruction otherwise.
      if (auto *PredPhi = dyn_cast<MemoryPhi>(Val: MA))
        return lookupOrAdd(V: PredPhi->getBlock());
      if (MSSA->isLiveOnEntryDef(MA))
        return lookupOrAdd(V: &BB->getParent()->getEntryBlock());
      return lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst());
    }
    llvm_unreachable(
        "CFG/MemorySSA mismatch: predecessor not found among incoming blocks");
  }

  // If there is any value related with Num is defined in a BB other than
  // PhiBlock, it cannot depend on a phi in PhiBlock without going through
  // a backedge. We can do an early exit in that case to save compile time.
  if (!areAllValsInBB(Num, BB: PhiBlock, GVN))
    return Num;

  // Values without an associated expression (index 0) cannot be translated.
  if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
    return Num;
  // Work on a copy: the operand numbers are rewritten below.
  Expression Exp = Expressions[ExprIdx[Num]];

  for (unsigned I = 0; I < Exp.VarArgs.size(); I++) {
    // For InsertValue and ExtractValue, some varargs are index numbers
    // instead of value numbers. Those index numbers should not be
    // translated.
    if ((I > 1 && Exp.Opcode == Instruction::InsertValue) ||
        (I > 0 && Exp.Opcode == Instruction::ExtractValue) ||
        (I > 1 && Exp.Opcode == Instruction::ShuffleVector))
      continue;
    Exp.VarArgs[I] = phiTranslate(Pred, PhiBlock, Num: Exp.VarArgs[I], GVN);
  }

  if (Exp.Commutative) {
    assert(Exp.VarArgs.size() >= 2 && "Unsupported commutative instruction!");
    // Canonicalize operand order after translation; for compares the
    // predicate (encoded in the low byte of Opcode) must be swapped too.
    if (Exp.VarArgs[0] > Exp.VarArgs[1]) {
      std::swap(a&: Exp.VarArgs[0], b&: Exp.VarArgs[1]);
      uint32_t Opcode = Exp.Opcode >> 8;
      if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
        Exp.Opcode = (Opcode << 8) |
                     CmpInst::getSwappedPredicate(
                         pred: static_cast<CmpInst::Predicate>(Exp.Opcode & 255));
    }
  }

  if (uint32_t NewNum = ExpressionNumbering[Exp]) {
    // For calls, an equal expression does not guarantee an equal value:
    // memory may differ between the two program points.
    if (Exp.Opcode == Instruction::Call && NewNum != Num)
      return areCallValsEqual(Num, NewNum, Pred, PhiBlock, GVN) ? NewNum : Num;
    return NewNum;
  }
  return Num;
}
2397
2398/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
2399/// again.
2400void GVNPass::ValueTable::eraseTranslateCacheEntry(
2401 uint32_t Num, const BasicBlock &CurrBlock) {
2402 for (const BasicBlock *Pred : predecessors(BB: &CurrBlock))
2403 PhiTranslateTable.erase(Val: {Num, Pred});
2404}
2405
2406// In order to find a leader for a given value number at a
2407// specific basic block, we first obtain the list of all Values for that number,
2408// and then scan the list to find one whose block dominates the block in
2409// question. This is fast because dominator tree queries consist of only
2410// a few comparisons of DFS numbers.
2411Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t Num) {
2412 auto Leaders = LeaderTable.getLeaders(N: Num);
2413 if (Leaders.empty())
2414 return nullptr;
2415
2416 Value *Val = nullptr;
2417 for (const auto &Entry : Leaders) {
2418 if (DT->dominates(A: Entry.BB, B: BB)) {
2419 Val = Entry.Val;
2420 if (isa<Constant>(Val))
2421 return Val;
2422 }
2423 }
2424
2425 return Val;
2426}
2427
2428/// There is an edge from 'Src' to 'Dst'. Return
2429/// true if every path from the entry block to 'Dst' passes via this edge. In
2430/// particular 'Dst' must not be reachable via another edge from 'Src'.
2431static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
2432 DominatorTree *DT) {
2433 // While in theory it is interesting to consider the case in which Dst has
2434 // more than one predecessor, because Dst might be part of a loop which is
2435 // only reachable from Src, in practice it is pointless since at the time
2436 // GVN runs all such loops have preheaders, which means that Dst will have
2437 // been changed to have only one predecessor, namely Src.
2438 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
2439 assert((!Pred || Pred == E.getStart()) &&
2440 "No edge between these basic blocks!");
2441 return Pred != nullptr;
2442}
2443
2444void GVNPass::assignBlockRPONumber(Function &F) {
2445 BlockRPONumber.clear();
2446 uint32_t NextBlockNumber = 1;
2447 ReversePostOrderTraversal<Function *> RPOT(&F);
2448 for (BasicBlock *BB : RPOT)
2449 BlockRPONumber[BB] = NextBlockNumber++;
2450 InvalidBlockRPONumbers = false;
2451}
2452
/// The given values are known to be equal in every use
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
/// The Root may either be a basic block edge (for conditions) or an
/// instruction (for assumes).
bool GVNPass::propagateEquality(
    Value *LHS, Value *RHS,
    const std::variant<BasicBlockEdge, Instruction *> &Root) {
  // Worklist of equalities still to be propagated; processing one equality
  // may deduce further ones (e.g. from logical and/or, compares, not).
  SmallVector<std::pair<Value*, Value*>, 4> Worklist;
  Worklist.push_back(Elt: std::make_pair(x&: LHS, y&: RHS));
  bool Changed = false;
  // Blocks for which it is safe to record the equality in the leader table.
  SmallVector<const BasicBlock *> DominatedBlocks;
  if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root)) {
    // For speed, compute a conservative fast approximation to
    // DT->dominates(Root, Root.getEnd());
    if (isOnlyReachableViaThisEdge(E: *Edge, DT))
      DominatedBlocks.push_back(Elt: Edge->getEnd());
  } else {
    // For an assume, every dominator-tree child of its block is in scope.
    Instruction *I = std::get<Instruction *>(v: Root);
    for (const auto *Node : DT->getNode(BB: I->getParent())->children())
      DominatedBlocks.push_back(Elt: Node->getBlock());
  }

  while (!Worklist.empty()) {
    std::pair<Value*, Value*> Item = Worklist.pop_back_val();
    LHS = Item.first; RHS = Item.second;

    if (LHS == RHS)
      continue;
    assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");

    // Don't try to propagate equalities between constants.
    if (isa<Constant>(Val: LHS) && isa<Constant>(Val: RHS))
      continue;

    // Prefer a constant on the right-hand side, or an Argument if no constants.
    if (isa<Constant>(Val: LHS) || (isa<Argument>(Val: LHS) && !isa<Constant>(Val: RHS)))
      std::swap(a&: LHS, b&: RHS);
    assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
    const DataLayout &DL =
        isa<Argument>(Val: LHS)
            ? cast<Argument>(Val: LHS)->getParent()->getDataLayout()
            : cast<Instruction>(Val: LHS)->getDataLayout();

    // If there is no obvious reason to prefer the left-hand side over the
    // right-hand side, ensure the longest lived term is on the right-hand side,
    // so the shortest lived term will be replaced by the longest lived.
    // This tends to expose more simplifications.
    uint32_t LVN = VN.lookupOrAdd(V: LHS);
    if ((isa<Argument>(Val: LHS) && isa<Argument>(Val: RHS)) ||
        (isa<Instruction>(Val: LHS) && isa<Instruction>(Val: RHS))) {
      // Move the 'oldest' value to the right-hand side, using the value number
      // as a proxy for age.
      uint32_t RVN = VN.lookupOrAdd(V: RHS);
      if (LVN < RVN) {
        std::swap(a&: LHS, b&: RHS);
        LVN = RVN;
      }
    }

    // If value numbering later sees that an instruction in the scope is equal
    // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
    // the invariant that instructions only occur in the leader table for their
    // own value number (this is used by removeFromLeaderTable), do not do this
    // if RHS is an instruction (if an instruction in the scope is morphed into
    // LHS then it will be turned into RHS by the next GVN iteration anyway, so
    // using the leader table is about compiling faster, not optimizing better).
    // The leader table only tracks basic blocks, not edges. Only add to if we
    // have the simple case where the edge dominates the end.
    if (!isa<Instruction>(Val: RHS) && canReplacePointersIfEqual(From: LHS, To: RHS, DL))
      for (const BasicBlock *BB : DominatedBlocks)
        LeaderTable.insert(N: LVN, V: RHS, BB);

    // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
    // LHS always has at least one use that is not dominated by Root, this will
    // never do anything if LHS has only one use.
    if (!LHS->hasOneUse()) {
      // Create a callback that captures the DL.
      auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
        return canReplacePointersInUseIfEqual(U, To, DL);
      };
      unsigned NumReplacements;
      if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root))
        NumReplacements = replaceDominatedUsesWithIf(
            From: LHS, To: RHS, DT&: *DT, Edge: *Edge, ShouldReplace: CanReplacePointersCallBack);
      else
        NumReplacements = replaceDominatedUsesWithIf(
            From: LHS, To: RHS, DT&: *DT, I: std::get<Instruction *>(v: Root),
            ShouldReplace: CanReplacePointersCallBack);

      if (NumReplacements > 0) {
        Changed = true;
        NumGVNEqProp += NumReplacements;
        // Cached information for anything that uses LHS will be invalid.
        if (MD)
          MD->invalidateCachedPointerInfo(Ptr: LHS);
      }
    }

    // Now try to deduce additional equalities from this one. For example, if
    // the known equality was "(A != B)" == "false" then it follows that A and B
    // are equal in the scope. Only boolean equalities with an explicit true or
    // false RHS are currently supported.
    if (!RHS->getType()->isIntegerTy(Bitwidth: 1))
      // Not a boolean equality - bail out.
      continue;
    ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS);
    if (!CI)
      // RHS neither 'true' nor 'false' - bail out.
      continue;
    // Whether RHS equals 'true'. Otherwise it equals 'false'.
    // (i1 true is the all-ones value, hence the isMinusOne check.)
    bool IsKnownTrue = CI->isMinusOne();
    bool IsKnownFalse = !IsKnownTrue;

    // If "A && B" is known true then both A and B are known true. If "A || B"
    // is known false then both A and B are known false.
    Value *A, *B;
    if ((IsKnownTrue && match(V: LHS, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) ||
        (IsKnownFalse && match(V: LHS, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
      Worklist.push_back(Elt: std::make_pair(x&: A, y&: RHS));
      Worklist.push_back(Elt: std::make_pair(x&: B, y&: RHS));
      continue;
    }

    // If we are propagating an equality like "(A == B)" == "true" then also
    // propagate the equality A == B. When propagating a comparison such as
    // "(A >= B)" == "true", replace all instances of "A < B" with "false".
    if (CmpInst *Cmp = dyn_cast<CmpInst>(Val: LHS)) {
      Value *Op0 = Cmp->getOperand(i_nocapture: 0), *Op1 = Cmp->getOperand(i_nocapture: 1);

      // If "A == B" is known true, or "A != B" is known false, then replace
      // A with B everywhere in the scope. For floating point operations, we
      // have to be careful since equality does not always imply equivalence.
      if (Cmp->isEquivalence(Invert: IsKnownFalse))
        Worklist.push_back(Elt: std::make_pair(x&: Op0, y&: Op1));

      // If "A >= B" is known true, replace "A < B" with false everywhere.
      CmpInst::Predicate NotPred = Cmp->getInversePredicate();
      Constant *NotVal = ConstantInt::get(Ty: Cmp->getType(), V: IsKnownFalse);
      // Since we don't have the instruction "A < B" immediately to hand, work
      // out the value number that it would have and use that to find an
      // appropriate instruction (if any).
      uint32_t NextNum = VN.getNextUnusedValueNumber();
      uint32_t Num = VN.lookupOrAddCmp(Opcode: Cmp->getOpcode(), Predicate: NotPred, LHS: Op0, RHS: Op1);
      // If the number we were assigned was brand new then there is no point in
      // looking for an instruction realizing it: there cannot be one!
      if (Num < NextNum) {
        for (const auto &Entry : LeaderTable.getLeaders(N: Num)) {
          // Only look at leaders that either dominate the start of the edge,
          // or are dominated by the end. This check is not necessary for
          // correctness, it only discards cases for which the following
          // use replacement will not work anyway.
          if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root)) {
            if (!DT->dominates(A: Entry.BB, B: Edge->getStart()) &&
                !DT->dominates(A: Edge->getEnd(), B: Entry.BB))
              continue;
          } else {
            auto *InstBB = std::get<Instruction *>(v: Root)->getParent();
            if (!DT->dominates(A: Entry.BB, B: InstBB) &&
                !DT->dominates(A: InstBB, B: Entry.BB))
              continue;
          }

          Value *NotCmp = Entry.Val;
          if (NotCmp && isa<Instruction>(Val: NotCmp)) {
            unsigned NumReplacements;
            if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root))
              NumReplacements =
                  replaceDominatedUsesWith(From: NotCmp, To: NotVal, DT&: *DT, Edge: *Edge);
            else
              NumReplacements = replaceDominatedUsesWith(
                  From: NotCmp, To: NotVal, DT&: *DT, I: std::get<Instruction *>(v: Root));
            Changed |= NumReplacements > 0;
            NumGVNEqProp += NumReplacements;
            // Cached information for anything that uses NotCmp will be invalid.
            if (MD)
              MD->invalidateCachedPointerInfo(Ptr: NotCmp);
          }
        }
      }
      // Ensure that any instruction in scope that gets the "A < B" value number
      // is replaced with false.
      // The leader table only tracks basic blocks, not edges. Only add to if we
      // have the simple case where the edge dominates the end.
      for (const BasicBlock *BB : DominatedBlocks)
        LeaderTable.insert(N: Num, V: NotVal, BB);

      continue;
    }

    // Propagate equalities that results from truncation with no unsigned wrap
    // like (trunc nuw i64 %v to i1) == "true" or (trunc nuw i64 %v to i1) ==
    // "false": with nuw, the truncated value equals the i1 result zero-extended.
    if (match(V: LHS, P: m_NUWTrunc(Op: m_Value(V&: A)))) {
      Worklist.emplace_back(Args&: A, Args: ConstantInt::get(Ty: A->getType(), V: IsKnownTrue));
      continue;
    }

    // "!A == C" implies "A == !C".
    if (match(V: LHS, P: m_Not(V: m_Value(V&: A)))) {
      Worklist.emplace_back(Args&: A, Args: ConstantInt::get(Ty: A->getType(), V: !IsKnownTrue));
      continue;
    }
  }

  return Changed;
}
2659
/// When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
/// Returns true if the instruction was simplified, eliminated, or otherwise
/// caused a change; false if it was merely recorded in the leader table.
bool GVNPass::processInstruction(Instruction *I) {
  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it. Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
  const DataLayout &DL = I->getDataLayout();
  if (Value *V = simplifyInstruction(I, Q: {DL, TLI, DT, AC})) {
    bool Changed = false;
    if (!I->use_empty()) {
      // Simplification can cause a special instruction to become not special.
      // For example, devirtualization to a willreturn function.
      ICF->removeUsersOf(Inst: I);
      I->replaceAllUsesWith(V);
      Changed = true;
    }
    if (isInstructionTriviallyDead(I, TLI)) {
      salvageAndRemoveInstruction(I);
      Changed = true;
    }
    if (Changed) {
      if (MD && V->getType()->isPtrOrPtrVectorTy())
        MD->invalidateCachedPointerInfo(Ptr: V);
      ++NumGVNSimpl;
      return true;
    }
  }

  // Assumes are handled specially: they feed equalities into the scope they
  // dominate rather than being value numbered.
  if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
    return processAssumeIntrinsic(IntrinsicI: Assume);

  if (LoadInst *Load = dyn_cast<LoadInst>(Val: I)) {
    if (processLoad(L: Load))
      return true;

    // Load elimination failed; remember the load for later redundancy checks.
    unsigned Num = VN.lookupOrAdd(V: Load);
    LeaderTable.insert(N: Num, V: Load, BB: Load->getParent());
    return false;
  }

  // Attempt redundancy elimination for masked loads as well.
  if (match(V: I, P: m_Intrinsic<Intrinsic::masked_load>()) &&
      processMaskedLoad(I: cast<IntrinsicInst>(Val: I)))
    return true;

  // For conditional branches, we can perform simple conditional propagation on
  // the condition value itself.
  if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: I)) {
    if (isa<Constant>(Val: BI->getCondition()))
      return processFoldableCondBr(BI);

    Value *BranchCond = BI->getCondition();
    BasicBlock *TrueSucc = BI->getSuccessor(i: 0);
    BasicBlock *FalseSucc = BI->getSuccessor(i: 1);
    // Avoid multiple edges early.
    if (TrueSucc == FalseSucc)
      return false;

    BasicBlock *Parent = BI->getParent();
    bool Changed = false;

    // The condition is true along the taken edge and false along the other.
    Value *TrueVal = ConstantInt::getTrue(Context&: TrueSucc->getContext());
    BasicBlockEdge TrueE(Parent, TrueSucc);
    Changed |= propagateEquality(LHS: BranchCond, RHS: TrueVal, Root: TrueE);

    Value *FalseVal = ConstantInt::getFalse(Context&: FalseSucc->getContext());
    BasicBlockEdge FalseE(Parent, FalseSucc);
    Changed |= propagateEquality(LHS: BranchCond, RHS: FalseVal, Root: FalseE);

    return Changed;
  }

  // For switches, propagate the case values into the case destinations.
  if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: I)) {
    Value *SwitchCond = SI->getCondition();
    BasicBlock *Parent = SI->getParent();
    bool Changed = false;

    // Remember how many outgoing edges there are to every successor.
    SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
    for (BasicBlock *Succ : successors(BB: Parent))
      ++SwitchEdges[Succ];

    for (const auto &Case : SI->cases()) {
      BasicBlock *Dst = Case.getCaseSuccessor();
      // If there is only a single edge, propagate the case value into it.
      if (SwitchEdges.lookup(Val: Dst) == 1) {
        BasicBlockEdge E(Parent, Dst);
        Changed |= propagateEquality(LHS: SwitchCond, RHS: Case.getCaseValue(), Root: E);
      }
    }
    return Changed;
  }

  // Instructions with void type don't return a value, so there's
  // no point in trying to find redundancies in them.
  if (I->getType()->isVoidTy())
    return false;

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookupOrAdd(V: I);

  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast failing them.
  if (isa<AllocaInst>(Val: I) || I->isTerminator() || isa<PHINode>(Val: I)) {
    LeaderTable.insert(N: Num, V: I, BB: I->getParent());
    return false;
  }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  if (Num >= NextNum) {
    LeaderTable.insert(N: Num, V: I, BB: I->getParent());
    return false;
  }

  // Perform fast-path value-number based elimination of values inherited from
  // dominators.
  Value *Repl = findLeader(BB: I->getParent(), Num);
  if (!Repl) {
    // Failure, just remember this instance for future use.
    LeaderTable.insert(N: Num, V: I, BB: I->getParent());
    return false;
  }

  if (Repl == I) {
    // If I was the result of a shortcut PRE, it might already be in the table
    // and the best replacement for itself. Nothing to do.
    return false;
  }

  // Remove it!
  patchAndReplaceAllUsesWith(I, Repl);
  if (MD && Repl->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(Ptr: Repl);
  salvageAndRemoveInstruction(I);
  return true;
}
2799
/// runOnFunction - This is the main transformation entry point for a function.
/// Wires up the analyses into member pointers, merges trivial blocks, runs
/// GVN to a fixed point, optionally performs PRE, and cleans up. Returns
/// whether the function was modified.
bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
                      const TargetLibraryInfo &RunTLI, AAResults &RunAA,
                      MemoryDependenceResults *RunMD, LoopInfo &LI,
                      OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
  // Stash the per-run analyses in members so the helper methods can use them.
  AC = &RunAC;
  DT = &RunDT;
  VN.setDomTree(DT);
  TLI = &RunTLI;
  VN.setAliasAnalysis(&RunAA);
  MD = RunMD;
  // ICF lives on the stack for the duration of this run only.
  ImplicitControlFlowTracking ImplicitCFT;
  ICF = &ImplicitCFT;
  this->LI = &LI;
  VN.setMemDep(M: MD);
  VN.setMemorySSA(M: MSSA);
  ORE = RunORE;
  InvalidBlockRPONumbers = true;
  MemorySSAUpdater Updater(MSSA);
  MSSAU = MSSA ? &Updater : nullptr;

  bool Changed = false;
  bool ShouldContinue = true;

  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (BasicBlock &BB : make_early_inc_range(Range&: F)) {
    bool RemovedBlock = MergeBlockIntoPredecessor(BB: &BB, DTU: &DTU, LI: &LI, MSSAU, MemDep: MD);
    if (RemovedBlock)
      ++NumGVNBlocks;

    Changed |= RemovedBlock;
  }
  DTU.flush();

  // Iterate GVN until no more changes are made; each iteration can expose
  // new redundancies in the next.
  unsigned Iteration = 0;
  while (ShouldContinue) {
    LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    (void) Iteration;
    ShouldContinue = iterateOnFunction(F);
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (isPREEnabled()) {
    // Fabricate val-num for dead-code in order to suppress assertion in
    // performPRE().
    assignValNumForDeadCode();
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }

  // FIXME: Should perform GVN again after PRE does something. PRE can move
  // computations into blocks where they become fully redundant. Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();
  // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
  // iteration.
  DeadBlocks.clear();

  if (MSSA && VerifyMemorySSA)
    MSSA->verifyMemorySSA();

  return Changed;
}
2871
2872bool GVNPass::processBlock(BasicBlock *BB) {
2873 if (DeadBlocks.count(key: BB))
2874 return false;
2875
2876 bool ChangedFunction = false;
2877
2878 // Since we may not have visited the input blocks of the phis, we can't
2879 // use our normal hash approach for phis. Instead, simply look for
2880 // obvious duplicates. The first pass of GVN will tend to create
2881 // identical phis, and the second or later passes can eliminate them.
2882 SmallPtrSet<PHINode *, 8> PHINodesToRemove;
2883 ChangedFunction |= EliminateDuplicatePHINodes(BB, ToRemove&: PHINodesToRemove);
2884 for (PHINode *PN : PHINodesToRemove) {
2885 removeInstruction(I: PN);
2886 }
2887 for (Instruction &Inst : make_early_inc_range(Range&: *BB))
2888 ChangedFunction |= processInstruction(I: &Inst);
2889 return ChangedFunction;
2890}
2891
// Instantiate an expression in a predecessor that lacked it.
// 'Instr' is a clone of the partially-redundant instruction; its operands are
// rewritten to the leaders available in 'Pred', and on success it is inserted
// before Pred's terminator and registered with VN/leader table. Returns false
// (leaving Instr un-inserted) if any operand has no leader in Pred.
bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
                                        BasicBlock *Curr, unsigned int ValNo) {
  // Because we are going top-down through the block, all value numbers
  // will be available in the predecessor by the time we need them. Any
  // that weren't originally present will have been instantiated earlier
  // in this loop.
  bool Success = true;
  for (unsigned I = 0, E = Instr->getNumOperands(); I != E; ++I) {
    Value *Op = Instr->getOperand(i: I);
    // Arguments, constants and globals are available everywhere; no
    // translation needed.
    if (isa<Argument>(Val: Op) || isa<Constant>(Val: Op) || isa<GlobalValue>(Val: Op))
      continue;
    // This could be a newly inserted instruction, in which case, we won't
    // find a value number, and should give up before we hurt ourselves.
    // FIXME: Rewrite the infrastructure to let it easier to value number
    // and process newly inserted instructions.
    if (!VN.exists(V: Op)) {
      Success = false;
      break;
    }
    // Translate the operand's value number across the Pred->Curr phi edge.
    uint32_t TValNo =
        VN.phiTranslate(Pred, PhiBlock: Curr, Num: VN.lookup(V: Op), GVN&: *this);
    if (Value *V = findLeader(BB: Pred, Num: TValNo)) {
      Instr->setOperand(i: I, Val: V);
    } else {
      Success = false;
      break;
    }
  }

  // Fail out if we encounter an operand that is not available in
  // the PRE predecessor. This is typically because of loads which
  // are not value numbered precisely.
  if (!Success)
    return false;

  Instr->insertBefore(InsertPos: Pred->getTerminator()->getIterator());
  Instr->setName(Instr->getName() + ".pre");
  // NOTE(review): this re-assigns Instr's own debug location, which is a
  // no-op as written — presumably intended to normalize the DebugLoc after
  // cloning. Confirm the intended source of the location.
  Instr->setDebugLoc(Instr->getDebugLoc());

  ICF->insertInstructionTo(Inst: Instr, BB: Pred);

  unsigned Num = VN.lookupOrAdd(V: Instr);
  // NOTE(review): lookupOrAdd already registers Instr with VN; this explicit
  // add appears to re-insert the same mapping.
  VN.add(V: Instr, Num);

  // Update the availability map to include the new instruction.
  LeaderTable.insert(N: Num, V: Instr, BB: Pred);
  return true;
}
2941
/// Attempt partial redundancy elimination of a single scalar instruction:
/// if CurInst is available in all but one predecessor, hoist a copy into the
/// missing predecessor and join the values with a phi. Returns true if
/// CurInst was replaced and removed.
bool GVNPass::performScalarPRE(Instruction *CurInst) {
  // Bail on instruction kinds PRE cannot handle: allocas and phis are
  // uniquely numbered, terminators/void produce no reusable value, and
  // memory-touching or token-like instructions are unsafe to duplicate.
  if (isa<AllocaInst>(Val: CurInst) || CurInst->isTerminator() ||
      isa<PHINode>(Val: CurInst) || CurInst->getType()->isVoidTy() ||
      CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
      CurInst->getType()->isTokenLikeTy())
    return false;

  // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
  // sinking the compare again, and it would force the code generator to
  // move the i1 from processor flags or predicate registers into a general
  // purpose register.
  if (isa<CmpInst>(Val: CurInst))
    return false;

  // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
  // sinking the addressing mode computation back to its uses. Extending the
  // GEP's live range increases the register pressure, and therefore it can
  // introduce unnecessary spills.
  //
  // This doesn't prevent Load PRE. PHI translation will make the GEP available
  // to the load by moving it to the predecessor block if necessary.
  if (isa<GetElementPtrInst>(Val: CurInst))
    return false;

  if (auto *CallB = dyn_cast<CallBase>(Val: CurInst)) {
    // We don't currently value number ANY inline asm calls.
    if (CallB->isInlineAsm())
      return false;
  }

  // Protected pointer field loads/stores should be paired with the intrinsic
  // to avoid unnecessary address escapes.
  if (auto *II = dyn_cast<IntrinsicInst>(Val: CurInst))
    if (II->getIntrinsicID() == Intrinsic::protected_field_ptr)
      return false;

  uint32_t ValNo = VN.lookup(V: CurInst);

  // Look for the predecessors for PRE opportunities. We're
  // only trying to solve the basic diamond case, where
  // a value is computed in the successor and one predecessor,
  // but not the other. We also explicitly disallow cases
  // where the successor is its own predecessor, because they're
  // more complicated to get right.
  unsigned NumWith = 0;
  unsigned NumWithout = 0;
  BasicBlock *PREPred = nullptr;
  BasicBlock *CurrentBlock = CurInst->getParent();

  // Update the RPO numbers for this function.
  if (InvalidBlockRPONumbers)
    assignBlockRPONumber(F&: *CurrentBlock->getParent());

  SmallVector<std::pair<Value *, BasicBlock *>, 8> PredMap;
  for (BasicBlock *P : predecessors(BB: CurrentBlock)) {
    // We're not interested in PRE when a predecessor block is not
    // reachable; setting NumWithout to 2 aborts the transform below.
    if (!DT->isReachableFromEntry(A: P)) {
      NumWithout = 2;
      break;
    }
    // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
    assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
           "Invalid BlockRPONumber map.");
    if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
      NumWithout = 2;
      break;
    }

    uint32_t TValNo = VN.phiTranslate(Pred: P, PhiBlock: CurrentBlock, Num: ValNo, GVN&: *this);
    Value *PredV = findLeader(BB: P, Num: TValNo);
    if (!PredV) {
      PredMap.push_back(Elt: std::make_pair(x: static_cast<Value *>(nullptr), y&: P));
      PREPred = P;
      ++NumWithout;
    } else if (PredV == CurInst) {
      // CurInst dominates this predecessor.
      NumWithout = 2;
      break;
    } else {
      PredMap.push_back(Elt: std::make_pair(x&: PredV, y&: P));
      ++NumWith;
    }
  }

  // Don't do PRE when it might increase code size, i.e. when
  // we would need to insert instructions in more than one pred.
  if (NumWithout > 1 || NumWith == 0)
    return false;

  // We may have a case where all predecessors have the instruction,
  // and we just need to insert a phi node. Otherwise, perform
  // insertion.
  Instruction *PREInstr = nullptr;

  if (NumWithout != 0) {
    if (!isSafeToSpeculativelyExecute(I: CurInst)) {
      // It is only valid to insert a new instruction if the current instruction
      // is always executed. An instruction with implicit control flow could
      // prevent us from doing it. If we cannot speculate the execution, then
      // PRE should be prohibited.
      if (ICF->isDominatedByICFIFromSameBlock(Insn: CurInst))
        return false;
    }

    // Don't do PRE across indirect branch.
    if (isa<IndirectBrInst>(Val: PREPred->getTerminator()))
      return false;

    // We can't do PRE safely on a critical edge, so instead we schedule
    // the edge to be split and perform the PRE the next time we iterate
    // on the function.
    unsigned SuccNum = GetSuccessorNumber(BB: PREPred, Succ: CurrentBlock);
    if (isCriticalEdge(TI: PREPred->getTerminator(), SuccNum)) {
      ToSplit.push_back(Elt: std::make_pair(x: PREPred->getTerminator(), y&: SuccNum));
      return false;
    }
    // We need to insert somewhere, so let's give it a shot.
    PREInstr = CurInst->clone();
    if (!performScalarPREInsertion(Instr: PREInstr, Pred: PREPred, Curr: CurrentBlock, ValNo)) {
      // If we failed insertion, make sure we remove the instruction.
#ifndef NDEBUG
      verifyRemoved(PREInstr);
#endif
      PREInstr->deleteValue();
      return false;
    }
  }

  // Either we should have filled in the PRE instruction, or we should
  // not have needed insertions.
  assert(PREInstr != nullptr || NumWithout == 0);

  ++NumGVNPRE;

  // Create a PHI to make the value available in this block.
  PHINode *Phi = PHINode::Create(Ty: CurInst->getType(), NumReservedValues: PredMap.size(),
                                 NameStr: CurInst->getName() + ".pre-phi");
  Phi->insertBefore(InsertPos: CurrentBlock->begin());
  for (auto &[V, BB] : PredMap) {
    if (V) {
      // If we use an existing value in this phi, we have to patch the original
      // value because the phi will be used to replace a later value.
      patchReplacementInstruction(I: CurInst, Repl: V);
      Phi->addIncoming(V, BB);
    } else
      Phi->addIncoming(V: PREInstr, BB: PREPred);
  }

  VN.add(V: Phi, Num: ValNo);
  // After creating a new PHI for ValNo, the phi translate result for ValNo will
  // be changed, so erase the related stale entries in phi translate cache.
  VN.eraseTranslateCacheEntry(Num: ValNo, CurrBlock: *CurrentBlock);
  LeaderTable.insert(N: ValNo, V: Phi, BB: CurrentBlock);
  Phi->setDebugLoc(CurInst->getDebugLoc());
  CurInst->replaceAllUsesWith(V: Phi);
  if (MD && Phi->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(Ptr: Phi);
  LeaderTable.erase(N: ValNo, I: CurInst, BB: CurrentBlock);

  LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
  removeInstruction(I: CurInst);

  return true;
}
3107
3108/// Perform a purely local form of PRE that looks for diamond
3109/// control flow patterns and attempts to perform simple PRE at the join point.
3110bool GVNPass::performPRE(Function &F) {
3111 bool Changed = false;
3112 for (BasicBlock *CurrentBlock : depth_first(G: &F.getEntryBlock())) {
3113 // Nothing to PRE in the entry block.
3114 if (CurrentBlock == &F.getEntryBlock())
3115 continue;
3116
3117 // Don't perform PRE on an EH pad.
3118 if (CurrentBlock->isEHPad())
3119 continue;
3120
3121 for (BasicBlock::iterator BI = CurrentBlock->begin(),
3122 BE = CurrentBlock->end();
3123 BI != BE;) {
3124 Instruction *CurInst = &*BI++;
3125 Changed |= performScalarPRE(CurInst);
3126 }
3127 }
3128
3129 if (splitCriticalEdges())
3130 Changed = true;
3131
3132 return Changed;
3133}
3134
3135/// Split the critical edge connecting the given two blocks, and return
3136/// the block inserted to the critical edge.
3137BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
3138 // GVN does not require loop-simplify, do not try to preserve it if it is not
3139 // possible.
3140 BasicBlock *BB = SplitCriticalEdge(
3141 Src: Pred, Dst: Succ,
3142 Options: CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
3143 if (BB) {
3144 if (MD)
3145 MD->invalidateCachedPredecessors();
3146 InvalidBlockRPONumbers = true;
3147 }
3148 return BB;
3149}
3150
3151/// Split critical edges found during the previous
3152/// iteration that may enable further optimization.
3153bool GVNPass::splitCriticalEdges() {
3154 if (ToSplit.empty())
3155 return false;
3156
3157 bool Changed = false;
3158 do {
3159 std::pair<Instruction *, unsigned> Edge = ToSplit.pop_back_val();
3160 Changed |= SplitCriticalEdge(TI: Edge.first, SuccNum: Edge.second,
3161 Options: CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
3162 nullptr;
3163 } while (!ToSplit.empty());
3164 if (Changed) {
3165 if (MD)
3166 MD->invalidateCachedPredecessors();
3167 InvalidBlockRPONumbers = true;
3168 }
3169 return Changed;
3170}
3171
3172/// Executes one iteration of GVN.
3173bool GVNPass::iterateOnFunction(Function &F) {
3174 cleanupGlobalSets();
3175
3176 // Top-down walk of the dominator tree.
3177 bool Changed = false;
3178 // Needed for value numbering with phi construction to work.
3179 // RPOT walks the graph in its constructor and will not be invalidated during
3180 // processBlock.
3181 ReversePostOrderTraversal<Function *> RPOT(&F);
3182
3183 for (BasicBlock *BB : RPOT)
3184 Changed |= processBlock(BB);
3185
3186 return Changed;
3187}
3188
// Reset all per-iteration state (value numbers, leader table, RPO numbers,
// implicit-control-flow tracking). Deliberately does NOT clear DeadBlocks,
// which must persist across iterations (see runImpl).
void GVNPass::cleanupGlobalSets() {
  VN.clear();
  LeaderTable.clear();
  BlockRPONumber.clear();
  ICF->clear();
  // RPO numbers were just dropped; force recomputation before next use.
  InvalidBlockRPONumbers = true;
}
3196
3197void GVNPass::removeInstruction(Instruction *I) {
3198 VN.erase(V: I);
3199 if (MD) MD->removeInstruction(InstToRemove: I);
3200 if (MSSAU)
3201 MSSAU->removeMemoryAccess(I);
3202#ifndef NDEBUG
3203 verifyRemoved(I);
3204#endif
3205 ICF->removeInstruction(Inst: I);
3206 I->eraseFromParent();
3207 ++NumGVNInstr;
3208}
3209
/// Verify that the specified instruction does not occur in our
/// internal data structures.
/// Debug-only sanity check (called under #ifndef NDEBUG); asserts inside
/// the value table and leader table if Inst is still referenced.
void GVNPass::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(V: Inst);
  LeaderTable.verifyRemoved(V: Inst);
}
3216
/// BB is declared dead, which implied other blocks become dead as well. This
/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with UndefVal.
void GVNPass::addDeadBlock(BasicBlock *BB) {
  // Worklist of blocks newly discovered to be dead.
  SmallVector<BasicBlock *, 4> NewDead;
  // DF collects the live "dominance frontier": live blocks with at least one
  // dead predecessor, whose phis need patching afterwards.
  SmallSetVector<BasicBlock *, 4> DF;

  NewDead.push_back(Elt: BB);
  while (!NewDead.empty()) {
    BasicBlock *D = NewDead.pop_back_val();
    if (DeadBlocks.count(key: D))
      continue;

    // All blocks dominated by D are dead.
    SmallVector<BasicBlock *, 8> Dom;
    DT->getDescendants(R: D, Result&: Dom);
    DeadBlocks.insert_range(R&: Dom);

    // Figure out the dominance-frontier(D).
    for (BasicBlock *B : Dom) {
      for (BasicBlock *S : successors(BB: B)) {
        if (DeadBlocks.count(key: S))
          continue;

        bool AllPredDead = true;
        for (BasicBlock *P : predecessors(BB: S))
          if (!DeadBlocks.count(key: P)) {
            AllPredDead = false;
            break;
          }

        if (!AllPredDead) {
          // S could be proved dead later on. That is why we don't update phi
          // operands at this moment.
          DF.insert(X: S);
        } else {
          // While S is not dominated by D, it is dead by now. This could take
          // place if S already has a dead predecessor before D is declared
          // dead.
          NewDead.push_back(Elt: S);
        }
      }
    }
  }

  // For the dead blocks' live successors, update their phi nodes by replacing
  // the operands corresponding to dead blocks with UndefVal.
  for (BasicBlock *B : DF) {
    if (DeadBlocks.count(key: B))
      continue;

    // First, split the critical edges. This might also create additional blocks
    // to preserve LoopSimplify form and adjust edges accordingly.
    SmallVector<BasicBlock *, 4> Preds(predecessors(BB: B));
    for (BasicBlock *P : Preds) {
      if (!DeadBlocks.count(key: P))
        continue;

      if (is_contained(Range: successors(BB: P), Element: B) &&
          isCriticalEdge(TI: P->getTerminator(), Succ: B)) {
        // The new block on the split edge is itself dead.
        if (BasicBlock *S = splitCriticalEdges(Pred: P, Succ: B))
          DeadBlocks.insert(X: P = S);
      }
    }

    // Now poison the incoming values from the dead predecessors.
    for (BasicBlock *P : predecessors(BB: B)) {
      if (!DeadBlocks.count(key: P))
        continue;
      for (PHINode &Phi : B->phis()) {
        Phi.setIncomingValueForBlock(BB: P, V: PoisonValue::get(T: Phi.getType()));
        if (MD)
          MD->invalidateCachedPointerInfo(Ptr: &Phi);
      }
    }
  }
}
3295
3296// If the given branch is recognized as a foldable branch (i.e. conditional
3297// branch with constant condition), it will perform following analyses and
3298// transformation.
3299// 1) If the dead out-coming edge is a critical-edge, split it. Let
3300// R be the target of the dead out-coming edge.
3301// 1) Identify the set of dead blocks implied by the branch's dead outcoming
3302// edge. The result of this step will be {X| X is dominated by R}
3303// 2) Identify those blocks which haves at least one dead predecessor. The
3304// result of this step will be dominance-frontier(R).
3305// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
3306// dead blocks with "UndefVal" in an hope these PHIs will optimized away.
3307//
3308// Return true iff *NEW* dead code are found.
3309bool GVNPass::processFoldableCondBr(CondBrInst *BI) {
3310 // If a branch has two identical successors, we cannot declare either dead.
3311 if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1))
3312 return false;
3313
3314 ConstantInt *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition());
3315 if (!Cond)
3316 return false;
3317
3318 BasicBlock *DeadRoot =
3319 Cond->getZExtValue() ? BI->getSuccessor(i: 1) : BI->getSuccessor(i: 0);
3320 if (DeadBlocks.count(key: DeadRoot))
3321 return false;
3322
3323 if (!DeadRoot->getSinglePredecessor())
3324 DeadRoot = splitCriticalEdges(Pred: BI->getParent(), Succ: DeadRoot);
3325
3326 addDeadBlock(BB: DeadRoot);
3327 return true;
3328}
3329
3330// performPRE() will trigger assert if it comes across an instruction without
3331// associated val-num. As it normally has far more live instructions than dead
3332// instructions, it makes more sense just to "fabricate" a val-number for the
3333// dead code than checking if instruction involved is dead or not.
3334void GVNPass::assignValNumForDeadCode() {
3335 for (BasicBlock *BB : DeadBlocks) {
3336 for (Instruction &Inst : *BB) {
3337 unsigned ValNum = VN.lookupOrAdd(V: &Inst);
3338 LeaderTable.insert(N: ValNum, V: &Inst, BB);
3339 }
3340 }
3341}
3342
3343class llvm::gvn::GVNLegacyPass : public FunctionPass {
3344public:
3345 static char ID; // Pass identification, replacement for typeid.
3346
3347 explicit GVNLegacyPass(bool MemDepAnalysis = GVNEnableMemDep,
3348 bool MemSSAAnalysis = GVNEnableMemorySSA)
3349 : FunctionPass(ID), Impl(GVNOptions()
3350 .setMemDep(MemDepAnalysis)
3351 .setMemorySSA(MemSSAAnalysis)) {
3352 initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
3353 }
3354
3355 bool runOnFunction(Function &F) override {
3356 if (skipFunction(F))
3357 return false;
3358
3359 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
3360 if (Impl.isMemorySSAEnabled() && !MSSAWP)
3361 MSSAWP = &getAnalysis<MemorySSAWrapperPass>();
3362
3363 return Impl.runImpl(
3364 F, RunAC&: getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
3365 RunDT&: getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
3366 RunTLI: getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
3367 RunAA&: getAnalysis<AAResultsWrapperPass>().getAAResults(),
3368 RunMD: Impl.isMemDepEnabled()
3369 ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
3370 : nullptr,
3371 LI&: getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
3372 RunORE: &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
3373 MSSA: MSSAWP ? &MSSAWP->getMSSA() : nullptr);
3374 }
3375
3376 void getAnalysisUsage(AnalysisUsage &AU) const override {
3377 AU.addRequired<AssumptionCacheTracker>();
3378 AU.addRequired<DominatorTreeWrapperPass>();
3379 AU.addRequired<TargetLibraryInfoWrapperPass>();
3380 AU.addRequired<LoopInfoWrapperPass>();
3381 if (Impl.isMemDepEnabled())
3382 AU.addRequired<MemoryDependenceWrapperPass>();
3383 AU.addRequired<AAResultsWrapperPass>();
3384 AU.addPreserved<DominatorTreeWrapperPass>();
3385 AU.addPreserved<GlobalsAAWrapperPass>();
3386 AU.addPreserved<TargetLibraryInfoWrapperPass>();
3387 AU.addPreserved<LoopInfoWrapperPass>();
3388 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
3389 AU.addPreserved<MemorySSAWrapperPass>();
3390 if (Impl.isMemorySSAEnabled())
3391 AU.addRequired<MemorySSAWrapperPass>();
3392 }
3393
3394private:
3395 GVNPass Impl;
3396};
3397
// The address of ID (not its value) uniquely identifies the pass to the
// legacy pass manager.
char GVNLegacyPass::ID = 0;

// Register the pass under the command-line name "gvn" and declare the
// analyses it depends on so the legacy pass manager can schedule them first.
INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3410
3411// The public interface to this file...
3412FunctionPass *llvm::createGVNPass() { return new GVNLegacyPass(); }
3413